;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; ---- Note on FFR handling
;;
;; == Moves
;; ---- Moves of single vectors
;; ---- Moves of multiple vectors
;; ---- Moves of predicates
;; ---- Moves relating to the FFR
;;
;; == Loads
;; ---- Normal contiguous loads
;; ---- Extending contiguous loads
;; ---- First-faulting contiguous loads
;; ---- First-faulting extending contiguous loads
;; ---- Non-temporal contiguous loads
;; ---- Normal gather loads
;; ---- Extending gather loads
;; ---- First-faulting gather loads
;; ---- First-faulting extending gather loads
;;
;; == Prefetches
;; ---- Contiguous prefetches
;; ---- Gather prefetches
;;
;; == Stores
;; ---- Normal contiguous stores
;; ---- Truncating contiguous stores
;; ---- Non-temporal contiguous stores
;; ---- Normal scatter stores
;; ---- Truncating scatter stores
;;
;; == Vector creation
;; ---- [INT,FP] Duplicate element
;; ---- [INT,FP] Initialize from individual elements
;; ---- [INT] Linear series
;; ---- [PRED] Duplicate element
;;
;; == Vector decomposition
;; ---- [INT,FP] Extract index
;; ---- [INT,FP] Extract active element
;; ---- [PRED] Extract index
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Sign and zero extension
;; ---- [INT] Truncation
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; ---- [PRED] Inverse
;;
;; == Binary arithmetic
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; ---- [INT] Addition
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
;; ---- [INT] Saturating addition and subtraction
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts (rounding towards -Inf)
;; ---- [INT] Shifts (rounding towards 0)
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; ---- [FP] Complex addition
;; ---- [FP] Subtraction
;; ---- [FP] Absolute difference
;; ---- [FP] Multiplication
;; ---- [FP] Binary logical operations
;; ---- [FP] Sign copying
;; ---- [FP] Maximum and minimum
;; ---- [PRED] Binary logical operations
;; ---- [PRED] Binary logical operations (inverted second input)
;; ---- [PRED] Binary logical operations (inverted result)
;;
;; == Ternary arithmetic
;; ---- [INT] MLA and MAD
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; ---- [INT,FP] Compare and select
;; ---- [INT] Comparisons
;; ---- [INT] While tests
;; ---- [FP] Direct comparisons
;; ---- [FP] Absolute comparisons
;; ---- [PRED] Select
;; ---- [PRED] Test bits
;;
;; == Reductions
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Special-purpose unary permutes
;; ---- [INT,FP] Special-purpose binary permutes
;; ---- [PRED] Special-purpose unary permutes
;; ---- [PRED] Special-purpose binary permutes
;;
;; == Conversions
;; ---- [INT<-INT] Packs
;; ---- [INT<-INT] Unpacks
;; ---- [INT<-FP] Conversions
;; ---- [INT<-FP] Packs
;; ---- [INT<-FP] Unpacks
;; ---- [FP<-INT] Conversions
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
;; == Vector partitioning
;; ---- [PRED] Unary partitioning
;; ---- [PRED] Binary partitioning
;; ---- [PRED] Scalarization
;;
;; == Counting elements
;; ---- [INT] Count elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; ---- [INT] Count elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; ---- [INT] Decrement by the number of elements in a predicate (vector)

;; =========================================================================
;; == General notes
;; =========================================================================
;;
;; -------------------------------------------------------------------------
;; ---- Note on the handling of big-endian SVE
;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
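;;
;; As a worked example (not from the sources): on a big-endian target,
;; take a VNx8HI vector whose first element is 0x0102, stored as the
;; bytes 0x01 then 0x02.  LD1H loads 0x0102 into architectural element 0,
;; at the least significant end of the register.  LDR instead transfers
;; a stream of bytes regardless of endianness, so element 0 would end up
;; containing 0x0201: the element order matches LD1H, but the bytes
;; within each element are reversed.  Hence the use of LD1 and ST1 for
;; spill slots described above.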
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PTEST
;; -------------------------------------------------------------------------
;;
;; SVE provides a PTEST instruction for testing the active lanes of a
;; predicate and setting the flags based on the result.  The associated
;; condition code tests are:
;;
;; - any   (= ne): at least one active bit is set
;; - none  (= eq): all active bits are clear (*)
;; - first (= mi): the first active bit is set
;; - nfrst (= pl): the first active bit is clear (*)
;; - last  (= cc): the last active bit is set
;; - nlast (= cs): the last active bit is clear (*)
;;
;; where the conditions marked (*) are also true when there are no active
;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
;; of a PTEST use the condition code mode CC_NZC.
;;
;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
;; This means that for other predicate modes, we need a governing predicate
;; in which all bits are defined.
;;
;; For example, most predicated .H operations ignore the odd bits of the
;; governing predicate, so that an active lane is represented by the
;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
;; any value.  To test a .H predicate, we instead need "10" and "00"
;; respectively, so that the condition only tests the even bits of the
;; predicate.
;;
;; Several instructions set the flags as a side-effect, in the same way
;; that a separate PTEST would.  It's important for code quality that we
;; use these flags results as often as possible, particularly in the case
;; of WHILE* and RDFFR.
;;
;; Also, some of the instructions that set the flags are unpredicated
;; and instead implicitly test all .B, .H, .S or .D elements, as though
;; they were predicated on a PTRUE of that size.  For example, a .S
;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
;; would.
;;
;; We therefore need to represent PTEST operations in a way that
;; makes it easy to combine them with both predicated and unpredicated
;; operations, while using a VNx16BI governing predicate for all
;; predicate modes.  We do this using:
;;
;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
;;
;; where:
;;
;; - GP is the real VNx16BI governing predicate
;;
;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
;;   GP to CAST_GP are guaranteed to be clear in GP.
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
;;   SVE_MAYBE_NOT_PTRUE otherwise.
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
;;
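;; For example (an illustrative sketch using symbolic registers), testing
;; a .H predicate P1 under a VNx16BI governing predicate P0 whose .H view
;; is known to be all-true would be:
;;
;;   (unspec:CC_NZC [(reg:VNx16BI P0)
;;                   (subreg:VNx8BI (reg:VNx16BI P0) 0)
;;                   (const_int SVE_KNOWN_PTRUE)
;;                   (reg:VNx8BI P1)]
;;                  UNSPEC_PTEST)
;;
;; with the "any" result then being read as an ne test of CC_NZC.
;;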
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PRED_Z
;; -------------------------------------------------------------------------
;;
;; SVE integer comparisons are predicated and return zero for inactive
;; lanes.  Sometimes we use them with predicates that are all-true and
;; sometimes we use them with general predicates.
;;
;; The integer comparisons also set the flags and so build-in the effect
;; of a PTEST.  We therefore want to be able to combine integer comparison
;; patterns with PTESTs of the result.  One difficulty with doing this is
;; that (as noted above) the PTEST is always a .B operation and so can place
;; stronger requirements on the governing predicate than the comparison does.
;;
;; For example, when applying a separate PTEST to the result of a full-vector
;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
;; .B PTRUE.  In contrast, the comparison might be predicated on either
;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
;; bits don't matter for .H operations.
;;
;; We therefore can't rely on a full-vector comparison using the same
;; predicate register as a following PTEST.  We instead need to remember
;; whether a comparison is known to be a full-vector comparison and use
;; this information in addition to a check for equal predicate registers.
;; At the same time, it's useful to have a common representation for all
;; integer comparisons, so that they can be handled by a single set of
;; patterns.
;;
;; We therefore take a similar approach to UNSPEC_PTEST above and use:
;;
;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
;;
;; where:
;;
;; - GP is the governing predicate, of mode <M:VPRED>
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
;;   otherwise
;;
;; - CODE is the comparison code
;;
;; - OP0 and OP1 are the values being compared, of mode M
;;
;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
;;
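;; For example, a .S comparison OP0 < OP1 between two VNx4SI vectors,
;; governed by a predicate P0 that might not be all-true, would be
;; (an illustrative sketch using symbolic registers):
;;
;;   (unspec:VNx4BI [(reg:VNx4BI P0)
;;                   (const_int SVE_MAYBE_NOT_PTRUE)
;;                   (lt (reg:VNx4SI Z0) (reg:VNx4SI Z1))]
;;                  UNSPEC_PRED_Z)
;;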
;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
;; Many SVE integer operations are predicated.  We can generate them
;; from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;     We can make extra lanes active without changing the behavior
;;     (although for code-quality reasons we should avoid doing so
;;     needlessly).
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; For (1) and (2) we need a way of attaching the predicate to a normal
;; unpredicated integer operation.  We do this using:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
;;
;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
;; it always is for (1), but might not be for (2).
;;
;; The unspec as a whole has the same value as (code:M ...) when PRED is
;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
;; but as noted above, we should only do so if there's a specific benefit.
;;
;; (The "_X" in the unspec is named after the ACLE functions in (2).)
;;
;; For (3) and (4) we can simply use the SVE port's normal representation
;; of a predicate-based select:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
;;
;; where INACTIVE specifies the values of inactive lanes.
;;
;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
;; than inserting the integer operation directly.  This is mostly useful
;; if we want the combine pass to merge an integer operation with an explicit
;; vcond_mask (in other words, with a following SEL instruction).  However,
;; it's generally better to merge such operations at the gimple level
;; using (3).
;;
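;; For example, a VNx4SI addition generated from the unpredicated optab
;; in (1) takes the form (an illustrative sketch using symbolic
;; registers, with PTRUE being the all-true predicate from (1)):
;;
;;   (set (reg:VNx4SI Z0)
;;        (unspec:VNx4SI [(reg:VNx4BI PTRUE)
;;                        (plus:VNx4SI (reg:VNx4SI Z1) (reg:VNx4SI Z2))]
;;                       UNSPEC_PRED_X))
;;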
;; -------------------------------------------------------------------------
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; -------------------------------------------------------------------------
;;
;; Most SVE floating-point operations are predicated.  We can generate
;; them from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;
;;     The instruction must have the same exception behavior as the
;;     function call unless things like command-line flags specifically
;;     allow otherwise.  For example, with -ffast-math, it is OK to
;;     raise exceptions for inactive lanes, but normally it isn't.
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; So:
;;
;; - In (1), the predicate is known to be all true and the pattern can use
;;   unpredicated operations where available.
;;
;; - In (2), the predicate might or might not be all true.  The pattern can
;;   use unpredicated instructions if the predicate is all-true or if things
;;   like command-line flags allow exceptions for inactive lanes.
;;
;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
;;   might be inactive and inactive lanes of the result must have specific
;;   values.  There is no scope for using unpredicated instructions (and no
;;   reason to want to), so the question about command-line flags doesn't
;;   arise.
;;
;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
;; in combination with a separate predicate operand, e.g.
;;
;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
;;           ....)
;;
;; because (sqrt ...) can raise an exception for any lane, including
;; inactive ones.  We therefore need to use an unspec instead.
;;
;; Also, (2) requires some way of distinguishing the case in which the
;; predicate might have inactive lanes and cannot be changed from the
;; case in which the predicate has no inactive lanes or can be changed.
;; This information is also useful when matching combined FP patterns
;; in which the predicates might not be equal.
;;
;; We therefore model FP operations as an unspec of the form:
;;
;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
;;
;; where:
;;
;; - PRED is the governing predicate.
;;
;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
;;
;; - OP0 OP1 ... are the normal input operands to the operation.
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
;;
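;; For example, a floating-point addition from source (2) whose predicate
;; might have inactive lanes that must be honoured would be (an
;; illustrative sketch using symbolic registers):
;;
;;   (unspec:VNx4SF [(reg:VNx4BI P0)
;;                   (const_int SVE_STRICT_GP)
;;                   (reg:VNx4SF Z1)
;;                   (reg:VNx4SF Z2)]
;;                  UNSPEC_COND_FADD)
;;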
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
;;
;; Logically we want to divide FFR-related instructions into regions
;; that contain exactly one of:
;;
;; - a single write to the FFR
;; - any number of reads from the FFR (but only one read is likely)
;; - any number of LDFF1 and LDNF1 instructions
;;
;; However, LDFF1 and LDNF1 instructions should otherwise behave like
;; normal loads as far as possible.  This means that they should be
;; schedulable within a region in the same way that LD1 would be,
;; and they should be deleted as dead if the result is unused.  The loads
;; should therefore not write to the FFR, since that would both serialize
;; the loads with respect to each other and keep the loads live for any
;; later RDFFR.
;;
;; We get around this by using a fake "FFR token" (FFRT) to help describe
;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
;; while using the FFRT keeps the instruction within its region.
;; Specifically:
;;
;; - Writes start a new FFRT region as well as setting the FFR:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;
;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
;;   loads stay within the same FFRT region:
;;
;;       L1: load data while using the FFRT
;;
;;   In addition, any FFRT region that includes a load also has at least one
;;   instance of:
;;
;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
;;
;;   to make it clear that the region both reads from and writes to the FFR.
;;
;; - Reads do the following:
;;
;;       R1: FFRT = FFR  [type == no_insn]
;;       R2: read from the FFRT
;;       R3: FFRT = update(FFRT)  [type == no_insn]
;;
;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
;;   cannot move backwards across R3.
;;
;; This way, writes are only kept alive by later loads or reads,
;; and write/read pairs fold normally.  For two consecutive reads,
;; the first R3 is made dead by the second R1, which in turn becomes
;; redundant with the first R1.  We then have:
;;
;;    first R1: FFRT = FFR
;;    first read from the FFRT
;;    second read from the FFRT
;;    second R3: FFRT = update(FFRT)
;;
;; i.e. the two FFRT regions collapse into a single one with two
;; independent reads.
;;
;; The model still prevents some valid optimizations though.  For example,
;; if all loads in an FFRT region are deleted as dead, nothing would remove
;; the L2 instructions.
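;;
;; As a concrete illustration of the scheme (schematic, not literal RTL),
;; a SETFFR/LDFF1/RDFFR sequence would expand to:
;;
;;    W1: parallel (FFRT = <new value>, FFR = <all-true>)
;;    L1: Z0 = LDFF1 ... while using the FFRT
;;    L2: FFR = update(FFR, FFRT)
;;    R1: FFRT = FFR
;;    R2: P0 = RDFFR, reading from the FFRT
;;    R3: FFRT = update(FFRT)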

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Moves of single vectors
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV  (including aliases)
;; - LD1B (contiguous form)
;; - LD1D (    "      "    )
;; - LD1H (    "      "    )
;; - LD1W (    "      "    )
;; - LDR
;; - ST1B (contiguous form)
;; - ST1D (    "      "    )
;; - ST1H (    "      "    )
;; - ST1W (    "      "    )
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
;; little-endian ordering is acceptable.  Only allow memory operations during
;; and after RA; before RA we want the predicated load and store patterns to
;; be used instead.
(define_insn "*aarch64_sve_mov<mode>_ldr_str"
  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable.  Memory
;; accesses require secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE
   && <MODE>mode != VNx16QImode
   && (BYTES_BIG_ENDIAN
       || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle memory reloads for modes that can't use LDR and STR.  We use
;; a byte PTRUE for all modes to try to encourage reuse.  This pattern
;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
(define_expand "aarch64_sve_reload_mem"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)

;; A predicated move in which the predicate is known to be all-true.
;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
          UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   #
   ld1<Vesize>\t%0.<Vctype>, %1/z, %2
   st1<Vesize>\t%2.<Vctype>, %1, %0"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 2 "aarch64_any_register_operand" "w")]
          UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)

;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
;; This is equivalent to a subreg on little-endian targets but not for
;; big-endian; see the comment at the head of the file for details.
(define_expand "@aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
          [(match_operand 1 "aarch64_any_register_operand")]
          UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
      {
        emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
        DONE;
      }
  }
)

;; A pattern for handling type punning on big-endian targets.  We use a
;; special predicate for operand 1 to reduce the number of patterns.
(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand 1 "aarch64_any_register_operand" "w")]
          UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves of multiple vectors
;; -------------------------------------------------------------------------
;; All patterns in this section are synthetic and split to real
;; instructions after reload.
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && BYTES_BIG_ENDIAN)
      {
        gcc_assert (can_create_pseudo_p ());
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
        {
          rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
                                             i * BYTES_PER_SVE_VECTOR);
          rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
                                            i * BYTES_PER_SVE_VECTOR);
          emit_insn (gen_rtx_SET (subdest, subsrc));
        }
    DONE;
  }
)

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
          UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
        rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
                                           <MODE>mode,
                                           i * BYTES_PER_SVE_VECTOR);
        rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
                                          <MODE>mode,
                                          i * BYTES_PER_SVE_VECTOR);
        aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)

;; -------------------------------------------------------------------------
;; ---- Moves of predicates
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV
;; - LDR
;; - PFALSE
;; - PTRUE
;; - PTRUES
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
        (match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
        (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Match PTRUES Pn.B when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
             [(unspec:VNx16BI
                [(match_operand:SI 4 "const_int_operand")
                 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
                UNSPEC_PTRUE)])]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)

;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (subreg:PRED_HSD
             (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
               [(unspec:VNx16BI
                  [(match_operand:SI 4 "const_int_operand")
                   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
                  UNSPEC_PTRUE)]) 0)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Match PTRUES Pn.B when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
             [(unspec:VNx16BI
                [(match_operand:SI 4 "const_int_operand")
                 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
                UNSPEC_PTRUE)])]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)
;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (subreg:PRED_HSD
             (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
               [(unspec:VNx16BI
                  [(match_operand:SI 4 "const_int_operand")
                   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
                  UNSPEC_PTRUE)]) 0)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves relating to the FFR
;; -------------------------------------------------------------------------
;; Includes:
;; - RDFFR
;; - RDFFRS
;; - SETFFR
;; - WRFFR
;; -------------------------------------------------------------------------

;; [W1 in the block comment above about FFR handling]
;;
;; Write to the FFR and start a new FFRT scheduling region.
(define_insn "aarch64_wrffr"
  [(set (reg:VNx16BI FFR_REGNUM)
        (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
   (set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
  "TARGET_SVE"
  "@
   setffr
   wrffr\t%0.b"
)

;; [L2 in the block comment above about FFR handling]
;;
;; Introduce a read from and write to the FFR in the current FFRT region,
;; so that the FFR value is live on entry to the region and so that the FFR
;; value visibly changes within the region.  This is used (possibly multiple
;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
(define_insn "aarch64_update_ffr_for_load"
  [(set (reg:VNx16BI FFR_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
                         (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R1 in the block comment above about FFR handling]
;;
;; Notionally copy the FFR to the FFRT, so that the current FFR value
;; can be read from there by the RDFFR instructions below.  This acts
;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
;; creates a natural dependency with earlier writes.
(define_insn "aarch64_copy_ffr_to_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (reg:VNx16BI FFR_REGNUM))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R2 in the block comment above about FFR handling]
;;
;; Read the FFR via the FFRT.
(define_insn "aarch64_rdffr"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffr\t%0.b"
)

;; Likewise with zero predication.
(define_insn "aarch64_rdffr_z"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_operand:VNx16BI 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "rdffr\t%0.b, %1/z"
)

;; Read the FFR to test for a fault, without using the predicate result.
(define_insn "*aarch64_rdffr_z_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Read the FFR with zero predication and test the result.
(define_insn "*aarch64_rdffr_z_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_dup 1)))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; [R3 in the block comment above about FFR handling]
;;
;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
(define_insn "aarch64_update_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; =========================================================================
;; == Loads
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LD2B
;; - LD2D
;; - LD2H
;; - LD2W
;; - LD3B
;; - LD3D
;; - LD3H
;; - LD3W
;; - LD4B
;; - LD4D
;; - LD4H
;; - LD4W
;; -------------------------------------------------------------------------

;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "memory_operand")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated load and extend.
(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
           (ANY_EXTEND:SVE_HSDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
                (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
               UNSPEC_LD1_SVE))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- First-faulting contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1D
;; - LDFF1H
;; - LDFF1W
;; - LDNF1B
;; - LDNF1D
;; - LDNF1H
;; - LDNF1W
;; -------------------------------------------------------------------------

;; Contiguous non-extending first-faulting or non-faulting loads.
(define_insn "@aarch64_ld<fn>f1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
           (reg:VNx16BI FFRT_REGNUM)]
          SVE_LDFF1_LDNF1))]
  "TARGET_SVE"
  "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; - LDNF1B
;; - LDNF1H
;; - LDNF1SB
;; - LDNF1SH
;; - LDNF1SW
;; - LDNF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting or non-faulting load and extend.
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
           (ANY_EXTEND:SVE_HSDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
                (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
                (reg:VNx16BI FFRT_REGNUM)]
               SVE_LDFF1_LDNF1))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Predicated contiguous non-temporal load.
(define_insn "@aarch64_ldnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "memory_operand" "m")]
          UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Normal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1D
;; - LD1W
;; -------------------------------------------------------------------------

;; Unpredicated gather loads.
(define_expand "gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_24 0 "register_operand")
        (unspec:SVE_24
          [(match_dup 5)
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
           (match_operand:<V_INT_CONTAINER> 2 "register_operand")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.s, %5/z, [%2.s]
   ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%2.d]
   ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (ANY_EXTEND:VNx2DI
                (match_operand:VNx2SI 2 "register_operand" "w, w"))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)

;; -------------------------------------------------------------------------
;; ---- Extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
;; true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4HSI
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_4HSI
             (unspec:SVE_4BHI
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending gather loads for 64-bit elements.  The value of
;; operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2HSDI
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
                (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (ANY_EXTEND2:VNx2DI
                     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1526 "@
1527 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1528 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1529 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1530 {
1531 operands[6] = CONSTM1_RTX (VNx2BImode);
1532 operands[7] = CONSTM1_RTX (VNx2BImode);
1533 }
1534 )
1535
1536 ;; Likewise, but with the offset being truncated to 32 bits and then
1537 ;; sign-extended.
1538 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1539 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1540 (unspec:SVE_2HSDI
1541 [(match_operand 6)
1542 (ANY_EXTEND:SVE_2HSDI
1543 (unspec:SVE_2BHSI
1544 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1545 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1546 (unspec:VNx2DI
1547 [(match_operand 7)
1548 (sign_extend:VNx2DI
1549 (truncate:VNx2SI
1550 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1551 UNSPEC_PRED_X)
1552 (match_operand:DI 3 "const_int_operand")
1553 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1554 (mem:BLK (scratch))]
1555 UNSPEC_LD1_GATHER))]
1556 UNSPEC_PRED_X))]
1557 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1558 "@
1559 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1560 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1561 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1562 {
1563 operands[6] = CONSTM1_RTX (VNx2BImode);
1564 operands[7] = CONSTM1_RTX (VNx2BImode);
1565 }
1566 )
1567
1568 ;; Likewise, but with the offset being truncated to 32 bits and then
1569 ;; zero-extended.
1570 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1571 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1572 (unspec:SVE_2HSDI
1573 [(match_operand 7)
1574 (ANY_EXTEND:SVE_2HSDI
1575 (unspec:SVE_2BHSI
1576 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1577 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1578 (and:VNx2DI
1579 (match_operand:VNx2DI 2 "register_operand" "w, w")
1580 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1581 (match_operand:DI 3 "const_int_operand")
1582 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1583 (mem:BLK (scratch))]
1584 UNSPEC_LD1_GATHER))]
1585 UNSPEC_PRED_X))]
1586 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1587 "@
1588 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1589 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1590 "&& !CONSTANT_P (operands[7])"
1591 {
1592 operands[7] = CONSTM1_RTX (VNx2BImode);
1593 }
1594 )
1595
1596 ;; -------------------------------------------------------------------------
1597 ;; ---- First-faulting gather loads
1598 ;; -------------------------------------------------------------------------
1599 ;; Includes gather forms of:
1600 ;; - LDFF1D
1601 ;; - LDFF1W
1602 ;; -------------------------------------------------------------------------
1603
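;; First-faulting loads are normally reached through the ACLE rather
;; than through auto-vectorization.  A minimal sketch (the gather
;; intrinsic name below is an assumption based on the ACLE naming
;; scheme; svsetffr and svrdffr are standard arm_sve.h names):
;;
;;   #include <arm_sve.h>
;;   svuint32_t
;;   speculative_gather (svbool_t pg, const uint32_t *base,
;;                       svuint32_t offsets, svbool_t *ok)
;;   {
;;     svsetffr ();            /* Start with an all-true FFR.  */
;;     svuint32_t x = svldff1_gather_u32offset_u32 (pg, base, offsets);
;;     *ok = svrdffr ();       /* Lanes that loaded without faulting.  */
;;     return x;
;;   }
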
1604 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1605 ;; 3 is true for unsigned extension and false for signed extension.
1606 (define_insn "@aarch64_ldff1_gather<mode>"
1607 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1608 (unspec:SVE_FULL_S
1609 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1610 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1611 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1612 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1613 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1614 (mem:BLK (scratch))
1615 (reg:VNx16BI FFRT_REGNUM)]
1616 UNSPEC_LDFF1_GATHER))]
1617 "TARGET_SVE"
1618 "@
1619 ldff1w\t%0.s, %5/z, [%2.s]
1620 ldff1w\t%0.s, %5/z, [%2.s, #%1]
1621 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1622 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1623 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1624 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1625 )
1626
1627 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1628 ;; of operand 3 doesn't matter in this case.
1629 (define_insn "@aarch64_ldff1_gather<mode>"
1630 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1631 (unspec:SVE_FULL_D
1632 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1633 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1634 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1635 (match_operand:DI 3 "const_int_operand")
1636 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1637 (mem:BLK (scratch))
1638 (reg:VNx16BI FFRT_REGNUM)]
1639 UNSPEC_LDFF1_GATHER))]
1640 "TARGET_SVE"
1641 "@
1642 ldff1d\t%0.d, %5/z, [%2.d]
1643 ldff1d\t%0.d, %5/z, [%2.d, #%1]
1644 ldff1d\t%0.d, %5/z, [%1, %2.d]
1645 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1646 )
1647
1648 ;; Likewise, but with the offset being sign-extended from 32 bits.
1649 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1650 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1651 (unspec:SVE_FULL_D
1652 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1653 (match_operand:DI 1 "register_operand" "rk, rk")
1654 (unspec:VNx2DI
1655 [(match_operand 6)
1656 (sign_extend:VNx2DI
1657 (truncate:VNx2SI
1658 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1659 UNSPEC_PRED_X)
1660 (match_operand:DI 3 "const_int_operand")
1661 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1662 (mem:BLK (scratch))
1663 (reg:VNx16BI FFRT_REGNUM)]
1664 UNSPEC_LDFF1_GATHER))]
1665 "TARGET_SVE"
1666 "@
1667 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1668 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1669 "&& !CONSTANT_P (operands[6])"
1670 {
1671 operands[6] = CONSTM1_RTX (VNx2BImode);
1672 }
1673 )
1674
1675 ;; Likewise, but with the offset being zero-extended from 32 bits.
1676 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1677 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1678 (unspec:SVE_FULL_D
1679 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1680 (match_operand:DI 1 "register_operand" "rk, rk")
1681 (and:VNx2DI
1682 (match_operand:VNx2DI 2 "register_operand" "w, w")
1683 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1684 (match_operand:DI 3 "const_int_operand")
1685 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1686 (mem:BLK (scratch))
1687 (reg:VNx16BI FFRT_REGNUM)]
1688 UNSPEC_LDFF1_GATHER))]
1689 "TARGET_SVE"
1690 "@
1691 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1692 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1693 )
1694
1695 ;; -------------------------------------------------------------------------
1696 ;; ---- First-faulting extending gather loads
1697 ;; -------------------------------------------------------------------------
1698 ;; Includes gather forms of:
1699 ;; - LDFF1B
1700 ;; - LDFF1H
1701 ;; - LDFF1SB
1702 ;; - LDFF1SH
1703 ;; - LDFF1SW
1704 ;; - LDFF1W
1705 ;; -------------------------------------------------------------------------
1706
1707 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1708 ;; Operand 3 is true for unsigned extension and false for signed extension.
1709 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1710 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1711 (unspec:VNx4_WIDE
1712 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1713 (ANY_EXTEND:VNx4_WIDE
1714 (unspec:VNx4_NARROW
1715 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1716 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1717 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1718 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1719 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1720 (mem:BLK (scratch))
1721 (reg:VNx16BI FFRT_REGNUM)]
1722 UNSPEC_LDFF1_GATHER))]
1723 UNSPEC_PRED_X))]
1724 "TARGET_SVE"
1725 "@
1726 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1727 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1728 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1729 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1730 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1731 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1732 "&& !CONSTANT_P (operands[6])"
1733 {
1734 operands[6] = CONSTM1_RTX (VNx4BImode);
1735 }
1736 )
1737
1738 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1739 ;; The value of operand 3 doesn't matter in this case.
1740 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1741 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1742 (unspec:VNx2_WIDE
1743 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1744 (ANY_EXTEND:VNx2_WIDE
1745 (unspec:VNx2_NARROW
1746 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1747 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1748 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1749 (match_operand:DI 3 "const_int_operand")
1750 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1751 (mem:BLK (scratch))
1752 (reg:VNx16BI FFRT_REGNUM)]
1753 UNSPEC_LDFF1_GATHER))]
1754 UNSPEC_PRED_X))]
1755 "TARGET_SVE"
1756 "@
1757 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1758 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1759 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1760 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1761 "&& !CONSTANT_P (operands[6])"
1762 {
1763 operands[6] = CONSTM1_RTX (VNx2BImode);
1764 }
1765 )
1766
1767 ;; Likewise, but with the offset being sign-extended from 32 bits.
1768 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1769 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1770 (unspec:VNx2_WIDE
1771 [(match_operand 6)
1772 (ANY_EXTEND:VNx2_WIDE
1773 (unspec:VNx2_NARROW
1774 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1775 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1776 (unspec:VNx2DI
1777 [(match_operand 7)
1778 (sign_extend:VNx2DI
1779 (truncate:VNx2SI
1780 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1781 UNSPEC_PRED_X)
1782 (match_operand:DI 3 "const_int_operand")
1783 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1784 (mem:BLK (scratch))
1785 (reg:VNx16BI FFRT_REGNUM)]
1786 UNSPEC_LDFF1_GATHER))]
1787 UNSPEC_PRED_X))]
1788 "TARGET_SVE"
1789 "@
1790 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1791 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1792 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1793 {
1794 operands[6] = CONSTM1_RTX (VNx2BImode);
1795 operands[7] = CONSTM1_RTX (VNx2BImode);
1796 }
1797 )
1798
1799 ;; Likewise, but with the offset being zero-extended from 32 bits.
1800 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1801 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1802 (unspec:VNx2_WIDE
1803 [(match_operand 7)
1804 (ANY_EXTEND:VNx2_WIDE
1805 (unspec:VNx2_NARROW
1806 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1807 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1808 (and:VNx2DI
1809 (match_operand:VNx2DI 2 "register_operand" "w, w")
1810 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1811 (match_operand:DI 3 "const_int_operand")
1812 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1813 (mem:BLK (scratch))
1814 (reg:VNx16BI FFRT_REGNUM)]
1815 UNSPEC_LDFF1_GATHER))]
1816 UNSPEC_PRED_X))]
1817 "TARGET_SVE"
1818 "@
1819 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1820 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1821 "&& !CONSTANT_P (operands[7])"
1822 {
1823 operands[7] = CONSTM1_RTX (VNx2BImode);
1824 }
1825 )
1826
1827 ;; =========================================================================
1828 ;; == Prefetches
1829 ;; =========================================================================
1830
1831 ;; -------------------------------------------------------------------------
1832 ;; ---- Contiguous prefetches
1833 ;; -------------------------------------------------------------------------
1834 ;; Includes contiguous forms of:
1835 ;; - PRFB
1836 ;; - PRFD
1837 ;; - PRFH
1838 ;; - PRFW
1839 ;; -------------------------------------------------------------------------
1840
1841 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
1842 ;; operation (as an svprfop), with operands 3 and 4 providing the normal
1843 ;; RTL prefetch rw flag and locality value distilled from it.
1844 (define_insn "@aarch64_sve_prefetch<mode>"
1845 [(prefetch (unspec:DI
1846 [(match_operand:<VPRED> 0 "register_operand" "Upl")
1847 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1848 (match_operand:DI 2 "const_int_operand")]
1849 UNSPEC_SVE_PREFETCH)
1850 (match_operand:DI 3 "const_int_operand")
1851 (match_operand:DI 4 "const_int_operand"))]
1852 "TARGET_SVE"
1853 {
1854 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1855 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1856 }
1857 )
1858
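;; For example, a predicated contiguous prefetch of 32-bit elements can
;; be written with the ACLE as below (svprfw and SV_PLDL1KEEP are
;; standard arm_sve.h names):
;;
;;   #include <arm_sve.h>
;;   void
;;   prefetch_words (svbool_t pg, const uint32_t *base)
;;   {
;;     svprfw (pg, base, SV_PLDL1KEEP);   /* prfw pldl1keep, p0, [x0]  */
;;   }
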
1859 ;; -------------------------------------------------------------------------
1860 ;; ---- Gather prefetches
1861 ;; -------------------------------------------------------------------------
1862 ;; Includes gather forms of:
1863 ;; - PRFB
1864 ;; - PRFD
1865 ;; - PRFH
1866 ;; - PRFW
1867 ;; -------------------------------------------------------------------------
1868
1869 ;; Predicated gather prefetches for 32-bit bases and offsets. The operands
1870 ;; are:
1871 ;; 0: the governing predicate
1872 ;; 1: the scalar component of the address
1873 ;; 2: the vector component of the address
1874 ;; 3: 1 for zero extension, 0 for sign extension
1875 ;; 4: the scale multiplier
1876 ;; 5: a vector zero that identifies the mode of data being accessed
1877 ;; 6: the prefetch operator (an svprfop)
1878 ;; 7: the normal RTL prefetch rw flag
1879 ;; 8: the normal RTL prefetch locality value
1880 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1881 [(prefetch (unspec:DI
1882 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1883 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1884 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1885 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1886 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1887 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1888 (match_operand:DI 6 "const_int_operand")]
1889 UNSPEC_SVE_PREFETCH_GATHER)
1890 (match_operand:DI 7 "const_int_operand")
1891 (match_operand:DI 8 "const_int_operand"))]
1892 "TARGET_SVE"
1893 {
1894 static const char *const insns[][2] = {
1895 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1896 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1897 "prfb", "%0, [%1, %2.s, sxtw]",
1898 "prfb", "%0, [%1, %2.s, uxtw]",
1899 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1900 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1901 };
1902 const char *const *parts = insns[which_alternative];
1903 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1904 }
1905 )
1906
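;; A sketch of the corresponding ACLE usage, in which operand 2 above is
;; the offset vector and operand 6 the svprfop (the gather-prefetch
;; intrinsic name below is an assumption based on the ACLE naming
;; scheme):
;;
;;   #include <arm_sve.h>
;;   void
;;   prefetch_gather (svbool_t pg, const void *base, svuint32_t offsets)
;;   {
;;     svprfb_gather_u32offset (pg, base, offsets, SV_PLDL1KEEP);
;;   }
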
1907 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
1908 ;; doesn't matter in this case.
1909 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
1910 [(prefetch (unspec:DI
1911 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
1912 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
1913 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
1914 (match_operand:DI 3 "const_int_operand")
1915 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
1916 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1917 (match_operand:DI 6 "const_int_operand")]
1918 UNSPEC_SVE_PREFETCH_GATHER)
1919 (match_operand:DI 7 "const_int_operand")
1920 (match_operand:DI 8 "const_int_operand"))]
1921 "TARGET_SVE"
1922 {
1923 static const char *const insns[][2] = {
1924 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
1925 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
1926 "prfb", "%0, [%1, %2.d]",
1927 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
1928 };
1929 const char *const *parts = insns[which_alternative];
1930 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1931 }
1932 )
1933
1934 ;; Likewise, but with the offset being sign-extended from 32 bits.
1935 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
1936 [(prefetch (unspec:DI
1937 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1938 (match_operand:DI 1 "register_operand" "rk, rk")
1939 (unspec:VNx2DI_ONLY
1940 [(match_operand 9)
1941 (sign_extend:VNx2DI
1942 (truncate:VNx2SI
1943 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1944 UNSPEC_PRED_X)
1945 (match_operand:DI 3 "const_int_operand")
1946 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1947 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1948 (match_operand:DI 6 "const_int_operand")]
1949 UNSPEC_SVE_PREFETCH_GATHER)
1950 (match_operand:DI 7 "const_int_operand")
1951 (match_operand:DI 8 "const_int_operand"))]
1952 "TARGET_SVE"
1953 {
1954 static const char *const insns[][2] = {
1955 "prfb", "%0, [%1, %2.d, sxtw]",
1956 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
1957 };
1958 const char *const *parts = insns[which_alternative];
1959 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1960 }
1961 "&& !rtx_equal_p (operands[0], operands[9])"
1962 {
1963 operands[9] = copy_rtx (operands[0]);
1964 }
1965 )
1966
1967 ;; Likewise, but with the offset being zero-extended from 32 bits.
1968 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
1969 [(prefetch (unspec:DI
1970 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1971 (match_operand:DI 1 "register_operand" "rk, rk")
1972 (and:VNx2DI_ONLY
1973 (match_operand:VNx2DI 2 "register_operand" "w, w")
1974 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
1975 (match_operand:DI 3 "const_int_operand")
1976 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1977 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1978 (match_operand:DI 6 "const_int_operand")]
1979 UNSPEC_SVE_PREFETCH_GATHER)
1980 (match_operand:DI 7 "const_int_operand")
1981 (match_operand:DI 8 "const_int_operand"))]
1982 "TARGET_SVE"
1983 {
1984 static const char *const insns[][2] = {
1985 "prfb", "%0, [%1, %2.d, uxtw]",
1986 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
1987 };
1988 const char *const *parts = insns[which_alternative];
1989 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1990 }
1991 )
1992
1993 ;; =========================================================================
1994 ;; == Stores
1995 ;; =========================================================================
1996
1997 ;; -------------------------------------------------------------------------
1998 ;; ---- Normal contiguous stores
1999 ;; -------------------------------------------------------------------------
2000 ;; Includes contiguous forms of:
2001 ;; - ST1B
2002 ;; - ST1D
2003 ;; - ST1H
2004 ;; - ST1W
2005 ;; - ST2B
2006 ;; - ST2D
2007 ;; - ST2H
2008 ;; - ST2W
2009 ;; - ST3B
2010 ;; - ST3D
2011 ;; - ST3H
2012 ;; - ST3W
2013 ;; - ST4B
2014 ;; - ST4D
2015 ;; - ST4H
2016 ;; - ST4W
2017 ;; -------------------------------------------------------------------------
2018
2019 ;; Predicated ST1.
2020 (define_insn "maskstore<mode><vpred>"
2021 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2022 (unspec:SVE_ALL
2023 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2024 (match_operand:SVE_ALL 1 "register_operand" "w")
2025 (match_dup 0)]
2026 UNSPEC_ST1_SVE))]
2027 "TARGET_SVE"
2028 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2029 )
2030
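;; A conditional store loop like the one below can be if-converted into
;; a masked store and then matched by the pattern above (a sketch;
;; whether this happens depends on options and costs):
;;
;;   #include <stdint.h>
;;   void
;;   cond_store (uint32_t *restrict dst, const uint32_t *restrict src,
;;               const uint32_t *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       if (c[i])
;;         dst[i] = src[i];   /* st1w z0.s, p0, [x0, x3, lsl 2]  */
;;   }
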
2031 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2032 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2033 ;; There doesn't seem to be any obvious benefit to treating the all-true
2034 ;; case differently though. In particular, it's very unlikely that we'll
2035 ;; only find out during RTL that a store_lanes is dead.
2036 (define_expand "vec_store_lanes<mode><vsingle>"
2037 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2038 (unspec:SVE_STRUCT
2039 [(match_dup 2)
2040 (match_operand:SVE_STRUCT 1 "register_operand")
2041 (match_dup 0)]
2042 UNSPEC_STN))]
2043 "TARGET_SVE"
2044 {
2045 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2046 }
2047 )
2048
2049 ;; Predicated ST[234].
2050 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2051 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2052 (unspec:SVE_STRUCT
2053 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2054 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2055 (match_dup 0)]
2056 UNSPEC_STN))]
2057 "TARGET_SVE"
2058 "st<vector_count><Vesize>\t%1, %2, %0"
2059 )
2060
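;; Interleaved stores are the usual source of ST2/ST3/ST4; for example
;; (a sketch):
;;
;;   #include <stdint.h>
;;   void
;;   interleave (uint32_t *restrict dst, const uint32_t *restrict a,
;;               const uint32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         dst[2 * i] = a[i];   /* st2w {z0.s, z1.s}, p0, [x0]  */
;;         dst[2 * i + 1] = b[i];
;;       }
;;   }
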
2061 ;; -------------------------------------------------------------------------
2062 ;; ---- Truncating contiguous stores
2063 ;; -------------------------------------------------------------------------
2064 ;; Includes:
2065 ;; - ST1B
2066 ;; - ST1H
2067 ;; - ST1W
2068 ;; -------------------------------------------------------------------------
2069
2070 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2071 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2072 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2073 (unspec:VNx8_NARROW
2074 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2075 (truncate:VNx8_NARROW
2076 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2077 (match_dup 0)]
2078 UNSPEC_ST1_SVE))]
2079 "TARGET_SVE"
2080 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2081 )
2082
2083 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2084 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2085 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2086 (unspec:VNx4_NARROW
2087 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2088 (truncate:VNx4_NARROW
2089 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2090 (match_dup 0)]
2091 UNSPEC_ST1_SVE))]
2092 "TARGET_SVE"
2093 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2094 )
2095
2096 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2097 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2098 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2099 (unspec:VNx2_NARROW
2100 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2101 (truncate:VNx2_NARROW
2102 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2103 (match_dup 0)]
2104 UNSPEC_ST1_SVE))]
2105 "TARGET_SVE"
2106 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2107 )
2108
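;; Truncating stores come from narrowing assignments: the narrow memory
;; elements are stored from the low bits of each wider vector element.
;; A sketch:
;;
;;   #include <stdint.h>
;;   void
;;   narrow (uint8_t *restrict dst, const uint32_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (uint8_t) src[i];   /* st1b z0.s, p0, [x0, x2]  */
;;   }
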
2109 ;; -------------------------------------------------------------------------
2110 ;; ---- Non-temporal contiguous stores
2111 ;; -------------------------------------------------------------------------
2112 ;; Includes:
2113 ;; - STNT1B
2114 ;; - STNT1D
2115 ;; - STNT1H
2116 ;; - STNT1W
2117 ;; -------------------------------------------------------------------------
2118
2119 (define_insn "@aarch64_stnt1<mode>"
2120 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2121 (unspec:SVE_FULL
2122 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2123 (match_operand:SVE_FULL 1 "register_operand" "w")
2124 (match_dup 0)]
2125 UNSPEC_STNT1_SVE))]
2126 "TARGET_SVE"
2127 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2128 )
2129
2130 ;; -------------------------------------------------------------------------
2131 ;; ---- Normal scatter stores
2132 ;; -------------------------------------------------------------------------
2133 ;; Includes scatter forms of:
2134 ;; - ST1D
2135 ;; - ST1W
2136 ;; -------------------------------------------------------------------------
2137
2138 ;; Unpredicated scatter stores.
2139 (define_expand "scatter_store<mode><v_int_container>"
2140 [(set (mem:BLK (scratch))
2141 (unspec:BLK
2142 [(match_dup 5)
2143 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2144 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2145 (match_operand:DI 2 "const_int_operand")
2146 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2147 (match_operand:SVE_24 4 "register_operand")]
2148 UNSPEC_ST1_SCATTER))]
2149 "TARGET_SVE"
2150 {
2151 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2152 }
2153 )
2154
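;; As a C-level sketch, scatter stores are the store-side analogue of
;; the gather loads earlier in the file:
;;
;;   #include <stdint.h>
;;   void
;;   scatter (uint64_t *restrict dst, const uint64_t *restrict src,
;;            const uint64_t *restrict idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[idx[i]] = src[i];   /* st1d z0.d, p0, [x0, z1.d, lsl 3]  */
;;   }
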
2155 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2156 ;; unsigned extension and false for signed extension.
2157 (define_insn "mask_scatter_store<mode><v_int_container>"
2158 [(set (mem:BLK (scratch))
2159 (unspec:BLK
2160 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2161 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2162 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2163 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2164 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2165 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2166 UNSPEC_ST1_SCATTER))]
2167 "TARGET_SVE"
2168 "@
2169 st1<Vesize>\t%4.s, %5, [%1.s]
2170 st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2171 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2172 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2173 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2174 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2175 )
2176
2177 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2178 ;; doesn't matter in this case.
2179 (define_insn "mask_scatter_store<mode><v_int_container>"
2180 [(set (mem:BLK (scratch))
2181 (unspec:BLK
2182 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2183 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2184 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2185 (match_operand:DI 2 "const_int_operand")
2186 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2187 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2188 UNSPEC_ST1_SCATTER))]
2189 "TARGET_SVE"
2190 "@
2191 st1<Vesize>\t%4.d, %5, [%1.d]
2192 st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2193 st1<Vesize>\t%4.d, %5, [%0, %1.d]
2194 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2195 )
2196
2197 ;; Likewise, but with the offset being extended from 32 bits.
2198 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2199 [(set (mem:BLK (scratch))
2200 (unspec:BLK
2201 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2202 (match_operand:DI 0 "register_operand" "rk, rk")
2203 (unspec:VNx2DI
2204 [(match_operand 6)
2205 (ANY_EXTEND:VNx2DI
2206 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2207 UNSPEC_PRED_X)
2208 (match_operand:DI 2 "const_int_operand")
2209 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2210 (match_operand:SVE_2 4 "register_operand" "w, w")]
2211 UNSPEC_ST1_SCATTER))]
2212 "TARGET_SVE"
2213 "@
2214 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2215 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2216 "&& !CONSTANT_P (operands[6])"
2217 {
2218 operands[6] = CONSTM1_RTX (<VPRED>mode);
2219 }
2220 )
2221
2222 ;; Likewise, but with the offset being truncated to 32 bits and then
2223 ;; sign-extended.
2224 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2225 [(set (mem:BLK (scratch))
2226 (unspec:BLK
2227 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2228 (match_operand:DI 0 "register_operand" "rk, rk")
2229 (unspec:VNx2DI
2230 [(match_operand 6)
2231 (sign_extend:VNx2DI
2232 (truncate:VNx2SI
2233 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2234 UNSPEC_PRED_X)
2235 (match_operand:DI 2 "const_int_operand")
2236 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2237 (match_operand:SVE_2 4 "register_operand" "w, w")]
2238 UNSPEC_ST1_SCATTER))]
2239 "TARGET_SVE"
2240 "@
2241 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2242 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2243 "&& !CONSTANT_P (operands[6])"
2244 {
2245 operands[6] = CONSTM1_RTX (<VPRED>mode);
2246 }
2247 )
2248
2249 ;; Likewise, but with the offset being truncated to 32 bits and then
2250 ;; zero-extended.
2251 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2252 [(set (mem:BLK (scratch))
2253 (unspec:BLK
2254 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2255 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2256 (and:VNx2DI
2257 (match_operand:VNx2DI 1 "register_operand" "w, w")
2258 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2259 (match_operand:DI 2 "const_int_operand")
2260 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2261 (match_operand:SVE_2 4 "register_operand" "w, w")]
2262 UNSPEC_ST1_SCATTER))]
2263 "TARGET_SVE"
2264 "@
2265 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2266 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2267 )
2268
2269 ;; -------------------------------------------------------------------------
2270 ;; ---- Truncating scatter stores
2271 ;; -------------------------------------------------------------------------
2272 ;; Includes scatter forms of:
2273 ;; - ST1B
2274 ;; - ST1H
2275 ;; - ST1W
2276 ;; -------------------------------------------------------------------------
2277
2278 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2279 ;; true for unsigned extension and false for signed extension.
2280 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2281 [(set (mem:BLK (scratch))
2282 (unspec:BLK
2283 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2284 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2285 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2286 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2287 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2288 (truncate:VNx4_NARROW
2289 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2290 UNSPEC_ST1_SCATTER))]
2291 "TARGET_SVE"
2292 "@
2293 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2294 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2295 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2296 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2297 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2298 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2299 )
2300
2301 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2302 ;; operand 2 doesn't matter in this case.
2303 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2304 [(set (mem:BLK (scratch))
2305 (unspec:BLK
2306 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2307 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2308 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2309 (match_operand:DI 2 "const_int_operand")
2310 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2311 (truncate:VNx2_NARROW
2312 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2313 UNSPEC_ST1_SCATTER))]
2314 "TARGET_SVE"
2315 "@
2316 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2317 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2318 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2319 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2320 )
2321
2322 ;; Likewise, but with the offset being sign-extended from 32 bits.
2323 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2324 [(set (mem:BLK (scratch))
2325 (unspec:BLK
2326 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2327 (match_operand:DI 0 "register_operand" "rk, rk")
2328 (unspec:VNx2DI
2329 [(match_operand 6)
2330 (sign_extend:VNx2DI
2331 (truncate:VNx2SI
2332 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2333 UNSPEC_PRED_X)
2334 (match_operand:DI 2 "const_int_operand")
2335 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2336 (truncate:VNx2_NARROW
2337 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2338 UNSPEC_ST1_SCATTER))]
2339 "TARGET_SVE"
2340 "@
2341 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2342 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2343 "&& !rtx_equal_p (operands[5], operands[6])"
2344 {
2345 operands[6] = copy_rtx (operands[5]);
2346 }
2347 )
2348
2349 ;; Likewise, but with the offset being zero-extended from 32 bits.
2350 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2351 [(set (mem:BLK (scratch))
2352 (unspec:BLK
2353 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2354 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2355 (and:VNx2DI
2356 (match_operand:VNx2DI 1 "register_operand" "w, w")
2357 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2358 (match_operand:DI 2 "const_int_operand")
2359 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2360 (truncate:VNx2_NARROW
2361 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2362 UNSPEC_ST1_SCATTER))]
2363 "TARGET_SVE"
2364 "@
2365 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2366 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2367 )
2368
2369 ;; =========================================================================
2370 ;; == Vector creation
2371 ;; =========================================================================
2372
2373 ;; -------------------------------------------------------------------------
2374 ;; ---- [INT,FP] Duplicate element
2375 ;; -------------------------------------------------------------------------
2376 ;; Includes:
2377 ;; - DUP
2378 ;; - MOV
2379 ;; - LD1RB
2380 ;; - LD1RD
2381 ;; - LD1RH
2382 ;; - LD1RW
2383 ;; - LD1RQB
2384 ;; - LD1RQD
2385 ;; - LD1RQH
2386 ;; - LD1RQW
2387 ;; -------------------------------------------------------------------------
2388
2389 (define_expand "vec_duplicate<mode>"
2390 [(parallel
2391 [(set (match_operand:SVE_ALL 0 "register_operand")
2392 (vec_duplicate:SVE_ALL
2393 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2394 (clobber (scratch:VNx16BI))])]
2395 "TARGET_SVE"
2396 {
2397 if (MEM_P (operands[1]))
2398 {
2399 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2400 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2401 CONST0_RTX (<MODE>mode)));
2402 DONE;
2403 }
2404 }
2405 )
2406
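;; For example (a sketch), a scalar broadcast in a vectorized loop
;; becomes a MOV or DUP, or an LD1R if the scalar ends up in memory:
;;
;;   #include <stdint.h>
;;   void
;;   splat (uint32_t *restrict dst, uint32_t x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = x;   /* mov z0.s, w1  */
;;   }
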
2407 ;; Accept memory operands for the benefit of combine, and also in case
2408 ;; the scalar input gets spilled to memory during RA. We want to split
2409 ;; the load at the first opportunity in order to allow the PTRUE to be
2410 ;; optimized with surrounding code.
2411 (define_insn_and_split "*vec_duplicate<mode>_reg"
2412 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2413 (vec_duplicate:SVE_ALL
2414 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2415 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2416 "TARGET_SVE"
2417 "@
2418 mov\t%0.<Vetype>, %<vwcore>1
2419 mov\t%0.<Vetype>, %<Vetype>1
2420 #"
2421 "&& MEM_P (operands[1])"
2422 [(const_int 0)]
2423 {
2424 if (GET_CODE (operands[2]) == SCRATCH)
2425 operands[2] = gen_reg_rtx (VNx16BImode);
2426 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2427 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2428 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2429 CONST0_RTX (<MODE>mode)));
2430 DONE;
2431 }
2432 [(set_attr "length" "4,4,8")]
2433 )
2434
2435 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2436 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2437 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2438 (vec_duplicate:SVE_FULL
2439 (match_operand:<V128> 1 "register_operand" "w")))]
2440 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2441 {
2442 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2443 return "dup\t%0.q, %1.q[0]";
2444 }
2445 )
2446
2447 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2448 ;; The SVE register layout puts memory lane N into (architectural)
2449 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2450 ;; lsb into the register lsb. We therefore have to describe this in rtl
2451 ;; terms as a reverse of the V128 vector followed by a duplicate.
2452 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2453 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2454 (vec_duplicate:SVE_FULL
2455 (vec_select:<V128>
2456 (match_operand:<V128> 1 "register_operand" "w")
2457 (match_operand 2 "descending_int_parallel"))))]
2458 "TARGET_SVE
2459 && BYTES_BIG_ENDIAN
2460 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2461 GET_MODE_NUNITS (<V128>mode) - 1)"
2462 {
2463 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2464 return "dup\t%0.q, %1.q[0]";
2465 }
2466 )
2467
2468 ;; This is used for vec_duplicate<mode>s from memory, but can also
2469 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2470 ;; with zero.
2471 (define_insn "sve_ld1r<mode>"
2472 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2473 (unspec:SVE_ALL
2474 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2475 (vec_duplicate:SVE_ALL
2476 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2477 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2478 UNSPEC_SEL))]
2479 "TARGET_SVE"
2480 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2481 )
2482
2483 ;; Load 128 bits from memory under predicate control and duplicate to
2484 ;; fill a vector.
2485 (define_insn "@aarch64_sve_ld1rq<mode>"
2486 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2487 (unspec:SVE_FULL
2488 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2489 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2490 UNSPEC_LD1RQ))]
2491 "TARGET_SVE"
2492 {
2493 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2494 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2495 }
2496 )
2497
2498 (define_insn "@aarch64_sve_ld1ro<mode>"
2499 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2500 (unspec:SVE_FULL
2501 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2502 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2503 "UO<Vesize>")]
2504 UNSPEC_LD1RO))]
2505 "TARGET_SVE && TARGET_F64MM"
2506 {
2507 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2508 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2509 }
2510 )
2511
2512 ;; -------------------------------------------------------------------------
2513 ;; ---- [INT,FP] Initialize from individual elements
2514 ;; -------------------------------------------------------------------------
2515 ;; Includes:
2516 ;; - INSR
2517 ;; -------------------------------------------------------------------------
2518
2519 (define_expand "vec_init<mode><Vel>"
2520 [(match_operand:SVE_FULL 0 "register_operand")
2521 (match_operand 1 "")]
2522 "TARGET_SVE"
2523 {
2524 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2525 DONE;
2526 }
2527 )
2528
2529 ;; Shift an SVE vector left and insert a scalar into element 0.
2530 (define_insn "vec_shl_insert_<mode>"
2531 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2532 (unspec:SVE_FULL
2533 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2534 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2535 UNSPEC_INSR))]
2536 "TARGET_SVE"
2537 "@
2538 insr\t%0.<Vetype>, %<vwcore>2
2539 insr\t%0.<Vetype>, %<Vetype>2
2540 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2541 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2542 [(set_attr "movprfx" "*,*,yes,yes")]
2543 )
2544
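;; A direct ACLE-level sketch of INSR (svinsr_n_u32 is the standard
;; arm_sve.h name for the scalar form on 32-bit elements):
;;
;;   #include <arm_sve.h>
;;   svuint32_t
;;   shift_in (svuint32_t v, uint32_t x)
;;   {
;;     return svinsr_n_u32 (v, x);   /* insr z0.s, w0  */
;;   }
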
2545 ;; -------------------------------------------------------------------------
2546 ;; ---- [INT] Linear series
2547 ;; -------------------------------------------------------------------------
2548 ;; Includes:
2549 ;; - INDEX
2550 ;; -------------------------------------------------------------------------
2551
2552 (define_insn "vec_series<mode>"
2553 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2554 (vec_series:SVE_I
2555 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2556 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2557 "TARGET_SVE"
2558 "@
2559 index\t%0.<Vctype>, #%1, %<vccore>2
2560 index\t%0.<Vctype>, %<vccore>1, #%2
2561 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2562 )
2563
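;; For instance (a sketch), the vectorizer uses INDEX to seed linear
;; inductions such as:
;;
;;   #include <stdint.h>
;;   void
;;   iota (uint32_t *restrict dst, uint32_t base, uint32_t step, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = base + (uint32_t) i * step;   /* index z0.s, w1, w2  */
;;   }
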
2564 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2565 ;; of an INDEX instruction.
2566 (define_insn "*vec_series<mode>_plus"
2567 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2568 (plus:SVE_I
2569 (vec_duplicate:SVE_I
2570 (match_operand:<VEL> 1 "register_operand" "r"))
2571 (match_operand:SVE_I 2 "immediate_operand")))]
2572 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2573 {
2574 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2575 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2576 }
2577 )
2578
2579 ;; -------------------------------------------------------------------------
2580 ;; ---- [PRED] Duplicate element
2581 ;; -------------------------------------------------------------------------
2582 ;; The patterns in this section are synthetic.
2583 ;; -------------------------------------------------------------------------
2584
2585 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2586 ;; input into the top bit and using a WHILELO. An alternative would be to
2587 ;; duplicate the input and do a compare with zero.
2588 (define_expand "vec_duplicate<mode>"
2589 [(set (match_operand:PRED_ALL 0 "register_operand")
2590 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2591 "TARGET_SVE"
2592 {
2593 rtx tmp = gen_reg_rtx (DImode);
2594 rtx op1 = gen_lowpart (DImode, operands[1]);
2595 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2596 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2597 DONE;
2598 }
2599 )
2600
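;; A scalar model of the trick (illustrative only): after the shift the
;; WHILELO bound is either 0, which makes every lane's "index < bound"
;; test false, or 1 << 63, which exceeds any possible element count and
;; so makes every lane's test true.
;;
;;   #include <stdint.h>
;;   #include <stdbool.h>
;;   /* Lane I of the result of WHILELO (0, BOUND) is true iff I < BOUND.  */
;;   static bool
;;   broadcast_lane (uint8_t flag, uint64_t i)
;;   {
;;     uint64_t bound = (uint64_t) flag << 63;   /* Low bit to top bit.  */
;;     return i < bound;
;;   }
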
2601 ;; =========================================================================
2602 ;; == Vector decomposition
2603 ;; =========================================================================
2604
2605 ;; -------------------------------------------------------------------------
2606 ;; ---- [INT,FP] Extract index
2607 ;; -------------------------------------------------------------------------
2608 ;; Includes:
2609 ;; - DUP (Advanced SIMD)
2610 ;; - DUP (SVE)
2611 ;; - EXT (SVE)
2612 ;; - ST1 (Advanced SIMD)
2613 ;; - UMOV (Advanced SIMD)
2614 ;; -------------------------------------------------------------------------
2615
2616 (define_expand "vec_extract<mode><Vel>"
2617 [(set (match_operand:<VEL> 0 "register_operand")
2618 (vec_select:<VEL>
2619 (match_operand:SVE_FULL 1 "register_operand")
2620 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2621 "TARGET_SVE"
2622 {
2623 poly_int64 val;
2624 if (poly_int_rtx_p (operands[2], &val)
2625 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2626 {
2627 /* The last element can be extracted with a LASTB and a false
2628 predicate. */
2629 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2630 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2631 DONE;
2632 }
2633 if (!CONST_INT_P (operands[2]))
2634 {
2635 /* Create an index with operands[2] as the base and -1 as the step.
2636 It will then be zero for the element we care about. */
2637 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2638 index = force_reg (<VEL_INT>mode, index);
2639 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2640 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2641
2642 /* Get a predicate that is true for only that element. */
2643 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2644 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2645 rtx sel = gen_reg_rtx (<VPRED>mode);
2646 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2647
2648 /* Select the element using LASTB. */
2649 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2650 DONE;
2651 }
2652 }
2653 )
2654
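;; An ACLE-level sketch that is logically equivalent to the expander's
;; series trick above (all names below are standard arm_sve.h
;; intrinsics): build {0, 1, 2, ...} with INDEX, compare it against the
;; element number to get a single-lane predicate, then read that lane
;; with LASTB.
;;
;;   #include <arm_sve.h>
;;   uint32_t
;;   extract_var (svuint32_t v, uint32_t i)
;;   {
;;     svbool_t sel = svcmpeq_n_u32 (svptrue_b32 (),
;;                                   svindex_u32 (0, 1), i);
;;     return svlastb_u32 (sel, v);
;;   }
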
2655 ;; Extract element zero. This is a special case because we want to force
2656 ;; the registers to be the same for the second alternative, and then
2657 ;; split the instruction into nothing after RA.
2658 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2659 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2660 (vec_select:<VEL>
2661 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2662 (parallel [(const_int 0)])))]
2663 "TARGET_SVE"
2664 {
2665 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2666 switch (which_alternative)
2667 {
2668 case 0:
2669 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2670 case 1:
2671 return "#";
2672 case 2:
2673 return "st1\\t{%1.<Vetype>}[0], %0";
2674 default:
2675 gcc_unreachable ();
2676 }
2677 }
2678 "&& reload_completed
2679 && REG_P (operands[0])
2680 && REGNO (operands[0]) == REGNO (operands[1])"
2681 [(const_int 0)]
2682 {
2683 emit_note (NOTE_INSN_DELETED);
2684 DONE;
2685 }
2686 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2687 )
2688
2689 ;; Extract an element from the Advanced SIMD portion of the register.
2690 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2691 ;; want any change in lane number on big-endian targets.
2692 (define_insn "*vec_extract<mode><Vel>_v128"
2693 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2694 (vec_select:<VEL>
2695 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2696 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2697 "TARGET_SVE
2698 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2699 {
2700 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2701 switch (which_alternative)
2702 {
2703 case 0:
2704 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2705 case 1:
2706 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2707 case 2:
2708 return "st1\\t{%1.<Vetype>}[%2], %0";
2709 default:
2710 gcc_unreachable ();
2711 }
2712 }
2713 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2714 )
2715
2716 ;; Extract an element in the range of DUP. This pattern allows the
2717 ;; source and destination to be different.
2718 (define_insn "*vec_extract<mode><Vel>_dup"
2719 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2720 (vec_select:<VEL>
2721 (match_operand:SVE_FULL 1 "register_operand" "w")
2722 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2723 "TARGET_SVE
2724 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2725 {
2726 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2727 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2728 }
2729 )
2730
2731 ;; Extract an element outside the range of DUP. This pattern requires the
2732 ;; source and destination to be the same.
2733 (define_insn "*vec_extract<mode><Vel>_ext"
2734 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2735 (vec_select:<VEL>
2736 (match_operand:SVE_FULL 1 "register_operand" "0, w")
2737 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2738 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2739 {
2740 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2741 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2742 return (which_alternative == 0
2743 ? "ext\t%0.b, %0.b, %0.b, #%2"
2744 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2745 }
2746 [(set_attr "movprfx" "*,yes")]
2747 )
2748
2749 ;; -------------------------------------------------------------------------
2750 ;; ---- [INT,FP] Extract active element
2751 ;; -------------------------------------------------------------------------
2752 ;; Includes:
2753 ;; - LASTA
2754 ;; - LASTB
2755 ;; -------------------------------------------------------------------------
2756
2757 ;; Extract the last active element of operand 1 into operand 0.
2758 ;; If no elements are active, extract the last inactive element instead.
2759 (define_insn "@extract_<last_op>_<mode>"
2760 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2761 (unspec:<VEL>
2762 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2763 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2764 LAST))]
2765 "TARGET_SVE"
2766 "@
2767 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2768 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2769 )
2770
2771 ;; -------------------------------------------------------------------------
2772 ;; ---- [PRED] Extract index
2773 ;; -------------------------------------------------------------------------
2774 ;; The patterns in this section are synthetic.
2775 ;; -------------------------------------------------------------------------
2776
2777 ;; Handle extractions from a predicate by converting to an integer vector
2778 ;; and extracting from there.
2779 (define_expand "vec_extract<vpred><Vel>"
2780 [(match_operand:<VEL> 0 "register_operand")
2781 (match_operand:<VPRED> 1 "register_operand")
2782 (match_operand:SI 2 "nonmemory_operand")
2783 ;; Dummy operand to which we can attach the iterator.
2784 (reg:SVE_FULL_I V0_REGNUM)]
2785 "TARGET_SVE"
2786 {
2787 rtx tmp = gen_reg_rtx (<MODE>mode);
2788 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2789 CONST1_RTX (<MODE>mode),
2790 CONST0_RTX (<MODE>mode)));
2791 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2792 DONE;
2793 }
2794 )
2795
2796 ;; =========================================================================
2797 ;; == Unary arithmetic
2798 ;; =========================================================================
2799
2800 ;; -------------------------------------------------------------------------
2801 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2802 ;; -------------------------------------------------------------------------
2803 ;; Includes:
2804 ;; - ABS
2805 ;; - CLS (= clrsb)
2806 ;; - CLZ
2807 ;; - CNT (= popcount)
2808 ;; - NEG
2809 ;; - NOT
2810 ;; -------------------------------------------------------------------------
2811
2812 ;; Unpredicated integer unary arithmetic.
2813 (define_expand "<optab><mode>2"
2814 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2815 (unspec:SVE_FULL_I
2816 [(match_dup 2)
2817 (SVE_INT_UNARY:SVE_FULL_I
2818 (match_operand:SVE_FULL_I 1 "register_operand"))]
2819 UNSPEC_PRED_X))]
2820 "TARGET_SVE"
2821 {
2822 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2823 }
2824 )
2825
2826 ;; Integer unary arithmetic predicated with a PTRUE.
2827 (define_insn "@aarch64_pred_<optab><mode>"
2828 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2829 (unspec:SVE_FULL_I
2830 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2831 (SVE_INT_UNARY:SVE_FULL_I
2832 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
2833 UNSPEC_PRED_X))]
2834 "TARGET_SVE"
2835 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2836 )
2837
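;; For example (a sketch), simple elementwise loops like the following
;; map onto these predicated unary patterns:
;;
;;   #include <stdint.h>
;;   void
;;   negate (int32_t *restrict dst, const int32_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = -src[i];   /* neg z0.s, p0/m, z1.s  */
;;   }
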
2838 ;; Predicated integer unary arithmetic with merging.
2839 (define_expand "@cond_<optab><mode>"
2840 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2841 (unspec:SVE_FULL_I
2842 [(match_operand:<VPRED> 1 "register_operand")
2843 (SVE_INT_UNARY:SVE_FULL_I
2844 (match_operand:SVE_FULL_I 2 "register_operand"))
2845 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2846 UNSPEC_SEL))]
2847 "TARGET_SVE"
2848 )
2849
2850 ;; Predicated integer unary arithmetic, merging with the first input.
2851 (define_insn "*cond_<optab><mode>_2"
2852 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2853 (unspec:SVE_FULL_I
2854 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2855 (SVE_INT_UNARY:SVE_FULL_I
2856 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2857 (match_dup 2)]
2858 UNSPEC_SEL))]
2859 "TARGET_SVE"
2860 "@
2861 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2862 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2863 [(set_attr "movprfx" "*,yes")]
2864 )
2865
2866 ;; Predicated integer unary arithmetic, merging with an independent value.
2867 ;;
2868 ;; The earlyclobber isn't needed for the first alternative, but omitting
2869 ;; it would only help the case in which operands 2 and 3 are the same,
2870 ;; which is handled above rather than here. Marking all the alternatives
2871 ;; as earlyclobber helps to make the instruction more regular to the
2872 ;; register allocator.
2873 (define_insn "*cond_<optab><mode>_any"
2874 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2875 (unspec:SVE_FULL_I
2876 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2877 (SVE_INT_UNARY:SVE_FULL_I
2878 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2879 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2880 UNSPEC_SEL))]
2881 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2882 "@
2883 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2884 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2885 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2886 [(set_attr "movprfx" "*,yes,yes")]
2887 )
2888
2889 ;; -------------------------------------------------------------------------
2890 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2891 ;; -------------------------------------------------------------------------
2892 ;; Includes:
2893 ;; - RBIT
2894 ;; - REVB
2895 ;; - REVH
2896 ;; - REVW
2897 ;; -------------------------------------------------------------------------
2898
2899 ;; Predicated integer unary operations.
2900 (define_insn "@aarch64_pred_<optab><mode>"
2901 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2902 (unspec:SVE_FULL_I
2903 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2904 (unspec:SVE_FULL_I
2905 [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2906 SVE_INT_UNARY)]
2907 UNSPEC_PRED_X))]
2908 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2909 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2910 )
2911
2912 ;; Predicated integer unary operations with merging.
2913 (define_insn "@cond_<optab><mode>"
2914 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2915 (unspec:SVE_FULL_I
2916 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2917 (unspec:SVE_FULL_I
2918 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2919 SVE_INT_UNARY)
2920 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2921 UNSPEC_SEL))]
2922 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2923 "@
2924 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2925 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2926 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2927 [(set_attr "movprfx" "*,yes,yes")]
2928 )
2929
2930 ;; -------------------------------------------------------------------------
2931 ;; ---- [INT] Sign and zero extension
2932 ;; -------------------------------------------------------------------------
2933 ;; Includes:
2934 ;; - SXTB
2935 ;; - SXTH
2936 ;; - SXTW
2937 ;; - UXTB
2938 ;; - UXTH
2939 ;; - UXTW
2940 ;; -------------------------------------------------------------------------
2941
2942 ;; Unpredicated sign and zero extension from a narrower mode.
2943 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2944 [(set (match_operand:SVE_HSDI 0 "register_operand")
2945 (unspec:SVE_HSDI
2946 [(match_dup 2)
2947 (ANY_EXTEND:SVE_HSDI
2948 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2949 UNSPEC_PRED_X))]
2950 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2951 {
2952 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2953 }
2954 )
2955
2956 ;; Predicated sign and zero extension from a narrower mode.
2957 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2958 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2959 (unspec:SVE_HSDI
2960 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2961 (ANY_EXTEND:SVE_HSDI
2962 (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2963 UNSPEC_PRED_X))]
2964 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2965 "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2966 )
2967
2968 ;; Predicated truncate-and-sign-extend operations.
2969 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2970 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2971 (unspec:SVE_FULL_HSDI
2972 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2973 (sign_extend:SVE_FULL_HSDI
2974 (truncate:SVE_PARTIAL_I
2975 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2976 UNSPEC_PRED_X))]
2977 "TARGET_SVE
2978 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2979 "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2980 )
2981
2982 ;; Predicated truncate-and-sign-extend operations with merging.
2983 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2984 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2985 (unspec:SVE_FULL_HSDI
2986 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2987 (sign_extend:SVE_FULL_HSDI
2988 (truncate:SVE_PARTIAL_I
2989 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
2990 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2991 UNSPEC_SEL))]
2992 "TARGET_SVE
2993 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2994 "@
2995 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2996 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2997 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2998 [(set_attr "movprfx" "*,yes,yes")]
2999 )
3000
3001 ;; Predicated truncate-and-zero-extend operations, merging with the
3002 ;; first input.
3003 ;;
3004 ;; The canonical form of this operation is an AND of a constant rather
3005 ;; than (zero_extend (truncate ...)).
3006 (define_insn "*cond_uxt<mode>_2"
3007 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3008 (unspec:SVE_FULL_I
3009 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3010 (and:SVE_FULL_I
3011 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3012 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3013 (match_dup 2)]
3014 UNSPEC_SEL))]
3015 "TARGET_SVE"
3016 "@
3017 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3018 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3019 [(set_attr "movprfx" "*,yes")]
3020 )
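
;; For example, a masked zero-extension such as
;;
;;   a[i] = p[i] ? a[i] & 0xff : a[i];
;;
;; reaches RTL as the canonical AND form above and matches here as
;; "uxtb %0.s, %1/m, %0.s" (an illustrative sketch; the %e3 modifier
;; picks the b/h/w suffix from the immediate mask).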
3021
3022 ;; Predicated truncate-and-zero-extend operations, merging with an
3023 ;; independent value.
3024 ;;
3025 ;; The earlyclobber isn't needed for the first alternative, but omitting
3026 ;; it would only help the case in which operands 2 and 4 are the same,
3027 ;; which is handled above rather than here. Marking all the alternatives
3028 ;; as earlyclobber helps to make the instruction more regular to the
3029 ;; register allocator.
3030 (define_insn "*cond_uxt<mode>_any"
3031 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3032 (unspec:SVE_FULL_I
3033 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3034 (and:SVE_FULL_I
3035 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3036 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3037 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3038 UNSPEC_SEL))]
3039 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3040 "@
3041 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3042 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3043 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3044 [(set_attr "movprfx" "*,yes,yes")]
3045 )
3046
3047 ;; -------------------------------------------------------------------------
3048 ;; ---- [INT] Truncation
3049 ;; -------------------------------------------------------------------------
3050 ;; The patterns in this section are synthetic.
3051 ;; -------------------------------------------------------------------------
3052
3053 ;; Truncate to a partial SVE vector from either a full vector or a
3054 ;; wider partial vector. This is a no-op, because we can just ignore
3055 ;; the unused upper bits of the source.
3056 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3057 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3058 (truncate:SVE_PARTIAL_I
3059 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3060 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3061 "#"
3062 "&& reload_completed"
3063 [(set (match_dup 0) (match_dup 1))]
3064 {
3065 operands[1] = aarch64_replace_reg_mode (operands[1],
3066 <SVE_PARTIAL_I:MODE>mode);
3067 }
3068 )
3069
3070 ;; -------------------------------------------------------------------------
3071 ;; ---- [INT] Logical inverse
3072 ;; -------------------------------------------------------------------------
3073 ;; Includes:
3074 ;; - CNOT
3075 ;; -------------------------------------------------------------------------
3076
3077 ;; Predicated logical inverse.
3078 (define_expand "@aarch64_pred_cnot<mode>"
3079 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3080 (unspec:SVE_FULL_I
3081 [(unspec:<VPRED>
3082 [(match_operand:<VPRED> 1 "register_operand")
3083 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3084 (eq:<VPRED>
3085 (match_operand:SVE_FULL_I 3 "register_operand")
3086 (match_dup 4))]
3087 UNSPEC_PRED_Z)
3088 (match_dup 5)
3089 (match_dup 4)]
3090 UNSPEC_SEL))]
3091 "TARGET_SVE"
3092 {
3093 operands[4] = CONST0_RTX (<MODE>mode);
3094 operands[5] = CONST1_RTX (<MODE>mode);
3095 }
3096 )
3097
3098 (define_insn "*cnot<mode>"
3099 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3100 (unspec:SVE_FULL_I
3101 [(unspec:<VPRED>
3102 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3103 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3104 (eq:<VPRED>
3105 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3106 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3107 UNSPEC_PRED_Z)
3108 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3109 (match_dup 3)]
3110 UNSPEC_SEL))]
3111 "TARGET_SVE"
3112 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3113 )
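
;; For example, a logical-inverse loop (an illustrative sketch):
;;
;;   #include <stdint.h>
;;   void not_loop (int32_t *restrict a, const int32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = !b[i];       /* -> cnot z0.s, p0/m, z0.s */
;;   }
;;
;; CNOT writes 1 to each active lane whose input is zero and 0 to the
;; other active lanes, which is exactly the EQ-with-zero select above.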
3114
3115 ;; Predicated logical inverse with merging.
3116 (define_expand "@cond_cnot<mode>"
3117 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3118 (unspec:SVE_FULL_I
3119 [(match_operand:<VPRED> 1 "register_operand")
3120 (unspec:SVE_FULL_I
3121 [(unspec:<VPRED>
3122 [(match_dup 4)
3123 (const_int SVE_KNOWN_PTRUE)
3124 (eq:<VPRED>
3125 (match_operand:SVE_FULL_I 2 "register_operand")
3126 (match_dup 5))]
3127 UNSPEC_PRED_Z)
3128 (match_dup 6)
3129 (match_dup 5)]
3130 UNSPEC_SEL)
3131 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3132 UNSPEC_SEL))]
3133 "TARGET_SVE"
3134 {
3135 operands[4] = CONSTM1_RTX (<VPRED>mode);
3136 operands[5] = CONST0_RTX (<MODE>mode);
3137 operands[6] = CONST1_RTX (<MODE>mode);
3138 }
3139 )
3140
3141 ;; Predicated logical inverse, merging with the first input.
3142 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3143 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3144 (unspec:SVE_FULL_I
3145 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3146 ;; Logical inverse of operand 2 (as above).
3147 (unspec:SVE_FULL_I
3148 [(unspec:<VPRED>
3149 [(match_operand 5)
3150 (const_int SVE_KNOWN_PTRUE)
3151 (eq:<VPRED>
3152 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3153 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3154 UNSPEC_PRED_Z)
3155 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3156 (match_dup 3)]
3157 UNSPEC_SEL)
3158 (match_dup 2)]
3159 UNSPEC_SEL))]
3160 "TARGET_SVE"
3161 "@
3162 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3163 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3164 "&& !CONSTANT_P (operands[5])"
3165 {
3166 operands[5] = CONSTM1_RTX (<VPRED>mode);
3167 }
3168 [(set_attr "movprfx" "*,yes")]
3169 )
3170
3171 ;; Predicated logical inverse, merging with an independent value.
3172 ;;
3173 ;; The earlyclobber isn't needed for the first alternative, but omitting
3174 ;; it would only help the case in which operands 2 and 6 are the same,
3175 ;; which is handled above rather than here. Marking all the alternatives
3176 ;; as earlyclobber helps to make the instruction more regular to the
3177 ;; register allocator.
3178 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3179 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3180 (unspec:SVE_FULL_I
3181 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3182 ;; Logical inverse of operand 2 (as above).
3183 (unspec:SVE_FULL_I
3184 [(unspec:<VPRED>
3185 [(match_operand 5)
3186 (const_int SVE_KNOWN_PTRUE)
3187 (eq:<VPRED>
3188 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3189 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3190 UNSPEC_PRED_Z)
3191 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3192 (match_dup 3)]
3193 UNSPEC_SEL)
3194 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3195 UNSPEC_SEL))]
3196 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3197 "@
3198 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3199 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3200 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3201 "&& !CONSTANT_P (operands[5])"
3202 {
3203 operands[5] = CONSTM1_RTX (<VPRED>mode);
3204 }
3205 [(set_attr "movprfx" "*,yes,yes")]
3206 )
3207
3208 ;; -------------------------------------------------------------------------
3209 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3210 ;; -------------------------------------------------------------------------
3211 ;; Includes:
3212 ;; - FEXPA
3213 ;; -------------------------------------------------------------------------
3214
3215 ;; Unpredicated unary operations that take an integer and return a float.
3216 (define_insn "@aarch64_sve_<optab><mode>"
3217 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3218 (unspec:SVE_FULL_F
3219 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3220 SVE_FP_UNARY_INT))]
3221 "TARGET_SVE"
3222 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3223 )
3224
3225 ;; -------------------------------------------------------------------------
3226 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3227 ;; -------------------------------------------------------------------------
3228 ;; Includes:
3229 ;; - FABS
3230 ;; - FNEG
3231 ;; - FRECPE
3232 ;; - FRECPX
3233 ;; - FRINTA
3234 ;; - FRINTI
3235 ;; - FRINTM
3236 ;; - FRINTN
3237 ;; - FRINTP
3238 ;; - FRINTX
3239 ;; - FRINTZ
3240 ;; - FRSQRTE
3241 ;; - FSQRT
3242 ;; -------------------------------------------------------------------------
3243
3244 ;; Unpredicated floating-point unary operations.
3245 (define_insn "@aarch64_sve_<optab><mode>"
3246 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3247 (unspec:SVE_FULL_F
3248 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3249 SVE_FP_UNARY))]
3250 "TARGET_SVE"
3251 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3252 )
3253
3254 ;; Unpredicated floating-point unary operations.
3255 (define_expand "<optab><mode>2"
3256 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3257 (unspec:SVE_FULL_F
3258 [(match_dup 2)
3259 (const_int SVE_RELAXED_GP)
3260 (match_operand:SVE_FULL_F 1 "register_operand")]
3261 SVE_COND_FP_UNARY))]
3262 "TARGET_SVE"
3263 {
3264 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3265 }
3266 )
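
;; For example, a square-root loop (an illustrative sketch; vectorizing
;; sqrtf usually needs -fno-math-errno or equivalent):
;;
;;   #include <math.h>
;;   void sqrt_loop (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = sqrtf (b[i]);    /* -> fsqrt z0.s, p0/m, z0.s */
;;   }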
3267
3268 ;; Predicated floating-point unary operations.
3269 (define_insn "@aarch64_pred_<optab><mode>"
3270 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3271 (unspec:SVE_FULL_F
3272 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3273 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3274 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3275 SVE_COND_FP_UNARY))]
3276 "TARGET_SVE"
3277 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3278 )
3279
3280 ;; Predicated floating-point unary arithmetic with merging.
3281 (define_expand "@cond_<optab><mode>"
3282 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3283 (unspec:SVE_FULL_F
3284 [(match_operand:<VPRED> 1 "register_operand")
3285 (unspec:SVE_FULL_F
3286 [(match_dup 1)
3287 (const_int SVE_STRICT_GP)
3288 (match_operand:SVE_FULL_F 2 "register_operand")]
3289 SVE_COND_FP_UNARY)
3290 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3291 UNSPEC_SEL))]
3292 "TARGET_SVE"
3293 )
3294
3295 ;; Predicated floating-point unary arithmetic, merging with the first input.
3296 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3297 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3298 (unspec:SVE_FULL_F
3299 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3300 (unspec:SVE_FULL_F
3301 [(match_operand 3)
3302 (match_operand:SI 4 "aarch64_sve_gp_strictness")
3303 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3304 SVE_COND_FP_UNARY)
3305 (match_dup 2)]
3306 UNSPEC_SEL))]
3307 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3308 "@
3309 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3310 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3311 "&& !rtx_equal_p (operands[1], operands[3])"
3312 {
3313 operands[3] = copy_rtx (operands[1]);
3314 }
3315 [(set_attr "movprfx" "*,yes")]
3316 )
3317
3318 ;; Predicated floating-point unary arithmetic, merging with an independent
3319 ;; value.
3320 ;;
3321 ;; The earlyclobber isn't needed for the first alternative, but omitting
3322 ;; it would only help the case in which operands 2 and 3 are the same,
3323 ;; which is handled above rather than here. Marking all the alternatives
3324 ;; as earlyclobber helps to make the instruction more regular to the
3325 ;; register allocator.
3326 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3327 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3328 (unspec:SVE_FULL_F
3329 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3330 (unspec:SVE_FULL_F
3331 [(match_operand 4)
3332 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3333 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3334 SVE_COND_FP_UNARY)
3335 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3336 UNSPEC_SEL))]
3337 "TARGET_SVE
3338 && !rtx_equal_p (operands[2], operands[3])
3339 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3340 "@
3341 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3342 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3343 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3344 "&& !rtx_equal_p (operands[1], operands[4])"
3345 {
3346 operands[4] = copy_rtx (operands[1]);
3347 }
3348 [(set_attr "movprfx" "*,yes,yes")]
3349 )
3350
3351 ;; -------------------------------------------------------------------------
3352 ;; ---- [PRED] Inverse
3353 ;; -------------------------------------------------------------------------
3354 ;; Includes:
3355 ;; - NOT
3356 ;; -------------------------------------------------------------------------
3357
3358 ;; Unpredicated predicate inverse.
3359 (define_expand "one_cmpl<mode>2"
3360 [(set (match_operand:PRED_ALL 0 "register_operand")
3361 (and:PRED_ALL
3362 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3363 (match_dup 2)))]
3364 "TARGET_SVE"
3365 {
3366 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3367 }
3368 )
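
;; The AND with a PTRUE reflects that the SVE predicate NOT instruction
;; is always governed by a predicate; there is no unpredicated form.
;; As an illustrative sketch, inverting p1 yields:
;;
;;   ptrue  p2.b
;;   not    p0.b, p2/z, p1.b   // p0 = ~p1 & p2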
3369
3370 ;; Predicated predicate inverse.
3371 (define_insn "*one_cmpl<mode>3"
3372 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3373 (and:PRED_ALL
3374 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3375 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3376 "TARGET_SVE"
3377 "not\t%0.b, %1/z, %2.b"
3378 )
3379
3380 ;; =========================================================================
3381 ;; == Binary arithmetic
3382 ;; =========================================================================
3383
3384 ;; -------------------------------------------------------------------------
3385 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3386 ;; -------------------------------------------------------------------------
3387 ;; Includes:
3388 ;; - ADD (merging form only)
3389 ;; - AND (merging form only)
3390 ;; - ASR (merging form only)
3391 ;; - EOR (merging form only)
3392 ;; - LSL (merging form only)
3393 ;; - LSR (merging form only)
3394 ;; - MUL
3395 ;; - ORR (merging form only)
3396 ;; - SMAX
3397 ;; - SMIN
3398 ;; - SQADD (SVE2 merging form only)
3399 ;; - SQSUB (SVE2 merging form only)
3400 ;; - SUB (merging form only)
3401 ;; - UMAX
3402 ;; - UMIN
3403 ;; - UQADD (SVE2 merging form only)
3404 ;; - UQSUB (SVE2 merging form only)
3405 ;; -------------------------------------------------------------------------
3406
3407 ;; Unpredicated integer binary operations that have an immediate form.
3408 (define_expand "<optab><mode>3"
3409 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3410 (unspec:SVE_FULL_I
3411 [(match_dup 3)
3412 (SVE_INT_BINARY_IMM:SVE_FULL_I
3413 (match_operand:SVE_FULL_I 1 "register_operand")
3414 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3415 UNSPEC_PRED_X))]
3416 "TARGET_SVE"
3417 {
3418 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3419 }
3420 )
3421
3422 ;; Integer binary operations that have an immediate form, predicated
3423 ;; with a PTRUE. We don't actually need the predicate for the first
3424 ;; and third alternatives, but using Upa or X isn't likely to gain much
3425 ;; and would make the instruction seem less uniform to the register
3426 ;; allocator.
3427 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3428 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3429 (unspec:SVE_FULL_I
3430 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3431 (SVE_INT_BINARY_IMM:SVE_FULL_I
3432 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3433 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3434 UNSPEC_PRED_X))]
3435 "TARGET_SVE"
3436 "@
3437 #
3438 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3439 #
3440 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3441 ; Split the unpredicated form after reload, so that we don't have
3442 ; the unnecessary PTRUE.
3443 "&& reload_completed
3444 && !register_operand (operands[3], <MODE>mode)"
3445 [(set (match_dup 0)
3446 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3447 ""
3448 [(set_attr "movprfx" "*,*,yes,yes")]
3449 )
3450
3451 ;; Unpredicated binary operations with a constant (post-RA only).
3452 ;; These are generated by splitting a predicated instruction whose
3453 ;; predicate is unused.
3454 (define_insn "*post_ra_<optab><mode>3"
3455 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3456 (SVE_INT_BINARY_IMM:SVE_FULL_I
3457 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3458 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3459 "TARGET_SVE && reload_completed"
3460 "@
3461 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3462 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3463 [(set_attr "movprfx" "*,yes")]
3464 )
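
;; As an illustrative sketch of the split: multiplying by an immediate,
;; e.g.
;;
;;   #include <stdint.h>
;;   void mul3_loop (int32_t *restrict a, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = a[i] * 3;
;;   }
;;
;; first matches the predicated pattern with a PTRUE, and once the
;; predicate is known to be unused the "#" alternatives split to the
;; unpredicated immediate form:  mul z0.s, z0.s, #3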
3465
3466 ;; Predicated integer operations with merging.
3467 (define_expand "@cond_<optab><mode>"
3468 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3469 (unspec:SVE_FULL_I
3470 [(match_operand:<VPRED> 1 "register_operand")
3471 (SVE_INT_BINARY:SVE_FULL_I
3472 (match_operand:SVE_FULL_I 2 "register_operand")
3473 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3474 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3475 UNSPEC_SEL))]
3476 "TARGET_SVE"
3477 )
3478
3479 ;; Predicated integer operations, merging with the first input.
3480 (define_insn "*cond_<optab><mode>_2"
3481 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3482 (unspec:SVE_FULL_I
3483 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3484 (SVE_INT_BINARY:SVE_FULL_I
3485 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3486 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3487 (match_dup 2)]
3488 UNSPEC_SEL))]
3489 "TARGET_SVE"
3490 "@
3491 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3492 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3493 [(set_attr "movprfx" "*,yes")]
3494 )
3495
3496 ;; Predicated integer operations, merging with the second input.
3497 (define_insn "*cond_<optab><mode>_3"
3498 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3499 (unspec:SVE_FULL_I
3500 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3501 (SVE_INT_BINARY:SVE_FULL_I
3502 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3503 (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3504 (match_dup 3)]
3505 UNSPEC_SEL))]
3506 "TARGET_SVE"
3507 "@
3508 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3509 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3510 [(set_attr "movprfx" "*,yes")]
3511 )
3512
3513 ;; Predicated integer operations, merging with an independent value.
3514 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3515 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3516 (unspec:SVE_FULL_I
3517 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3518 (SVE_INT_BINARY:SVE_FULL_I
3519 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3520 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3521 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3522 UNSPEC_SEL))]
3523 "TARGET_SVE
3524 && !rtx_equal_p (operands[2], operands[4])
3525 && !rtx_equal_p (operands[3], operands[4])"
3526 "@
3527 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3528 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3529 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3530 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3531 #"
3532 "&& reload_completed
3533 && register_operand (operands[4], <MODE>mode)
3534 && !rtx_equal_p (operands[0], operands[4])"
3535 {
3536 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3537 operands[4], operands[1]));
3538 operands[4] = operands[2] = operands[0];
3539 }
3540 [(set_attr "movprfx" "yes")]
3541 )
3542
3543 ;; -------------------------------------------------------------------------
3544 ;; ---- [INT] Addition
3545 ;; -------------------------------------------------------------------------
3546 ;; Includes:
3547 ;; - ADD
3548 ;; - DECB
3549 ;; - DECD
3550 ;; - DECH
3551 ;; - DECW
3552 ;; - INCB
3553 ;; - INCD
3554 ;; - INCH
3555 ;; - INCW
3556 ;; - SUB
3557 ;; -------------------------------------------------------------------------
3558
3559 (define_insn "add<mode>3"
3560 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3561 (plus:SVE_I
3562 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3563 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3564 "TARGET_SVE"
3565 "@
3566 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3567 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3568 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3569 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3570 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3571 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3572 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3573 )
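
;; As an illustrative sketch, the inc/dec alternative lets a vector
;; induction variable advance by one full vector of elements per
;; iteration without materialising the VL-dependent constant:
;;
;;   incw  z0.s        // add the number of 32-bit elements per vector
;;                     // to every lane of z0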
3574
3575 ;; Merging forms are handled through SVE_INT_BINARY.
3576
3577 ;; -------------------------------------------------------------------------
3578 ;; ---- [INT] Subtraction
3579 ;; -------------------------------------------------------------------------
3580 ;; Includes:
3581 ;; - SUB
3582 ;; - SUBR
3583 ;; -------------------------------------------------------------------------
3584
3585 (define_insn "sub<mode>3"
3586 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
3587 (minus:SVE_FULL_I
3588 (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3589 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))]
3590 "TARGET_SVE"
3591 "@
3592 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3593 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3594 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3595 [(set_attr "movprfx" "*,*,yes")]
3596 )
3597
3598 ;; Merging forms are handled through SVE_INT_BINARY.
3599
3600 ;; -------------------------------------------------------------------------
3601 ;; ---- [INT] Take address
3602 ;; -------------------------------------------------------------------------
3603 ;; Includes:
3604 ;; - ADR
3605 ;; -------------------------------------------------------------------------
3606
3607 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
3608 ;; but the svadrb intrinsics should preserve the user's choice.
3609 (define_insn "@aarch64_adr<mode>"
3610 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3611 (unspec:SVE_FULL_SDI
3612 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3613 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3614 UNSPEC_ADR))]
3615 "TARGET_SVE"
3616 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3617 )
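
;; For example, the ACLE byte-offset form (an illustrative sketch; the
;; intrinsic overload name is quoted from the ACLE spec as we recall it):
;;
;;   svuint64_t addrs = svadrb_u64base_u64offset (bases, offsets);
;;
;; maps directly to:  adr z0.d, [z0.d, z1.d]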
3618
3619 ;; Same, but with the offset being sign-extended from the low 32 bits.
3620 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3621 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3622 (unspec:VNx2DI
3623 [(match_operand:VNx2DI 1 "register_operand" "w")
3624 (unspec:VNx2DI
3625 [(match_operand 3)
3626 (sign_extend:VNx2DI
3627 (truncate:VNx2SI
3628 (match_operand:VNx2DI 2 "register_operand" "w")))]
3629 UNSPEC_PRED_X)]
3630 UNSPEC_ADR))]
3631 "TARGET_SVE"
3632 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3633 "&& !CONSTANT_P (operands[3])"
3634 {
3635 operands[3] = CONSTM1_RTX (VNx2BImode);
3636 }
3637 )
3638
3639 ;; Same, but with the offset being zero-extended from the low 32 bits.
3640 (define_insn "*aarch64_adr_uxtw_unspec"
3641 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3642 (unspec:VNx2DI
3643 [(match_operand:VNx2DI 1 "register_operand" "w")
3644 (and:VNx2DI
3645 (match_operand:VNx2DI 2 "register_operand" "w")
3646 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3647 UNSPEC_ADR))]
3648 "TARGET_SVE"
3649 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3650 )
3651
3652 ;; Same, matching as a PLUS rather than unspec.
3653 (define_insn "*aarch64_adr_uxtw_and"
3654 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3655 (plus:VNx2DI
3656 (and:VNx2DI
3657 (match_operand:VNx2DI 2 "register_operand" "w")
3658 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3659 (match_operand:VNx2DI 1 "register_operand" "w")))]
3660 "TARGET_SVE"
3661 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3662 )
3663
3664 ;; ADR with a nonzero shift.
3665 (define_expand "@aarch64_adr<mode>_shift"
3666 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3667 (plus:SVE_FULL_SDI
3668 (unspec:SVE_FULL_SDI
3669 [(match_dup 4)
3670 (ashift:SVE_FULL_SDI
3671 (match_operand:SVE_FULL_SDI 2 "register_operand")
3672 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3673 UNSPEC_PRED_X)
3674 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3675 "TARGET_SVE"
3676 {
3677 operands[4] = CONSTM1_RTX (<VPRED>mode);
3678 }
3679 )
3680
3681 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3682 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3683 (plus:SVE_FULL_SDI
3684 (unspec:SVE_FULL_SDI
3685 [(match_operand 4)
3686 (ashift:SVE_FULL_SDI
3687 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3688 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3689 UNSPEC_PRED_X)
3690 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3691 "TARGET_SVE"
3692 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3693 "&& !CONSTANT_P (operands[4])"
3694 {
3695 operands[4] = CONSTM1_RTX (<VPRED>mode);
3696 }
3697 )
3698
3699 ;; Same, but with the index being sign-extended from the low 32 bits.
3700 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3701 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3702 (plus:VNx2DI
3703 (unspec:VNx2DI
3704 [(match_operand 4)
3705 (ashift:VNx2DI
3706 (unspec:VNx2DI
3707 [(match_operand 5)
3708 (sign_extend:VNx2DI
3709 (truncate:VNx2SI
3710 (match_operand:VNx2DI 2 "register_operand" "w")))]
3711 UNSPEC_PRED_X)
3712 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3713 UNSPEC_PRED_X)
3714 (match_operand:VNx2DI 1 "register_operand" "w")))]
3715 "TARGET_SVE"
3716 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3717 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3718 {
3719 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3720 }
3721 )
3722
3723 ;; Same, but with the index being zero-extended from the low 32 bits.
3724 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3725 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3726 (plus:VNx2DI
3727 (unspec:VNx2DI
3728 [(match_operand 5)
3729 (ashift:VNx2DI
3730 (and:VNx2DI
3731 (match_operand:VNx2DI 2 "register_operand" "w")
3732 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3733 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3734 UNSPEC_PRED_X)
3735 (match_operand:VNx2DI 1 "register_operand" "w")))]
3736 "TARGET_SVE"
3737 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3738 "&& !CONSTANT_P (operands[5])"
3739 {
3740 operands[5] = CONSTM1_RTX (VNx2BImode);
3741 }
3742 )
3743
3744 ;; -------------------------------------------------------------------------
3745 ;; ---- [INT] Absolute difference
3746 ;; -------------------------------------------------------------------------
3747 ;; Includes:
3748 ;; - SABD
3749 ;; - UABD
3750 ;; -------------------------------------------------------------------------
3751
3752 ;; Unpredicated integer absolute difference.
3753 (define_expand "<su>abd<mode>_3"
3754 [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3755 (USMAX:SVE_FULL_I
3756 (match_operand:SVE_FULL_I 1 "register_operand")
3757 (match_operand:SVE_FULL_I 2 "register_operand"))]
3758 "TARGET_SVE"
3759 {
3760 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3761 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3762 operands[2]));
3763 DONE;
3764 }
3765 )
3766
3767 ;; Predicated integer absolute difference.
3768 (define_insn "@aarch64_pred_<su>abd<mode>"
3769 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3770 (unspec:SVE_FULL_I
3771 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3772 (minus:SVE_FULL_I
3773 (USMAX:SVE_FULL_I
3774 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3775 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3776 (<max_opp>:SVE_FULL_I
3777 (match_dup 2)
3778 (match_dup 3)))]
3779 UNSPEC_PRED_X))]
3780 "TARGET_SVE"
3781 "@
3782 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3783 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3784 [(set_attr "movprfx" "*,yes")]
3785 )
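
;; Both forms rely on the identity |a - b| = max (a, b) - min (a, b),
;; where <max_opp> is the minimum that pairs with the maximum being
;; matched (smin for smax, umin for umax).  For example, a = 3, b = 7
;; gives max = 7, min = 3 and 7 - 3 = 4 = |3 - 7|.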
3786
3787 (define_expand "@aarch64_cond_<su>abd<mode>"
3788 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3789 (unspec:SVE_FULL_I
3790 [(match_operand:<VPRED> 1 "register_operand")
3791 (minus:SVE_FULL_I
3792 (unspec:SVE_FULL_I
3793 [(match_dup 1)
3794 (USMAX:SVE_FULL_I
3795 (match_operand:SVE_FULL_I 2 "register_operand")
3796 (match_operand:SVE_FULL_I 3 "register_operand"))]
3797 UNSPEC_PRED_X)
3798 (unspec:SVE_FULL_I
3799 [(match_dup 1)
3800 (<max_opp>:SVE_FULL_I
3801 (match_dup 2)
3802 (match_dup 3))]
3803 UNSPEC_PRED_X))
3804 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3805 UNSPEC_SEL))]
3806 "TARGET_SVE"
3807 {
3808 if (rtx_equal_p (operands[3], operands[4]))
3809 std::swap (operands[2], operands[3]);
3810 })
3811
3812 ;; Predicated integer absolute difference, merging with the first input.
3813 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3814 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3815 (unspec:SVE_FULL_I
3816 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3817 (minus:SVE_FULL_I
3818 (unspec:SVE_FULL_I
3819 [(match_operand 4)
3820 (USMAX:SVE_FULL_I
3821 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3822 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3823 UNSPEC_PRED_X)
3824 (unspec:SVE_FULL_I
3825 [(match_operand 5)
3826 (<max_opp>:SVE_FULL_I
3827 (match_dup 2)
3828 (match_dup 3))]
3829 UNSPEC_PRED_X))
3830 (match_dup 2)]
3831 UNSPEC_SEL))]
3832 "TARGET_SVE"
3833 "@
3834 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3835 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3836 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3837 {
3838 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3839 }
3840 [(set_attr "movprfx" "*,yes")]
3841 )
3842
3843 ;; Predicated integer absolute difference, merging with an independent value.
3844 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3845 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3846 (unspec:SVE_FULL_I
3847 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3848 (minus:SVE_FULL_I
3849 (unspec:SVE_FULL_I
3850 [(match_operand 5)
3851 (USMAX:SVE_FULL_I
3852 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3853 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3854 UNSPEC_PRED_X)
3855 (unspec:SVE_FULL_I
3856 [(match_operand 6)
3857 (<max_opp>:SVE_FULL_I
3858 (match_dup 2)
3859 (match_dup 3))]
3860 UNSPEC_PRED_X))
3861 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3862 UNSPEC_SEL))]
3863 "TARGET_SVE
3864 && !rtx_equal_p (operands[2], operands[4])
3865 && !rtx_equal_p (operands[3], operands[4])"
3866 "@
3867 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3868 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3869 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3870 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3871 #"
3872 "&& 1"
3873 {
3874 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3875 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3876 else if (reload_completed
3877 && register_operand (operands[4], <MODE>mode)
3878 && !rtx_equal_p (operands[0], operands[4]))
3879 {
3880 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3881 operands[4], operands[1]));
3882 operands[4] = operands[2] = operands[0];
3883 }
3884 else
3885 FAIL;
3886 }
3887 [(set_attr "movprfx" "yes")]
3888 )
3889
3890 ;; -------------------------------------------------------------------------
3891 ;; ---- [INT] Saturating addition and subtraction
3892 ;; -------------------------------------------------------------------------
;; Includes:
3893 ;; - SQADD
3894 ;; - SQSUB
3895 ;; - UQADD
3896 ;; - UQSUB
3897 ;; -------------------------------------------------------------------------
3898
3899 ;; Unpredicated saturating signed addition and subtraction.
3900 (define_insn "@aarch64_sve_<optab><mode>"
3901 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3902 (SBINQOPS:SVE_FULL_I
3903 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3904 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3905 "TARGET_SVE"
3906 "@
3907 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3908 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3909 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3910 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3911 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3912 [(set_attr "movprfx" "*,*,yes,yes,*")]
3913 )
3914
3915 ;; Unpredicated saturating unsigned addition and subtraction.
3916 (define_insn "@aarch64_sve_<optab><mode>"
3917 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3918 (UBINQOPS:SVE_FULL_I
3919 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3920 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3921 "TARGET_SVE"
3922 "@
3923 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3924 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3925 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3926 [(set_attr "movprfx" "*,yes,*")]
3927 )
3928
3929 ;; -------------------------------------------------------------------------
3930 ;; ---- [INT] Highpart multiplication
3931 ;; -------------------------------------------------------------------------
3932 ;; Includes:
3933 ;; - SMULH
3934 ;; - UMULH
3935 ;; -------------------------------------------------------------------------
3936
3937 ;; Unpredicated highpart multiplication.
3938 (define_expand "<su>mul<mode>3_highpart"
3939 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3940 (unspec:SVE_FULL_I
3941 [(match_dup 3)
3942 (unspec:SVE_FULL_I
3943 [(match_operand:SVE_FULL_I 1 "register_operand")
3944 (match_operand:SVE_FULL_I 2 "register_operand")]
3945 MUL_HIGHPART)]
3946 UNSPEC_PRED_X))]
3947 "TARGET_SVE"
3948 {
3949 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3950 }
3951 )
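
;; For example, taking the high 32 bits of a widening product
;; (an illustrative sketch):
;;
;;   #include <stdint.h>
;;   void mulh_loop (int32_t *restrict a, const int32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = (int32_t) (((int64_t) a[i] * b[i]) >> 32);
;;   }
;;
;; can vectorize to:  smulh z0.s, p0/m, z0.s, z1.s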
3952
3953 ;; Predicated highpart multiplication.
3954 (define_insn "@aarch64_pred_<optab><mode>"
3955 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3956 (unspec:SVE_FULL_I
3957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3958 (unspec:SVE_FULL_I
3959 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3960 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3961 MUL_HIGHPART)]
3962 UNSPEC_PRED_X))]
3963 "TARGET_SVE"
3964 "@
3965 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3966 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3967 [(set_attr "movprfx" "*,yes")]
3968 )
3969
3970 ;; Predicated highpart multiplications with merging.
3971 (define_expand "@cond_<optab><mode>"
3972 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3973 (unspec:SVE_FULL_I
3974 [(match_operand:<VPRED> 1 "register_operand")
3975 (unspec:SVE_FULL_I
3976 [(match_operand:SVE_FULL_I 2 "register_operand")
3977 (match_operand:SVE_FULL_I 3 "register_operand")]
3978 MUL_HIGHPART)
3979 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3980 UNSPEC_SEL))]
3981 "TARGET_SVE"
3982 {
3983 /* Only target code is aware of these operations, so we don't need
3984 to handle the fully-general case. */
3985 gcc_assert (rtx_equal_p (operands[2], operands[4])
3986 || CONSTANT_P (operands[4]));
3987 })
3988
3989 ;; Predicated highpart multiplications, merging with the first input.
3990 (define_insn "*cond_<optab><mode>_2"
3991 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3992 (unspec:SVE_FULL_I
3993 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3994 (unspec:SVE_FULL_I
3995 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3996 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3997 MUL_HIGHPART)
3998 (match_dup 2)]
3999 UNSPEC_SEL))]
4000 "TARGET_SVE"
4001 "@
4002 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4003 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4004 [(set_attr "movprfx" "*,yes")])
4005
4006 ;; Predicated highpart multiplications, merging with zero.
4007 (define_insn "*cond_<optab><mode>_z"
4008 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4009 (unspec:SVE_FULL_I
4010 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4011 (unspec:SVE_FULL_I
4012 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4013 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4014 MUL_HIGHPART)
4015 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4016 UNSPEC_SEL))]
4017 "TARGET_SVE"
4018 "@
4019 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4020 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4021 [(set_attr "movprfx" "yes")])
4022
4023 ;; -------------------------------------------------------------------------
4024 ;; ---- [INT] Division
4025 ;; -------------------------------------------------------------------------
4026 ;; Includes:
4027 ;; - SDIV
4028 ;; - SDIVR
4029 ;; - UDIV
4030 ;; - UDIVR
4031 ;; -------------------------------------------------------------------------
4032
4033 ;; Unpredicated integer division.
4034 (define_expand "<optab><mode>3"
4035 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4036 (unspec:SVE_FULL_SDI
4037 [(match_dup 3)
4038 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4039 (match_operand:SVE_FULL_SDI 1 "register_operand")
4040 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4041 UNSPEC_PRED_X))]
4042 "TARGET_SVE"
4043 {
4044 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4045 }
4046 )
4047
4048 ;; Integer division predicated with a PTRUE.
4049 (define_insn "@aarch64_pred_<optab><mode>"
4050 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4051 (unspec:SVE_FULL_SDI
4052 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4053 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4054 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4055 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4056 UNSPEC_PRED_X))]
4057 "TARGET_SVE"
4058 "@
4059 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4060 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4061 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4062 [(set_attr "movprfx" "*,*,yes")]
4063 )
4064
4065 ;; Predicated integer division with merging.
4066 (define_expand "@cond_<optab><mode>"
4067 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4068 (unspec:SVE_FULL_SDI
4069 [(match_operand:<VPRED> 1 "register_operand")
4070 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4071 (match_operand:SVE_FULL_SDI 2 "register_operand")
4072 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4073 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4074 UNSPEC_SEL))]
4075 "TARGET_SVE"
4076 )
4077
4078 ;; Predicated integer division, merging with the first input.
4079 (define_insn "*cond_<optab><mode>_2"
4080 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4081 (unspec:SVE_FULL_SDI
4082 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4083 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4084 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4085 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4086 (match_dup 2)]
4087 UNSPEC_SEL))]
4088 "TARGET_SVE"
4089 "@
4090 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4091 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4092 [(set_attr "movprfx" "*,yes")]
4093 )
4094
4095 ;; Predicated integer division, merging with the second input.
4096 (define_insn "*cond_<optab><mode>_3"
4097 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4098 (unspec:SVE_FULL_SDI
4099 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4100 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4101 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4102 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4103 (match_dup 3)]
4104 UNSPEC_SEL))]
4105 "TARGET_SVE"
4106 "@
4107 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4108 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4109 [(set_attr "movprfx" "*,yes")]
4110 )
4111
4112 ;; Predicated integer division, merging with an independent value.
4113 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4114 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4115 (unspec:SVE_FULL_SDI
4116 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4117 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4118 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4119 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4120 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4121 UNSPEC_SEL))]
4122 "TARGET_SVE
4123 && !rtx_equal_p (operands[2], operands[4])
4124 && !rtx_equal_p (operands[3], operands[4])"
4125 "@
4126 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4127 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4128 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4129 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4130 #"
4131 "&& reload_completed
4132 && register_operand (operands[4], <MODE>mode)
4133 && !rtx_equal_p (operands[0], operands[4])"
4134 {
4135 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4136 operands[4], operands[1]));
4137 operands[4] = operands[2] = operands[0];
4138 }
4139 [(set_attr "movprfx" "yes")]
4140 )
4141
4142 ;; -------------------------------------------------------------------------
4143 ;; ---- [INT] Binary logical operations
4144 ;; -------------------------------------------------------------------------
4145 ;; Includes:
4146 ;; - AND
4147 ;; - EOR
4148 ;; - ORR
4149 ;; -------------------------------------------------------------------------
4150
4151 ;; Unpredicated integer binary logical operations.
4152 (define_insn "<optab><mode>3"
4153 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w")
4154 (LOGICAL:SVE_FULL_I
4155 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w")
4156 (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4157 "TARGET_SVE"
4158 "@
4159 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4160 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4161 <logical>\t%0.d, %1.d, %2.d"
4162 [(set_attr "movprfx" "*,yes,*")]
4163 )
4164
4165 ;; Merging forms are handled through SVE_INT_BINARY.
4166
4167 ;; -------------------------------------------------------------------------
4168 ;; ---- [INT] Binary logical operations (inverted second input)
4169 ;; -------------------------------------------------------------------------
4170 ;; Includes:
4171 ;; - BIC
4172 ;; -------------------------------------------------------------------------
4173
4174 ;; Unpredicated BIC.
4175 (define_expand "@aarch64_bic<mode>"
4176 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4177 (and:SVE_FULL_I
4178 (unspec:SVE_FULL_I
4179 [(match_dup 3)
4180 (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))]
4181 UNSPEC_PRED_X)
4182 (match_operand:SVE_FULL_I 1 "register_operand")))]
4183 "TARGET_SVE"
4184 {
4185 operands[3] = CONSTM1_RTX (<VPRED>mode);
4186 }
4187 )
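
;; For example (an illustrative sketch):
;;
;;   #include <stdint.h>
;;   void bic_loop (uint32_t *restrict a, const uint32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] &= ~b[i];      /* -> bic z0.d, z0.d, z1.d */
;;   }
;;
;; As with the other unpredicated SVE bitwise operations, the output
;; always uses the .d element size, since the operation is bitwise and
;; therefore element-size-agnostic.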
4188
4189 ;; Predicated BIC.
4190 (define_insn_and_rewrite "*bic<mode>3"
4191 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4192 (and:SVE_FULL_I
4193 (unspec:SVE_FULL_I
4194 [(match_operand 3)
4195 (not:SVE_FULL_I
4196 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
4197 UNSPEC_PRED_X)
4198 (match_operand:SVE_FULL_I 1 "register_operand" "w")))]
4199 "TARGET_SVE"
4200 "bic\t%0.d, %1.d, %2.d"
4201 "&& !CONSTANT_P (operands[3])"
4202 {
4203 operands[3] = CONSTM1_RTX (<VPRED>mode);
4204 }
4205 )
4206
4207 ;; Predicated BIC with merging.
4208 (define_expand "@cond_bic<mode>"
4209 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4210 (unspec:SVE_FULL_I
4211 [(match_operand:<VPRED> 1 "register_operand")
4212 (and:SVE_FULL_I
4213 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4214 (match_operand:SVE_FULL_I 2 "register_operand"))
4215 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4216 UNSPEC_SEL))]
4217 "TARGET_SVE"
4218 )
4219
4220 ;; Predicated integer BIC, merging with the first input.
4221 (define_insn "*cond_bic<mode>_2"
4222 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4223 (unspec:SVE_FULL_I
4224 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4225 (and:SVE_FULL_I
4226 (not:SVE_FULL_I
4227 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4228 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4229 (match_dup 2)]
4230 UNSPEC_SEL))]
4231 "TARGET_SVE"
4232 "@
4233 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4234 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4235 [(set_attr "movprfx" "*,yes")]
4236 )
4237
4238 ;; Predicated integer BIC, merging with an independent value.
4239 (define_insn_and_rewrite "*cond_bic<mode>_any"
4240 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4241 (unspec:SVE_FULL_I
4242 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4243 (and:SVE_FULL_I
4244 (not:SVE_FULL_I
4245 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4246 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4247 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4248 UNSPEC_SEL))]
4249 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4250 "@
4251 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4252 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4253 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4254 #"
4255 "&& reload_completed
4256 && register_operand (operands[4], <MODE>mode)
4257 && !rtx_equal_p (operands[0], operands[4])"
4258 {
4259 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4260 operands[4], operands[1]));
4261 operands[4] = operands[2] = operands[0];
4262 }
4263 [(set_attr "movprfx" "yes")]
4264 )
4265
4266 ;; -------------------------------------------------------------------------
4267 ;; ---- [INT] Shifts (rounding towards -Inf)
4268 ;; -------------------------------------------------------------------------
4269 ;; Includes:
4270 ;; - ASR
4271 ;; - ASRR
4272 ;; - LSL
4273 ;; - LSLR
4274 ;; - LSR
4275 ;; - LSRR
4276 ;; -------------------------------------------------------------------------
4277
4278 ;; Unpredicated shift by a scalar, which expands into one of the vector
4279 ;; shifts below.
4280 (define_expand "<ASHIFT:optab><mode>3"
4281 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4282 (ASHIFT:SVE_FULL_I
4283 (match_operand:SVE_FULL_I 1 "register_operand")
4284 (match_operand:<VEL> 2 "general_operand")))]
4285 "TARGET_SVE"
4286 {
4287 rtx amount;
4288 if (CONST_INT_P (operands[2]))
4289 {
4290 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4291 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4292 amount = force_reg (<MODE>mode, amount);
4293 }
4294 else
4295 {
4296 amount = gen_reg_rtx (<MODE>mode);
4297 emit_insn (gen_vec_duplicate<mode> (amount,
4298 convert_to_mode (<VEL>mode,
4299 operands[2], 0)));
4300 }
4301 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4302 DONE;
4303 }
4304 )
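
;; For example, shifting by a loop-invariant scalar amount
;; (an illustrative sketch):
;;
;;   #include <stdint.h>
;;   void shl_loop (uint32_t *restrict a, int n, unsigned amount)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] <<= amount;
;;   }
;;
;; duplicates the amount across a vector (mov z1.s, w2) and then uses
;; the predicated vector shift below (lsl z0.s, p0/m, z0.s, z1.s),
;; while a constant amount in range instead uses the immediate form.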
4305
4306 ;; Unpredicated shift by a vector.
4307 (define_expand "v<optab><mode>3"
4308 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4309 (unspec:SVE_FULL_I
4310 [(match_dup 3)
4311 (ASHIFT:SVE_FULL_I
4312 (match_operand:SVE_FULL_I 1 "register_operand")
4313 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4314 UNSPEC_PRED_X))]
4315 "TARGET_SVE"
4316 {
4317 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4318 }
4319 )
4320
4321 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4322 ;; the predicate for the first alternative, but using Upa or X isn't
4323 ;; likely to gain much and would make the instruction seem less uniform
4324 ;; to the register allocator.
4325 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4326 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4327 (unspec:SVE_FULL_I
4328 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4329 (ASHIFT:SVE_FULL_I
4330 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4331 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4332 UNSPEC_PRED_X))]
4333 "TARGET_SVE"
4334 "@
4335 #
4336 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4337 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4338 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4339 "&& reload_completed
4340 && !register_operand (operands[3], <MODE>mode)"
4341 [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4342 ""
4343 [(set_attr "movprfx" "*,*,*,yes")]
4344 )
4345
4346 ;; Unpredicated shift operations by a constant (post-RA only).
4347 ;; These are generated by splitting a predicated instruction whose
4348 ;; predicate is unused.
4349 (define_insn "*post_ra_v<optab><mode>3"
4350 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4351 (ASHIFT:SVE_FULL_I
4352 (match_operand:SVE_FULL_I 1 "register_operand" "w")
4353 (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4354 "TARGET_SVE && reload_completed"
4355 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4356 )
4357
4358 ;; Predicated integer shift, merging with the first input.
4359 (define_insn "*cond_<optab><mode>_2_const"
4360 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4361 (unspec:SVE_FULL_I
4362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4363 (ASHIFT:SVE_FULL_I
4364 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4365 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4366 (match_dup 2)]
4367 UNSPEC_SEL))]
4368 "TARGET_SVE"
4369 "@
4370 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4371 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4372 [(set_attr "movprfx" "*,yes")]
4373 )
4374
4375 ;; Predicated integer shift, merging with an independent value.
4376 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4377 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4378 (unspec:SVE_FULL_I
4379 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4380 (ASHIFT:SVE_FULL_I
4381 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4382 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4383 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4384 UNSPEC_SEL))]
4385 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4386 "@
4387 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4388 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4389 #"
4390 "&& reload_completed
4391 && register_operand (operands[4], <MODE>mode)
4392 && !rtx_equal_p (operands[0], operands[4])"
4393 {
4394 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4395 operands[4], operands[1]));
4396 operands[4] = operands[2] = operands[0];
4397 }
4398 [(set_attr "movprfx" "yes")]
4399 )
4400
4401 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4402 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4403 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4404 (unspec:SVE_FULL_BHSI
4405 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4406 (match_operand:VNx2DI 2 "register_operand" "w")]
4407 SVE_SHIFT_WIDE))]
4408 "TARGET_SVE"
4409 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4410 )
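
;; For example (illustrative): "lsl z0.s, z1.s, z2.d" shifts each 32-bit
;; element of z1 by the 64-bit amount taken from the .d element of z2 that
;; overlaps it, so both .s lanes within a doubleword use the same count.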
4411
4412 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4413 (define_expand "@cond_<sve_int_op><mode>"
4414 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4415 (unspec:SVE_FULL_BHSI
4416 [(match_operand:<VPRED> 1 "register_operand")
4417 (unspec:SVE_FULL_BHSI
4418 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4419 (match_operand:VNx2DI 3 "register_operand")]
4420 SVE_SHIFT_WIDE)
4421 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4422 UNSPEC_SEL))]
4423 "TARGET_SVE"
4424 )
4425
4426 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4427 ;; the first input.
4428 (define_insn "*cond_<sve_int_op><mode>_m"
4429 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4430 (unspec:SVE_FULL_BHSI
4431 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4432 (unspec:SVE_FULL_BHSI
4433 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4434 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4435 SVE_SHIFT_WIDE)
4436 (match_dup 2)]
4437 UNSPEC_SEL))]
4438 "TARGET_SVE"
4439 "@
4440 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4441 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4442 [(set_attr "movprfx" "*, yes")])
4443
4444 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4445 (define_insn "*cond_<sve_int_op><mode>_z"
4446 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4447 (unspec:SVE_FULL_BHSI
4448 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4449 (unspec:SVE_FULL_BHSI
4450 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4451 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4452 SVE_SHIFT_WIDE)
4453 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4454 UNSPEC_SEL))]
4455 "TARGET_SVE"
4456 "@
4457 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4458 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4459 [(set_attr "movprfx" "yes")])
4460
4461 ;; -------------------------------------------------------------------------
4462 ;; ---- [INT] Shifts (rounding towards 0)
4463 ;; -------------------------------------------------------------------------
4464 ;; Includes:
4465 ;; - ASRD
4466 ;; - SQSHLU (SVE2)
4467 ;; - SRSHR (SVE2)
4468 ;; - URSHR (SVE2)
4469 ;; -------------------------------------------------------------------------
4470
4471 ;; Unpredicated ASRD.
4472 (define_expand "sdiv_pow2<mode>3"
4473 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4474 (unspec:SVE_FULL_I
4475 [(match_dup 3)
4476 (unspec:SVE_FULL_I
4477 [(match_operand:SVE_FULL_I 1 "register_operand")
4478 (match_operand 2 "aarch64_simd_rshift_imm")]
4479 UNSPEC_ASRD)
4480 (match_dup 1)]
4481 UNSPEC_SEL))]
4482 "TARGET_SVE"
4483 {
4484 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4485 }
4486 )
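
;; For illustration (a sketch, assuming SVE vectorization is enabled):
;;
;;   void div4 (int *x, int n) { for (int i = 0; i < n; ++i) x[i] /= 4; }
;;
;; can be vectorized through this expander to "asrd z0.s, p0/m, z0.s, #2".
;; Unlike a plain ASR, ASRD rounds towards zero, which matches C semantics
;; for signed division of negative values.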
4487
4488 ;; Predicated shift with merging.
4489 (define_expand "@cond_<sve_int_op><mode>"
4490 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4491 (unspec:SVE_FULL_I
4492 [(match_operand:<VPRED> 1 "register_operand")
4493 (unspec:SVE_FULL_I
4494 [(match_operand:SVE_FULL_I 2 "register_operand")
4495 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4496 SVE_INT_SHIFT_IMM)
4497 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4498 UNSPEC_SEL))]
4499 "TARGET_SVE"
4500 )
4501
4502 ;; Predicated shift, merging with the first input.
4503 (define_insn "*cond_<sve_int_op><mode>_2"
4504 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4505 (unspec:SVE_FULL_I
4506 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4507 (unspec:SVE_FULL_I
4508 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4509 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4510 SVE_INT_SHIFT_IMM)
4511 (match_dup 2)]
4512 UNSPEC_SEL))]
4513 "TARGET_SVE"
4514 "@
4515 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4516 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4517 [(set_attr "movprfx" "*,yes")])
4518
4519 ;; Predicated shift, merging with zero.
4520 (define_insn "*cond_<sve_int_op><mode>_z"
4521 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4522 (unspec:SVE_FULL_I
4523 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4524 (unspec:SVE_FULL_I
4525 [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4526 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4527 SVE_INT_SHIFT_IMM)
4528 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4529 UNSPEC_SEL))]
4530 "TARGET_SVE"
4531 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4532 [(set_attr "movprfx" "yes")])
4533
4534 ;; -------------------------------------------------------------------------
4535 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4536 ;; -------------------------------------------------------------------------
4537 ;; Includes:
4538 ;; - FSCALE
4539 ;; - FTSMUL
4540 ;; - FTSSEL
4541 ;; -------------------------------------------------------------------------
4542
4543 ;; Unpredicated floating-point binary operations that take an integer as
4544 ;; their second operand.
4545 (define_insn "@aarch64_sve_<optab><mode>"
4546 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4547 (unspec:SVE_FULL_F
4548 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4549 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4550 SVE_FP_BINARY_INT))]
4551 "TARGET_SVE"
4552 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4553 )
4554
4555 ;; Predicated floating-point binary operations that take an integer
4556 ;; as their second operand.
4557 (define_insn "@aarch64_pred_<optab><mode>"
4558 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4559 (unspec:SVE_FULL_F
4560 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4561 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4562 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4563 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4564 SVE_COND_FP_BINARY_INT))]
4565 "TARGET_SVE"
4566 "@
4567 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4568 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4569 [(set_attr "movprfx" "*,yes")]
4570 )
4571
4572 ;; Predicated floating-point binary operations with merging, taking an
4573 ;; integer as their second operand.
4574 (define_expand "@cond_<optab><mode>"
4575 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4576 (unspec:SVE_FULL_F
4577 [(match_operand:<VPRED> 1 "register_operand")
4578 (unspec:SVE_FULL_F
4579 [(match_dup 1)
4580 (const_int SVE_STRICT_GP)
4581 (match_operand:SVE_FULL_F 2 "register_operand")
4582 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4583 SVE_COND_FP_BINARY_INT)
4584 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4585 UNSPEC_SEL))]
4586 "TARGET_SVE"
4587 )
4588
4589 ;; Predicated floating-point binary operations that take an integer as their
4590 ;; second operand, with inactive lanes coming from the first operand.
4591 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4592 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4593 (unspec:SVE_FULL_F
4594 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4595 (unspec:SVE_FULL_F
4596 [(match_operand 4)
4597 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4598 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4599 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4600 SVE_COND_FP_BINARY_INT)
4601 (match_dup 2)]
4602 UNSPEC_SEL))]
4603 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4604 "@
4605 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4606 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4607 "&& !rtx_equal_p (operands[1], operands[4])"
4608 {
4609 operands[4] = copy_rtx (operands[1]);
4610 }
4611 [(set_attr "movprfx" "*,yes")]
4612 )
4613
4614 ;; Predicated floating-point binary operations that take an integer as
4615 ;; their second operand, with the values of inactive lanes being distinct
4616 ;; from the other inputs.
4617 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4618 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4619 (unspec:SVE_FULL_F
4620 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4621 (unspec:SVE_FULL_F
4622 [(match_operand 5)
4623 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4624 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4625 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4626 SVE_COND_FP_BINARY_INT)
4627 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4628 UNSPEC_SEL))]
4629 "TARGET_SVE
4630 && !rtx_equal_p (operands[2], operands[4])
4631 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4632 "@
4633 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4634 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4635 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4636 #"
4637 "&& 1"
4638 {
4639 if (reload_completed
4640 && register_operand (operands[4], <MODE>mode)
4641 && !rtx_equal_p (operands[0], operands[4]))
4642 {
4643 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4644 operands[4], operands[1]));
4645 operands[4] = operands[2] = operands[0];
4646 }
4647 else if (!rtx_equal_p (operands[1], operands[5]))
4648 operands[5] = copy_rtx (operands[1]);
4649 else
4650 FAIL;
4651 }
4652 [(set_attr "movprfx" "yes")]
4653 )
4654
4655 ;; -------------------------------------------------------------------------
4656 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4657 ;; -------------------------------------------------------------------------
4658 ;; Includes post-RA forms of:
4659 ;; - FADD
4660 ;; - FMUL
4661 ;; - FSUB
4662 ;; -------------------------------------------------------------------------
4663
4664 ;; Unpredicated floating-point binary operations (post-RA only).
4665 ;; These are generated by splitting a predicated instruction whose
4666 ;; predicate is unused.
4667 (define_insn "*post_ra_<sve_fp_op><mode>3"
4668 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4669 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4670 (match_operand:SVE_FULL_F 1 "register_operand" "w")
4671 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4672 "TARGET_SVE && reload_completed"
4673 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4674
4675 ;; -------------------------------------------------------------------------
4676 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4677 ;; -------------------------------------------------------------------------
4678 ;; Includes merging forms of:
4679 ;; - FADD (constant forms handled in the "Addition" section)
4680 ;; - FDIV
4681 ;; - FDIVR
4682 ;; - FMAX
4683 ;; - FMAXNM (including #0.0 and #1.0)
4684 ;; - FMIN
4685 ;; - FMINNM (including #0.0 and #1.0)
4686 ;; - FMUL (including #0.5 and #2.0)
4687 ;; - FMULX
4688 ;; - FRECPS
4689 ;; - FRSQRTS
4690 ;; - FSUB (constant forms handled in the "Addition" section)
4691 ;; - FSUBR (constant forms handled in the "Subtraction" section)
4692 ;; -------------------------------------------------------------------------
4693
4694 ;; Unpredicated floating-point binary operations.
4695 (define_insn "@aarch64_sve_<optab><mode>"
4696 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4697 (unspec:SVE_FULL_F
4698 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4699 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4700 SVE_FP_BINARY))]
4701 "TARGET_SVE"
4702 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4703 )
4704
4705 ;; Unpredicated floating-point binary operations that need to be predicated
4706 ;; for SVE.
4707 (define_expand "<optab><mode>3"
4708 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4709 (unspec:SVE_FULL_F
4710 [(match_dup 3)
4711 (const_int SVE_RELAXED_GP)
4712 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4713 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4714 SVE_COND_FP_BINARY))]
4715 "TARGET_SVE"
4716 {
4717 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4718 }
4719 )
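
;; As an illustrative example: SVE has no unpredicated FDIV encoding, so
;; vectorizing "a[i] = b[i] / c[i]" goes through this expander and emits a
;; PTRUE-governed division, roughly:
;;
;;   ptrue p0.b
;;   fdiv  z0.s, p0/m, z0.s, z1.s
;;
;; The SVE_RELAXED_GP marker records that the exact governing predicate
;; does not matter here, which later patterns can exploit.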
4720
4721 ;; Predicated floating-point binary operations that have no immediate forms.
4722 (define_insn "@aarch64_pred_<optab><mode>"
4723 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4724 (unspec:SVE_FULL_F
4725 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4726 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4727 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4728 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4729 SVE_COND_FP_BINARY_REG))]
4730 "TARGET_SVE"
4731 "@
4732 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4733 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4734 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4735 [(set_attr "movprfx" "*,*,yes")]
4736 )
4737
4738 ;; Predicated floating-point operations with merging.
4739 (define_expand "@cond_<optab><mode>"
4740 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4741 (unspec:SVE_FULL_F
4742 [(match_operand:<VPRED> 1 "register_operand")
4743 (unspec:SVE_FULL_F
4744 [(match_dup 1)
4745 (const_int SVE_STRICT_GP)
4746 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4747 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4748 SVE_COND_FP_BINARY)
4749 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4750 UNSPEC_SEL))]
4751 "TARGET_SVE"
4752 )
4753
4754 ;; Predicated floating-point operations, merging with the first input.
4755 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4756 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4757 (unspec:SVE_FULL_F
4758 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4759 (unspec:SVE_FULL_F
4760 [(match_operand 4)
4761 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4762 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4763 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4764 SVE_COND_FP_BINARY)
4765 (match_dup 2)]
4766 UNSPEC_SEL))]
4767 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4768 "@
4769 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4770 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4771 "&& !rtx_equal_p (operands[1], operands[4])"
4772 {
4773 operands[4] = copy_rtx (operands[1]);
4774 }
4775 [(set_attr "movprfx" "*,yes")]
4776 )
4777
4778 ;; Same for operations that take a 1-bit constant, i.e. an immediate
;; that selects between two possible values (see the Includes list above).
4779 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4780 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4781 (unspec:SVE_FULL_F
4782 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4783 (unspec:SVE_FULL_F
4784 [(match_operand 4)
4785 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4786 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4787 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4788 SVE_COND_FP_BINARY_I1)
4789 (match_dup 2)]
4790 UNSPEC_SEL))]
4791 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4792 "@
4793 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4794 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4795 "&& !rtx_equal_p (operands[1], operands[4])"
4796 {
4797 operands[4] = copy_rtx (operands[1]);
4798 }
4799 [(set_attr "movprfx" "*,yes")]
4800 )
4801
4802 ;; Predicated floating-point operations, merging with the second input.
4803 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4804 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4805 (unspec:SVE_FULL_F
4806 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4807 (unspec:SVE_FULL_F
4808 [(match_operand 4)
4809 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4810 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4811 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4812 SVE_COND_FP_BINARY)
4813 (match_dup 3)]
4814 UNSPEC_SEL))]
4815 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4816 "@
4817 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4818 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4819 "&& !rtx_equal_p (operands[1], operands[4])"
4820 {
4821 operands[4] = copy_rtx (operands[1]);
4822 }
4823 [(set_attr "movprfx" "*,yes")]
4824 )
4825
4826 ;; Predicated floating-point operations, merging with an independent value.
4827 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4828 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4829 (unspec:SVE_FULL_F
4830 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4831 (unspec:SVE_FULL_F
4832 [(match_operand 5)
4833 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4834 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4835 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4836 SVE_COND_FP_BINARY)
4837 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4838 UNSPEC_SEL))]
4839 "TARGET_SVE
4840 && !rtx_equal_p (operands[2], operands[4])
4841 && !rtx_equal_p (operands[3], operands[4])
4842 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4843 "@
4844 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4845 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4846 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4847 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4848 #"
4849 "&& 1"
4850 {
4851 if (reload_completed
4852 && register_operand (operands[4], <MODE>mode)
4853 && !rtx_equal_p (operands[0], operands[4]))
4854 {
4855 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4856 operands[4], operands[1]));
4857 operands[4] = operands[2] = operands[0];
4858 }
4859 else if (!rtx_equal_p (operands[1], operands[5]))
4860 operands[5] = copy_rtx (operands[1]);
4861 else
4862 FAIL;
4863 }
4864 [(set_attr "movprfx" "yes")]
4865 )
4866
4867 ;; Same for operations that take a 1-bit constant.
4868 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4869 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4870 (unspec:SVE_FULL_F
4871 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4872 (unspec:SVE_FULL_F
4873 [(match_operand 5)
4874 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4875 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4876 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4877 SVE_COND_FP_BINARY_I1)
4878 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4879 UNSPEC_SEL))]
4880 "TARGET_SVE
4881 && !rtx_equal_p (operands[2], operands[4])
4882 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4883 "@
4884 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4885 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4886 #"
4887 "&& 1"
4888 {
4889 if (reload_completed
4890 && register_operand (operands[4], <MODE>mode)
4891 && !rtx_equal_p (operands[0], operands[4]))
4892 {
4893 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4894 operands[4], operands[1]));
4895 operands[4] = operands[2] = operands[0];
4896 }
4897 else if (!rtx_equal_p (operands[1], operands[5]))
4898 operands[5] = copy_rtx (operands[1]);
4899 else
4900 FAIL;
4901 }
4902 [(set_attr "movprfx" "yes")]
4903 )
4904
4905 ;; -------------------------------------------------------------------------
4906 ;; ---- [FP] Addition
4907 ;; -------------------------------------------------------------------------
4908 ;; Includes:
4909 ;; - FADD
4910 ;; - FSUB
4911 ;; -------------------------------------------------------------------------
4912
4913 ;; Predicated floating-point addition.
4914 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4915 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4916 (unspec:SVE_FULL_F
4917 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4918 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4919 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4920 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4921 SVE_COND_FP_ADD))]
4922 "TARGET_SVE"
4923 "@
4924 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4925 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4926 #
4927 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4928 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4929 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4930 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4931 ; Split the unpredicated form after reload, so that we don't have
4932 ; the unnecessary PTRUE.
4933 "&& reload_completed
4934 && register_operand (operands[3], <MODE>mode)
4935 && INTVAL (operands[4]) == SVE_RELAXED_GP"
4936 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4937 ""
4938 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
4939 )
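
;; For illustration (a sketch of the immediate alternatives): FADD's
;; immediate form only accepts #0.5 and #1.0, so "x[i] + 1.0f" can use
;; alternative 0 directly as "fadd z0.s, p0/m, z0.s, #1.0", while
;; "x[i] - 1.0f" matches the vsN alternative and prints the negated
;; constant as "fsub z0.s, p0/m, z0.s, #1.0".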
4940
4941 ;; Predicated floating-point addition of a constant, merging with the
4942 ;; first input.
4943 (define_insn_and_rewrite "*cond_add<mode>_2_const"
4944 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
4945 (unspec:SVE_FULL_F
4946 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4947 (unspec:SVE_FULL_F
4948 [(match_operand 4)
4949 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4950 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
4951 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
4952 UNSPEC_COND_FADD)
4953 (match_dup 2)]
4954 UNSPEC_SEL))]
4955 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4956 "@
4957 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4958 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4959 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4960 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
4961 "&& !rtx_equal_p (operands[1], operands[4])"
4962 {
4963 operands[4] = copy_rtx (operands[1]);
4964 }
4965 [(set_attr "movprfx" "*,*,yes,yes")]
4966 )
4967
4968 ;; Predicated floating-point addition of a constant, merging with an
4969 ;; independent value.
4970 (define_insn_and_rewrite "*cond_add<mode>_any_const"
4971 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
4972 (unspec:SVE_FULL_F
4973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
4974 (unspec:SVE_FULL_F
4975 [(match_operand 5)
4976 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4977 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
4978 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
4979 UNSPEC_COND_FADD)
4980 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
4981 UNSPEC_SEL))]
4982 "TARGET_SVE
4983 && !rtx_equal_p (operands[2], operands[4])
4984 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4985 "@
4986 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4987 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4988 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4989 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4990 #
4991 #"
4992 "&& 1"
4993 {
4994 if (reload_completed
4995 && register_operand (operands[4], <MODE>mode)
4996 && !rtx_equal_p (operands[0], operands[4]))
4997 {
4998 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4999 operands[4], operands[1]));
5000 operands[4] = operands[2] = operands[0];
5001 }
5002 else if (!rtx_equal_p (operands[1], operands[5]))
5003 operands[5] = copy_rtx (operands[1]);
5004 else
5005 FAIL;
5006 }
5007 [(set_attr "movprfx" "yes")]
5008 )
5009
5010 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5011
5012 ;; -------------------------------------------------------------------------
5013 ;; ---- [FP] Complex addition
5014 ;; -------------------------------------------------------------------------
5015 ;; Includes:
5016 ;; - FCADD
5017 ;; -------------------------------------------------------------------------
5018
5019 ;; Predicated FCADD.
5020 (define_insn "@aarch64_pred_<optab><mode>"
5021 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5022 (unspec:SVE_FULL_F
5023 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5024 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5025 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5026 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5027 SVE_COND_FCADD))]
5028 "TARGET_SVE"
5029 "@
5030 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5031 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5032 [(set_attr "movprfx" "*,yes")]
5033 )
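
;; As a worked example of the rotation: with #<rot> == 90, each complex
;; element pair computes (re, im) = (a.re - b.im, a.im + b.re), i.e.
;; a + i*b, while #270 computes (a.re + b.im, a.im - b.re), i.e. a - i*b.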
5034
5035 ;; Predicated FCADD with merging.
5036 (define_expand "@cond_<optab><mode>"
5037 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5038 (unspec:SVE_FULL_F
5039 [(match_operand:<VPRED> 1 "register_operand")
5040 (unspec:SVE_FULL_F
5041 [(match_dup 1)
5042 (const_int SVE_STRICT_GP)
5043 (match_operand:SVE_FULL_F 2 "register_operand")
5044 (match_operand:SVE_FULL_F 3 "register_operand")]
5045 SVE_COND_FCADD)
5046 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5047 UNSPEC_SEL))]
5048 "TARGET_SVE"
5049 )
5050
5051 ;; Predicated FCADD, merging with the first input.
5052 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5053 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5054 (unspec:SVE_FULL_F
5055 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5056 (unspec:SVE_FULL_F
5057 [(match_operand 4)
5058 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5059 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5060 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5061 SVE_COND_FCADD)
5062 (match_dup 2)]
5063 UNSPEC_SEL))]
5064 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5065 "@
5066 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5067 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5068 "&& !rtx_equal_p (operands[1], operands[4])"
5069 {
5070 operands[4] = copy_rtx (operands[1]);
5071 }
5072 [(set_attr "movprfx" "*,yes")]
5073 )
5074
5075 ;; Predicated FCADD, merging with an independent value.
5076 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5077 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5078 (unspec:SVE_FULL_F
5079 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5080 (unspec:SVE_FULL_F
5081 [(match_operand 5)
5082 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5083 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5084 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5085 SVE_COND_FCADD)
5086 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5087 UNSPEC_SEL))]
5088 "TARGET_SVE
5089 && !rtx_equal_p (operands[2], operands[4])
5090 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5091 "@
5092 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5093 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5094 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5095 #"
5096 "&& 1"
5097 {
5098 if (reload_completed
5099 && register_operand (operands[4], <MODE>mode)
5100 && !rtx_equal_p (operands[0], operands[4]))
5101 {
5102 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5103 operands[4], operands[1]));
5104 operands[4] = operands[2] = operands[0];
5105 }
5106 else if (!rtx_equal_p (operands[1], operands[5]))
5107 operands[5] = copy_rtx (operands[1]);
5108 else
5109 FAIL;
5110 }
5111 [(set_attr "movprfx" "yes")]
5112 )
5113
5114 ;; -------------------------------------------------------------------------
5115 ;; ---- [FP] Subtraction
5116 ;; -------------------------------------------------------------------------
5117 ;; Includes:
5118 ;; - FSUB
5119 ;; - FSUBR
5120 ;; -------------------------------------------------------------------------
5121
5122 ;; Predicated floating-point subtraction.
5123 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5124 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5125 (unspec:SVE_FULL_F
5126 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5127 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5128 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5129 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5130 SVE_COND_FP_SUB))]
5131 "TARGET_SVE"
5132 "@
5133 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5134 #
5135 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5136 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5137 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5138 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5139 ; Split the unpredicated form after reload, so that we don't have
5140 ; the unnecessary PTRUE.
5141 "&& reload_completed
5142 && register_operand (operands[2], <MODE>mode)
5143 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5144 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5145 ""
5146 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5147 )
5148
5149 ;; Predicated floating-point subtraction from a constant, merging with the
5150 ;; second input.
5151 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5152 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5153 (unspec:SVE_FULL_F
5154 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5155 (unspec:SVE_FULL_F
5156 [(match_operand 4)
5157 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5158 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5159 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5160 UNSPEC_COND_FSUB)
5161 (match_dup 3)]
5162 UNSPEC_SEL))]
5163 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5164 "@
5165 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5166 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5167 "&& !rtx_equal_p (operands[1], operands[4])"
5168 {
5169 operands[4] = copy_rtx (operands[1]);
5170 }
5171 [(set_attr "movprfx" "*,yes")]
5172 )
5173
5174 ;; Predicated floating-point subtraction from a constant, merging with an
5175 ;; independent value.
5176 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5177 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5178 (unspec:SVE_FULL_F
5179 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5180 (unspec:SVE_FULL_F
5181 [(match_operand 5)
5182 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5183 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5184 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5185 UNSPEC_COND_FSUB)
5186 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5187 UNSPEC_SEL))]
5188 "TARGET_SVE
5189 && !rtx_equal_p (operands[3], operands[4])
5190 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5191 "@
5192 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5193 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5194 #"
5195 "&& 1"
5196 {
5197 if (reload_completed
5198 && register_operand (operands[4], <MODE>mode)
5199 && !rtx_equal_p (operands[0], operands[4]))
5200 {
5201 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5202 operands[4], operands[1]));
5203 operands[4] = operands[3] = operands[0];
5204 }
5205 else if (!rtx_equal_p (operands[1], operands[5]))
5206 operands[5] = copy_rtx (operands[1]);
5207 else
5208 FAIL;
5209 }
5210 [(set_attr "movprfx" "yes")]
5211 )
5212
5213 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5214
5215 ;; -------------------------------------------------------------------------
5216 ;; ---- [FP] Absolute difference
5217 ;; -------------------------------------------------------------------------
5218 ;; Includes:
5219 ;; - FABD
5220 ;; -------------------------------------------------------------------------
5221
5222 ;; Predicated floating-point absolute difference.
5223 (define_expand "@aarch64_pred_abd<mode>"
5224 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5225 (unspec:SVE_FULL_F
5226 [(match_operand:<VPRED> 1 "register_operand")
5227 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5228 (unspec:SVE_FULL_F
5229 [(match_dup 1)
5230 (match_dup 4)
5231 (match_operand:SVE_FULL_F 2 "register_operand")
5232 (match_operand:SVE_FULL_F 3 "register_operand")]
5233 UNSPEC_COND_FSUB)]
5234 UNSPEC_COND_FABS))]
5235 "TARGET_SVE"
5236 )
5237
5238 ;; Predicated floating-point absolute difference.
5239 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5240 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5241 (unspec:SVE_FULL_F
5242 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5243 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5244 (unspec:SVE_FULL_F
5245 [(match_operand 5)
5246 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5247 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5248 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5249 UNSPEC_COND_FSUB)]
5250 UNSPEC_COND_FABS))]
5251 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5252 "@
5253 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5254 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5255 "&& !rtx_equal_p (operands[1], operands[5])"
5256 {
5257 operands[5] = copy_rtx (operands[1]);
5258 }
5259 [(set_attr "movprfx" "*,yes")]
5260 )
5261
5262 (define_expand "@aarch64_cond_abd<mode>"
5263 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5264 (unspec:SVE_FULL_F
5265 [(match_operand:<VPRED> 1 "register_operand")
5266 (unspec:SVE_FULL_F
5267 [(match_dup 1)
5268 (const_int SVE_STRICT_GP)
5269 (unspec:SVE_FULL_F
5270 [(match_dup 1)
5271 (const_int SVE_STRICT_GP)
5272 (match_operand:SVE_FULL_F 2 "register_operand")
5273 (match_operand:SVE_FULL_F 3 "register_operand")]
5274 UNSPEC_COND_FSUB)]
5275 UNSPEC_COND_FABS)
5276 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5277 UNSPEC_SEL))]
5278 "TARGET_SVE"
5279 {
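  /* Swap the subtraction operands if the fallback value is the second
     of the two; |a - b| == |b - a|, so the result is unchanged and the
     "merging with the first input" pattern can match.  */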
5280 if (rtx_equal_p (operands[3], operands[4]))
5281 std::swap (operands[2], operands[3]);
5282 })
5283
5284 ;; Predicated floating-point absolute difference, merging with the first
5285 ;; input.
5286 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5287 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5288 (unspec:SVE_FULL_F
5289 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5290 (unspec:SVE_FULL_F
5291 [(match_operand 4)
5292 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5293 (unspec:SVE_FULL_F
5294 [(match_operand 6)
5295 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5296 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5297 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5298 UNSPEC_COND_FSUB)]
5299 UNSPEC_COND_FABS)
5300 (match_dup 2)]
5301 UNSPEC_SEL))]
5302 "TARGET_SVE
5303 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5304 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5305 "@
5306 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5307 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5308 "&& (!rtx_equal_p (operands[1], operands[4])
5309 || !rtx_equal_p (operands[1], operands[6]))"
5310 {
5311 operands[4] = copy_rtx (operands[1]);
5312 operands[6] = copy_rtx (operands[1]);
5313 }
5314 [(set_attr "movprfx" "*,yes")]
5315 )
5316
5317 ;; Predicated floating-point absolute difference, merging with the second
5318 ;; input.
5319 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5320 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5321 (unspec:SVE_FULL_F
5322 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5323 (unspec:SVE_FULL_F
5324 [(match_operand 4)
5325 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5326 (unspec:SVE_FULL_F
5327 [(match_operand 6)
5328 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5329 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5330 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5331 UNSPEC_COND_FSUB)]
5332 UNSPEC_COND_FABS)
5333 (match_dup 3)]
5334 UNSPEC_SEL))]
5335 "TARGET_SVE
5336 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5337 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5338 "@
5339 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5340 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5341 "&& (!rtx_equal_p (operands[1], operands[4])
5342 || !rtx_equal_p (operands[1], operands[6]))"
5343 {
5344 operands[4] = copy_rtx (operands[1]);
5345 operands[6] = copy_rtx (operands[1]);
5346 }
5347 [(set_attr "movprfx" "*,yes")]
5348 )
5349
5350 ;; Predicated floating-point absolute difference, merging with an
5351 ;; independent value.
5352 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5353 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5354 (unspec:SVE_FULL_F
5355 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5356 (unspec:SVE_FULL_F
5357 [(match_operand 5)
5358 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5359 (unspec:SVE_FULL_F
5360 [(match_operand 7)
5361 (match_operand:SI 8 "aarch64_sve_gp_strictness")
5362 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5363 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5364 UNSPEC_COND_FSUB)]
5365 UNSPEC_COND_FABS)
5366 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5367 UNSPEC_SEL))]
5368 "TARGET_SVE
5369 && !rtx_equal_p (operands[2], operands[4])
5370 && !rtx_equal_p (operands[3], operands[4])
5371 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5372 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5373 "@
5374 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5375 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5376 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5377 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5378 #"
5379 "&& 1"
5380 {
5381 if (reload_completed
5382 && register_operand (operands[4], <MODE>mode)
5383 && !rtx_equal_p (operands[0], operands[4]))
5384 {
5385 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5386 operands[4], operands[1]));
5387 operands[4] = operands[3] = operands[0];
5388 }
5389 else if (!rtx_equal_p (operands[1], operands[5])
5390 || !rtx_equal_p (operands[1], operands[7]))
5391 {
5392 operands[5] = copy_rtx (operands[1]);
5393 operands[7] = copy_rtx (operands[1]);
5394 }
5395 else
5396 FAIL;
5397 }
5398 [(set_attr "movprfx" "yes")]
5399 )
5400
5401 ;; -------------------------------------------------------------------------
5402 ;; ---- [FP] Multiplication
5403 ;; -------------------------------------------------------------------------
5404 ;; Includes:
5405 ;; - FMUL
5406 ;; -------------------------------------------------------------------------
5407
5408 ;; Predicated floating-point multiplication.
5409 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5410 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5411 (unspec:SVE_FULL_F
5412 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5413 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5414 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5415 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5416 SVE_COND_FP_MUL))]
5417 "TARGET_SVE"
5418 "@
5419 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5420 #
5421 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5422 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5423 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5424 ; Split the unpredicated form after reload, so that we don't have
5425 ; the unnecessary PTRUE.
5426 "&& reload_completed
5427 && register_operand (operands[3], <MODE>mode)
5428 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5429 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5430 ""
5431 [(set_attr "movprfx" "*,*,*,yes,yes")]
5432 )
5433
5434 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5435 ;; SVE_COND_FP_BINARY_I1.
5436
5437 ;; Unpredicated multiplication by selected lanes.
5438 (define_insn "@aarch64_mul_lane_<mode>"
5439 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5440 (mult:SVE_FULL_F
5441 (unspec:SVE_FULL_F
5442 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5443 (match_operand:SI 3 "const_int_operand")]
5444 UNSPEC_SVE_LANE_SELECT)
5445 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5446 "TARGET_SVE"
5447 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5448 )
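
;; For example (illustrative): "fmul z0.s, z1.s, z2.s[2]" multiplies each
;; element of z1 by element 2 of the 128-bit segment of z2 that contains
;; it; the svmul_lane ACLE intrinsics are a typical source of this pattern.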
5449
5450 ;; -------------------------------------------------------------------------
5451 ;; ---- [FP] Binary logical operations
5452 ;; -------------------------------------------------------------------------
5453 ;; Includes:
5454 ;; - AND
5455 ;; - EOR
5456 ;; - ORR
5457 ;; -------------------------------------------------------------------------
5458
5459 ;; Binary logical operations on floating-point modes. We avoid subregs
5460 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5461 ;; aren't defined for floating-point modes.
5462 (define_insn "*<optab><mode>3"
5463 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5464 (unspec:SVE_FULL_F
5465 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5466 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5467 LOGICALF))]
5468 "TARGET_SVE"
5469 "<logicalf_op>\t%0.d, %1.d, %2.d"
5470 )
5471
5472 ;; -------------------------------------------------------------------------
5473 ;; ---- [FP] Sign copying
5474 ;; -------------------------------------------------------------------------
5475 ;; The patterns in this section are synthetic.
5476 ;; -------------------------------------------------------------------------
5477
5478 (define_expand "copysign<mode>3"
5479 [(match_operand:SVE_FULL_F 0 "register_operand")
5480 (match_operand:SVE_FULL_F 1 "register_operand")
5481 (match_operand:SVE_FULL_F 2 "register_operand")]
5482 "TARGET_SVE"
5483 {
5484 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5485 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5486 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5487 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5488
5489 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5490 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5491
5492 emit_insn (gen_and<v_int_equiv>3
5493 (sign, arg2,
5494 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5495 HOST_WIDE_INT_M1U
5496 << bits)));
5497 emit_insn (gen_and<v_int_equiv>3
5498 (mant, arg1,
5499 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5500 ~(HOST_WIDE_INT_M1U
5501 << bits))));
5502 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5503 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5504 DONE;
5505 }
5506 )
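
;; A scalar sketch of the same bit manipulation (a hypothetical helper
;; using <stdint.h> and <string.h>, for illustration only):
;;
;;   float copysignf_sketch (float x, float y)
;;   {
;;     uint32_t ix, iy;
;;     memcpy (&ix, &x, sizeof ix);
;;     memcpy (&iy, &y, sizeof iy);
;;     ix = (iy & 0x80000000u) | (ix & 0x7fffffffu); /* sign | magnitude */
;;     memcpy (&x, &ix, sizeof ix);
;;     return x;
;;   }
;;
;; The expander performs the same AND/AND/ORR sequence element-wise on the
;; integer view of the vectors, with the two masks built from
;; HOST_WIDE_INT_M1U << bits.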
5507
5508 (define_expand "xorsign<mode>3"
5509 [(match_operand:SVE_FULL_F 0 "register_operand")
5510 (match_operand:SVE_FULL_F 1 "register_operand")
5511 (match_operand:SVE_FULL_F 2 "register_operand")]
5512 "TARGET_SVE"
5513 {
5514 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5515 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5516 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5517
5518 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5519 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5520
5521 emit_insn (gen_and<v_int_equiv>3
5522 (sign, arg2,
5523 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5524 HOST_WIDE_INT_M1U
5525 << bits)));
5526 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5527 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5528 DONE;
5529 }
5530 )
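
;; Likewise a scalar sketch (illustrative): xorsign computes
;; x ^ (y & signbit), flipping the sign of x exactly when y is negative,
;; i.e. x * copysign (1.0, y) without a multiplication.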
5531
5532 ;; -------------------------------------------------------------------------
5533 ;; ---- [FP] Maximum and minimum
5534 ;; -------------------------------------------------------------------------
5535 ;; Includes:
5536 ;; - FMAX
5537 ;; - FMAXNM
5538 ;; - FMIN
5539 ;; - FMINNM
5540 ;; -------------------------------------------------------------------------
5541
5542 ;; Unpredicated fmax/fmin (the libm functions). The optabs for the
5543 ;; smin/smax rtx codes are handled in the generic section above.
5544 (define_expand "<maxmin_uns><mode>3"
5545 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5546 (unspec:SVE_FULL_F
5547 [(match_dup 3)
5548 (const_int SVE_RELAXED_GP)
5549 (match_operand:SVE_FULL_F 1 "register_operand")
5550 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5551 SVE_COND_FP_MAXMIN_PUBLIC))]
5552 "TARGET_SVE"
5553 {
5554 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5555 }
5556 )
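
;; For example (illustrative): C fmax() must return the non-NaN operand
;; when exactly one input is a NaN.  FMAXNM has that behaviour, whereas
;; FMAX would propagate the NaN, so fmax (x, NAN) yields x here.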
5557
5558 ;; Predicated floating-point maximum/minimum.
5559 (define_insn "@aarch64_pred_<optab><mode>"
5560 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5561 (unspec:SVE_FULL_F
5562 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5563 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5564 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5565 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5566 SVE_COND_FP_MAXMIN))]
5567 "TARGET_SVE"
5568 "@
5569 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5570 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5571 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5572 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5573 [(set_attr "movprfx" "*,*,yes,yes")]
5574 )
5575
5576 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5577 ;; SVE_COND_FP_BINARY_I1.
5578
5579 ;; -------------------------------------------------------------------------
5580 ;; ---- [PRED] Binary logical operations
5581 ;; -------------------------------------------------------------------------
5582 ;; Includes:
5583 ;; - AND
5584 ;; - ANDS
5585 ;; - EOR
5586 ;; - EORS
5587 ;; - ORR
5588 ;; - ORRS
5589 ;; -------------------------------------------------------------------------
5590
5591 ;; Predicate AND. We can reuse one of the inputs as the GP.
5592 ;; Doubling the second operand is the preferred implementation
5593 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
5594 (define_insn "and<mode>3"
5595 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5596 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5597 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5598 "TARGET_SVE"
5599 "and\t%0.b, %1/z, %2.b, %2.b"
5600 )
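
;; For example (illustrative): "and p0.b, p1/z, p2.b, p2.b" disassembles
;; as the preferred alias "mov p0.b, p1/z, p2.b", so the output reads as
;; a predicated predicate move.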
5601
5602 ;; Unpredicated predicate EOR and ORR.
5603 (define_expand "<optab><mode>3"
5604 [(set (match_operand:PRED_ALL 0 "register_operand")
5605 (and:PRED_ALL
5606 (LOGICAL_OR:PRED_ALL
5607 (match_operand:PRED_ALL 1 "register_operand")
5608 (match_operand:PRED_ALL 2 "register_operand"))
5609 (match_dup 3)))]
5610 "TARGET_SVE"
5611 {
5612 operands[3] = aarch64_ptrue_reg (<MODE>mode);
5613 }
5614 )
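
;; For example (illustrative): there is no unpredicated predicate EOR or
;; ORR encoding, so the expander emits the zeroing form with an all-true
;; GP, e.g. "eor p0.b, p3/z, p1.b, p2.b" where p3 is a PTRUE.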
5615
5616 ;; Predicated predicate AND, EOR and ORR.
5617 (define_insn "@aarch64_pred_<optab><mode>_z"
5618 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5619 (and:PRED_ALL
5620 (LOGICAL:PRED_ALL
5621 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5622 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5623 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5624 "TARGET_SVE"
5625 "<logical>\t%0.b, %1/z, %2.b, %3.b"
5626 )
5627
5628 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5629 ;; the GP. Store the result in operand 0 and set the flags in the same
5630 ;; way as for PTEST.
5631 (define_insn "*<optab><mode>3_cc"
5632 [(set (reg:CC_NZC CC_REGNUM)
5633 (unspec:CC_NZC
5634 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5635 (match_operand 4)
5636 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5637 (and:PRED_ALL
5638 (LOGICAL:PRED_ALL
5639 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5640 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5641 (match_dup 4))]
5642 UNSPEC_PTEST))
5643 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5644 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5645 (match_dup 4)))]
5646 "TARGET_SVE"
5647 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5648 )
5649
5650 ;; Same with just the flags result.
5651 (define_insn "*<optab><mode>3_ptest"
5652 [(set (reg:CC_NZC CC_REGNUM)
5653 (unspec:CC_NZC
5654 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5655 (match_operand 4)
5656 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5657 (and:PRED_ALL
5658 (LOGICAL:PRED_ALL
5659 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5660 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5661 (match_dup 4))]
5662 UNSPEC_PTEST))
5663 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5664 "TARGET_SVE"
5665 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5666 )
5667
5668 ;; -------------------------------------------------------------------------
5669 ;; ---- [PRED] Binary logical operations (inverted second input)
5670 ;; -------------------------------------------------------------------------
5671 ;; Includes:
5672 ;; - BIC
5673 ;; - ORN
5674 ;; -------------------------------------------------------------------------
5675
5676 ;; Predicated predicate BIC and ORN.
5677 (define_insn "aarch64_pred_<nlogical><mode>_z"
5678 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5679 (and:PRED_ALL
5680 (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
            (match_operand:PRED_ALL 2 "register_operand" "Upa"))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Same, but set the flags as a side-effect.
(define_insn "*<nlogical><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
               (match_operand:PRED_ALL 2 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (NLOGICAL:PRED_ALL
                        (not:PRED_ALL (match_dup 3))
                        (match_dup 2))
                      (match_dup 4)))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same with just the flags result.
(define_insn "*<nlogical><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
               (match_operand:PRED_ALL 2 "register_operand" "Upa"))
             (match_dup 4))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary logical operations (inverted result)
;; -------------------------------------------------------------------------
;; Includes:
;; - NAND
;; - NOR
;; -------------------------------------------------------------------------

;; Predicated predicate NAND and NOR.
(define_insn "aarch64_pred_<logical_nn><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL
          (NLOGICAL:PRED_ALL
            (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
            (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
          (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Same, but set the flags as a side-effect.
(define_insn "*<logical_nn><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
             (match_dup 4))]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (and:PRED_ALL (NLOGICAL:PRED_ALL
                        (not:PRED_ALL (match_dup 2))
                        (not:PRED_ALL (match_dup 3)))
                      (match_dup 4)))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same with just the flags result.
(define_insn "*<logical_nn><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (and:PRED_ALL
             (NLOGICAL:PRED_ALL
               (not:PRED_ALL
                 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
               (not:PRED_ALL
                 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
             (match_dup 4))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)

;; =========================================================================
;; == Ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] MLA and MAD
;; -------------------------------------------------------------------------
;; Includes:
;; - MAD
;; - MLA
;; -------------------------------------------------------------------------

;; Unpredicated integer addition of product.
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand")
               (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 3 "register_operand")))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_int_fma (operands, PLUS))
      DONE;
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer addition of product.
(define_insn "@aarch64_pred_fma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
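
;; A worked illustration of the alternatives above (hypothetical register
;; allocation, not part of the pattern): MLA accumulates into the addend
;; (Zda += Zn * Zm) while MAD overwrites the first multiplicand
;; (Zdn = Zdn * Zm + Za).  For a 32-bit a[i] = a[i] + b[i] * c[i] with
;; a in z0, b in z1, c in z2 and governing predicate p0, the first two
;; alternatives correspond to:
;;   mla  z0.s, p0/m, z1.s, z2.s   // result tied to the addend (operand 4)
;;   mad  z1.s, p0/m, z2.s, z0.s   // result tied to a multiplicand (operand 2)
;; and the third alternative falls back to MOVPRFX + MLA when neither
;; input can be reused as the destination.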

;; Predicated integer addition of product with merging.
(define_expand "cond_fma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (plus:SVE_FULL_I
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")
               (match_operand:SVE_FULL_I 3 "general_operand"))
             (match_operand:SVE_FULL_I 4 "register_operand"))
           (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
      DONE;
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)

;; Predicated integer addition of product, merging with the first input.
(define_insn "*cond_fma<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (plus:SVE_FULL_I
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
             (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer addition of product, merging with the third input.
(define_insn "*cond_fma<mode>_4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (plus:SVE_FULL_I
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
             (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer addition of product, merging with an independent value.
(define_insn_and_rewrite "*cond_fma<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (plus:SVE_FULL_I
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
             (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
           (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                             operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] MLS and MSB
;; -------------------------------------------------------------------------
;; Includes:
;; - MLS
;; - MSB
;; -------------------------------------------------------------------------

;; Unpredicated integer subtraction of product.
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (minus:SVE_FULL_I
          (match_operand:SVE_FULL_I 3 "register_operand")
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand")
               (match_operand:SVE_FULL_I 2 "general_operand"))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_int_fma (operands, MINUS))
      DONE;
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer subtraction of product.
(define_insn "@aarch64_pred_fnma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
        (minus:SVE_FULL_I
          (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
          (unspec:SVE_FULL_I
            [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
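
;; As with MLA/MAD above, a sketch with hypothetical registers: MLS
;; subtracts the product from the accumulator (Zda -= Zn * Zm) and MSB
;; overwrites the first multiplicand (Zdn = Za - Zdn * Zm), so for
;; a[i] -= b[i] * c[i] the allocator can emit either
;;   mls  z0.s, p0/m, z1.s, z2.s
;; or
;;   msb  z1.s, p0/m, z2.s, z0.s
;; depending on which input it is able to overwrite.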

;; Predicated integer subtraction of product with merging.
(define_expand "cond_fnma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (minus:SVE_FULL_I
             (match_operand:SVE_FULL_I 4 "register_operand")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")
               (match_operand:SVE_FULL_I 3 "general_operand")))
           (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
      DONE;
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)

;; Predicated integer subtraction of product, merging with the first input.
(define_insn "*cond_fnma<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (minus:SVE_FULL_I
             (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer subtraction of product, merging with the third input.
(define_insn "*cond_fnma<mode>_4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (minus:SVE_FULL_I
             (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer subtraction of product, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_fnma<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (minus:SVE_FULL_I
             (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
             (mult:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
               (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
           (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                             operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - SDOT
;; - UDOT
;; -------------------------------------------------------------------------

;; Four-element integer dot-product with accumulation.
(define_insn "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
            DOTPROD)
          (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
  [(set_attr "movprfx" "*,yes")]
)
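
;; Illustrative semantics (a sketch, not part of the pattern): each result
;; lane accumulates the dot product of the four corresponding quarter-width
;; subelements, i.e. for SDOT/UDOT with 8-bit inputs and 32-bit results
;;   op0.s[i] = op3.s[i] + sum over j in [0,3] of op1.b[4*i+j] * op2.b[4*i+j]
;; which is why operands 1 and 2 use the <VSI2QI> mode and the
;; <Vetype_fourth> suffix in the assembly template.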

;; Four-element integer dot-product by selected lanes with accumulation.
(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
             (unspec:<VSI2QI>
               [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 3 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            DOTPROD)
          (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
   movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
;; operands 1 and 2.  The sequence also has to perform a widening reduction of
;; the difference into a vector and accumulate that into operand 3 before
;; copying that into the result operand 0.
;; Perform that with a sequence of:
;;   MOV     ones.b, #1
;;   [SU]ABD diff.b, p0/m, op1.b, op2.b
;;   MOVPRFX op0, op3          // If necessary
;;   UDOT    op0.s, diff.b, ones.b
(define_expand "<sur>sad<vsi2qi>"
  [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
   (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
                     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
   (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
  "TARGET_SVE"
  {
    rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
    rtx diff = gen_reg_rtx (<VSI2QI>mode);
    emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
    emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
    DONE;
  }
)
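
;; A small worked example of why the UDOT trick is safe (illustrative
;; numbers only): if one group of four byte differences is 1, 2, 3, 4,
;; then UDOT with the all-ones vector accumulates 1*1 + 2*1 + 3*1 + 4*1
;; = 10 into the corresponding 32-bit lane.  The absolute differences are
;; always non-negative, so the unsigned dot product is correct for both
;; the signed and the unsigned SAD optabs.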

;; -------------------------------------------------------------------------
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FMAD
;; - FMLA
;; - FMLS
;; - FMSB
;; - FNMAD
;; - FNMLA
;; - FNMLS
;; - FNMSB
;; -------------------------------------------------------------------------

;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 4)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")
           (match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "register_operand")]
          SVE_COND_FP_TERNARY))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SI 5 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
           (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
          SVE_COND_FP_TERNARY))]
  "TARGET_SVE"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
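
;; For the fma optab, for example, <sve_fmla_op> is fmla and <sve_fmad_op>
;; is fmad, giving the same tie-based choice as the integer MLA/MAD
;; patterns above (a sketch with hypothetical registers):
;;   fmla z0.s, p0/m, z1.s, z2.s   // z0 += z1 * z2
;;   fmad z1.s, p0/m, z2.s, z0.s   // z1 = z1 * z2 + z0
;; The other iterator values map the negated forms onto FMLS/FMSB,
;; FNMLA/FNMAD and FNMLS/FNMSB in the same accumulator/multiplicand
;; pairing.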

;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")
              (match_operand:SVE_FULL_F 4 "register_operand")]
             SVE_COND_FP_TERNARY)
           (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  })

;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
              (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
              (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
             SVE_COND_FP_TERNARY)
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
              (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
             SVE_COND_FP_TERNARY)
           (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])
   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[5], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[5]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                                 operands[5], operands[1]));
        operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
;; (fma ...) since target-independent code won't understand the indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
           (unspec:SVE_FULL_F
             [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
          SVE_FP_TERNARY_LANE))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)
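
;; Indexed multiply-accumulates operate per 128-bit segment: the immediate
;; selects one element within each quadword of operand 2, so (with
;; illustrative registers) fmla z0.s, z1.s, z2.s[2] multiplies each element
;; of z1 by element 2 of the containing 128-bit chunk of z2.  This is also
;; why the <sve_lane_con> constraint restricts operand 2 to the low Z
;; registers that the indexed encodings can name.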

;; -------------------------------------------------------------------------
;; ---- [FP] Complex multiply-add
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FCMLA
;; -------------------------------------------------------------------------

;; Predicated FCMLA.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 5 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
           (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
          SVE_COND_FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
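
;; FCMLA treats each pair of adjacent elements as a complex number, and the
;; <rot> immediate (0, 90, 180 or 270 degrees) selects which partial product
;; it accumulates.  A full complex multiply-accumulate is therefore normally
;; emitted as a pair of instructions (hypothetical registers):
;;   fcmla z0.s, p0/m, z1.s, z2.s, #0
;;   fcmla z0.s, p0/m, z1.s, z2.s, #90
;; which together compute z0 += z1 * z2 in complex arithmetic.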

;; Predicated FCMLA with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")
              (match_operand:SVE_FULL_F 4 "register_operand")]
             SVE_COND_FCMLA)
           (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCMLA, merging with the third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
              (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
             SVE_COND_FCMLA)
           (match_dup 4)]
          UNSPEC_SEL))]
  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCMLA, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_F
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
              (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
              (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
             SVE_COND_FCMLA)
           (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[4], operands[5])
   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[5], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[5]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
                                                 operands[5], operands[1]));
        operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated FCMLA with indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSF
          [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
           (unspec:SVE_FULL_HSF
             [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
          FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Trigonometric multiply-add
;; -------------------------------------------------------------------------
;; Includes:
;; - FTMAD
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_tmad<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
           (match_operand:DI 3 "const_int_operand")]
          UNSPEC_FTMAD))]
  "TARGET_SVE"
  "@
   ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
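
;; FTMAD performs a fused multiply-add against a table of series
;; coefficients hard-wired by the architecture and selected by the
;; immediate; math libraries chain several FTMADs to evaluate polynomial
;; approximations of sine and cosine.  One step of such a chain looks
;; like (illustrative registers only):
;;   ftmad z0.d, z0.d, z1.d, #3
;; Only the general shape matters here; the exact coefficient selection
;; is defined by the architecture, not by this pattern.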

;; =========================================================================
;; == Comparisons and selects
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Select based on predicates
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FMOV
;; - MOV
;; - SEL
;; -------------------------------------------------------------------------

;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order: mask, true, false
(define_expand "@vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 3 "register_operand")
           (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
           (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (register_operand (operands[1], <MODE>mode))
      operands[2] = force_reg (<MODE>mode, operands[2]);
  }
)
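
;; SEL is a pure per-element select: with illustrative registers,
;;   sel z0.s, p0, z1.s, z2.s
;; gives z0[i] = p0[i] ? z1[i] : z2[i] for each element, matching the
;; UNSPEC_SEL (mask, true, false) ordering noted above.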

;; Selects between:
;; - two registers
;; - a duplicated immediate and a register
;; - a duplicated immediate and zero
(define_insn "*vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
           (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
           (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && (!register_operand (operands[1], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
   mov\t%0.<Vetype>, %3/m, #%I1
   mov\t%0.<Vetype>, %3/z, #%I1
   fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
   movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)

;; Optimize selects between a duplicated scalar variable and another vector,
;; the latter of which can be a zero constant or a variable.  Treat duplicates
;; of GPRs as being more expensive than duplicates of FPRs, since they
;; involve a cross-file move.
(define_insn "@aarch64_sel_dup<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (vec_duplicate:SVE_FULL
             (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
           (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %3/m, %<vwcore>1
   mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
  [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Compare and select
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
        (if_then_else:SVE_FULL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_FULL 1 "nonmemory_operand")
          (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
        (if_then_else:SVE_FULL
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_INT_EQUIV> 4 "register_operand")
             (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
          (match_operand:SVE_FULL 1 "nonmemory_operand")
          (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
(define_expand "vcond<mode><v_fp_equiv>"
  [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
        (if_then_else:SVE_FULL_HSD
          (match_operator 3 "comparison_operator"
            [(match_operand:<V_FP_EQUIV> 4 "register_operand")
             (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
          (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
          (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - CMPEQ
;; - CMPGE
;; - CMPGT
;; - CMPHI
;; - CMPHS
;; - CMPLE
;; - CMPLO
;; - CMPLS
;; - CMPLT
;; - CMPNE
;; -------------------------------------------------------------------------

;; Signed integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_FULL_I 2 "register_operand")
              (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
     [(set (match_operand:<VPRED> 0 "register_operand")
           (match_operator:<VPRED> 1 "comparison_operator"
             [(match_operand:SVE_FULL_I 2 "register_operand")
              (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
      (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
                                    operands[2], operands[3]);
    DONE;
  }
)

;; Predicated integer comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (SVE_INT_CMP:<VPRED>
             (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
             (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
          UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
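
;; UNSPEC_PRED_Z gives these comparisons zeroing semantics: with
;; illustrative registers, cmpgt p0.s, p1/z, z0.s, #0 sets
;; p0[i] = p1[i] && (z0[i] > 0), so elements outside the governing
;; predicate always come out inactive in the result.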

;; Predicated integer comparisons in which both the flag and predicate
;; results are interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (SVE_INT_CMP:<VPRED>
                (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_dup 6)
           (match_dup 7)
           (SVE_INT_CMP:<VPRED>
             (match_dup 2)
             (match_dup 3))]
          UNSPEC_PRED_Z))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated integer comparisons in which only the flags result is
;; interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (SVE_INT_CMP:<VPRED>
                (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND.  Split the instruction into its preferred form
;; at the earliest opportunity, in order to get rid of the redundant
;; operand 4.
(define_insn_and_split "*cmp<cmp_op><mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand 4)
             (const_int SVE_KNOWN_PTRUE)
             (SVE_INT_CMP:<VPRED>
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
            UNSPEC_PRED_Z)
          (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
           (unspec:<VPRED>
             [(match_dup 1)
              (const_int SVE_MAYBE_NOT_PTRUE)
              (SVE_INT_CMP:<VPRED>
                (match_dup 2)
                (match_dup 3))]
             UNSPEC_PRED_Z))
      (clobber (reg:CC_NZC CC_REGNUM))])]
)

;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
              (match_operand:VNx2DI 4 "register_operand" "w")]
             SVE_COND_INT_CMP_WIDE)]
          UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
)

;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:VNx16BI 6 "register_operand" "Upl")
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
                 (match_operand:VNx2DI 3 "register_operand" "w")]
                SVE_COND_INT_CMP_WIDE)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_dup 6)
           (match_dup 7)
           (unspec:<VPRED>
             [(match_dup 2)
              (match_dup 3)]
             SVE_COND_INT_CMP_WIDE)]
          UNSPEC_PRED_Z))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
)

;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:VNx16BI 6 "register_operand" "Upl")
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
                 (match_operand:VNx2DI 3 "register_operand" "w")]
                SVE_COND_INT_CMP_WIDE)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
)

;; -------------------------------------------------------------------------
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
;; Includes:
;; - WHILEGE (SVE2)
;; - WHILEGT (SVE2)
;; - WHILEHI (SVE2)
;; - WHILEHS (SVE2)
;; - WHILELE
;; - WHILELO
;; - WHILELS
;; - WHILELT
;; - WHILERW (SVE2)
;; - WHILEWR (SVE2)
;; -------------------------------------------------------------------------

;; Set element I of the result if (cmp (plus operand1 J) operand2) is
;; true for all J in [0, I].
(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
                          (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
                         SVE_WHILE))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
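
;; For example (a sketch with illustrative registers): whilelo p0.s, x0, x1
;; sets p0[i] while x0 + i < x1 and clears the remaining elements, which is
;; how the vectorizer builds the governing predicate for a partial final
;; iteration of a loop with x1 - x0 elements left.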

;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 3)
           (match_operand 4)
           (const_int SVE_KNOWN_PTRUE)
           (unspec:PRED_ALL
             [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
              (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
             SVE_WHILE)]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL [(match_dup 1)
                          (match_dup 2)]
                         SVE_WHILE))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 3)
           (match_operand 4)
           (const_int SVE_KNOWN_PTRUE)
           (unspec:PRED_ALL
             [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
              (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
             SVE_WHILE)]
          UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [FP] Direct comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FCMEQ
;; - FCMGE
;; - FCMGT
;; - FCMLE
;; - FCMLT
;; - FCMNE
;; - FCMUO
;; -------------------------------------------------------------------------

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (match_operator:<VPRED> 1 "comparison_operator"
          [(match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
                                      operands[2], operands[3], false);
    DONE;
  }
)

;; Predicated floating-point comparisons.
(define_insn "@aarch64_pred_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
          SVE_COND_FP_CMP_I0))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (match_operand:SVE_FULL_F 3 "register_operand" "w")
           (match_operand:SVE_FULL_F 4 "register_operand" "w")]
          UNSPEC_COND_FCMUO))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P.  This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
;; since we can drop any potentially-trapping operations whose results
;; are not needed.
;;
;; Split the instruction into its preferred form (below) at the earliest
;; opportunity, in order to get rid of the redundant operand 1.
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (const_int SVE_KNOWN_PTRUE)
             (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
             (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
            SVE_COND_FP_CMP_I0)
          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (unspec:<VPRED>
          [(match_dup 4)
           (const_int SVE_MAYBE_NOT_PTRUE)
           (match_dup 2)
           (match_dup 3)]
          SVE_COND_FP_CMP_I0))]
)

;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (and:<VPRED>
          (unspec:<VPRED>
            [(match_operand:<VPRED> 1)
             (const_int SVE_KNOWN_PTRUE)
             (match_operand:SVE_FULL_F 2 "register_operand" "w")
             (match_operand:SVE_FULL_F 3 "register_operand" "w")]
            UNSPEC_COND_FCMUO)
          (match_operand:<VPRED> 4 "register_operand" "Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
        (unspec:<VPRED>
          [(match_dup 4)
           (const_int SVE_MAYBE_NOT_PTRUE)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_COND_FCMUO))]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Absolute comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FACGE
;; - FACGT
;; - FACLE
;; - FACLT
;; -------------------------------------------------------------------------

;; Predicated floating-point absolute comparisons.
(define_expand "@aarch64_pred_fac<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_dup 2)
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FABS)
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_dup 2)
              (match_operand:SVE_FULL_F 4 "register_operand")]
             UNSPEC_COND_FABS)]
          SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
)

(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (match_operand:SI 6 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 2 "register_operand" "w")]
             UNSPEC_COND_FABS)
           (unspec:SVE_FULL_F
             [(match_operand 7)
              (match_operand:SI 8 "aarch64_sve_gp_strictness")
              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
             UNSPEC_COND_FABS)]
          SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE
   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
   && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[5])
       || !rtx_equal_p (operands[1], operands[7]))"
  {
    operands[5] = copy_rtx (operands[1]);
    operands[7] = copy_rtx (operands[1]);
  }
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
;; Includes:
;; - SEL
;; -------------------------------------------------------------------------

(define_insn "@vcond_mask_<mode><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (ior:PRED_ALL
          (and:PRED_ALL
            (match_operand:PRED_ALL 3 "register_operand" "Upa")
            (match_operand:PRED_ALL 1 "register_operand" "Upa"))
          (and:PRED_ALL
            (not (match_dup 3))
            (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
  "TARGET_SVE"
  "sel\t%0.b, %3, %1.b, %2.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Test bits
;; -------------------------------------------------------------------------
;; Includes:
;; - PTEST
;; -------------------------------------------------------------------------

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else
          (match_operator 0 "aarch64_equality_operator"
            [(match_operand:PRED_ALL 1 "register_operand")
             (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
          (label_ref (match_operand 3 ""))
          (pc)))]
  ""
  {
    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
    rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
        pred = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
                                                 operands[2]));
      }
    emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
    operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
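
;; The expansion above therefore turns, say, "if (p1 != p2)" into a zeroing
;; EOR of the two predicates followed by a PTEST of the result (a sketch
;; with hypothetical registers, p4 being the PTRUE):
;;   eor   p3.b, p4/z, p1.b, p2.b
;;   ptest p4, p3.b
;;   b.any .Ltaken                 // b.none for the == case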

;; See "Description of UNSPEC_PTEST" above for details.
(define_insn "aarch64_ptest<mode>"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
                        (match_operand 1)
                        (match_operand:SI 2 "aarch64_sve_ptrue_flag")
                        (match_operand:PRED_ALL 3 "register_operand" "Upa")]
                       UNSPEC_PTEST))]
  "TARGET_SVE"
  "ptest\t%0, %3.b"
)

;; =========================================================================
;; == Reductions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Conditional reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - CLASTA
;; - CLASTB
;; -------------------------------------------------------------------------

;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "@fold_extract_<last_op>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
        (unspec:<VEL>
          [(match_operand:<VEL> 1 "register_operand" "0, 0")
           (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
           (match_operand:SVE_FULL 3 "register_operand" "w, w")]
          CLAST))]
  "TARGET_SVE"
  "@
   clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
   clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)
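
;; For example (illustrative registers): clastb w0, p0, w0, z1.s copies the
;; last active element of z1 into w0 and leaves w0 unchanged when no element
;; is active, which is why operand 1 is tied to the result in both
;; alternatives.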
7198
7199 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7200 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7201 (unspec:SVE_FULL
7202 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7203 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7204 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7205 CLAST))]
7206 "TARGET_SVE"
7207 "@
7208 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7209 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7210 )
7211
7212 ;; -------------------------------------------------------------------------
7213 ;; ---- [INT] Tree reductions
7214 ;; -------------------------------------------------------------------------
7215 ;; Includes:
7216 ;; - ANDV
7217 ;; - EORV
7218 ;; - ORV
7219 ;; - SADDV
7220 ;; - SMAXV
7221 ;; - SMINV
7222 ;; - UADDV
7223 ;; - UMAXV
7224 ;; - UMINV
7225 ;; -------------------------------------------------------------------------
7226
7227 ;; Unpredicated integer add reduction.
7228 (define_expand "reduc_plus_scal_<mode>"
7229 [(match_operand:<VEL> 0 "register_operand")
7230 (match_operand:SVE_FULL_I 1 "register_operand")]
7231 "TARGET_SVE"
7232 {
7233 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7234 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7235 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7236 if (tmp != operands[0])
7237 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7238 DONE;
7239 }
7240 )
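
;; For illustration (a sketch, not a guaranteed code sequence): a plain
;; sum reduction such as
;;
;;   int32_t sum = 0;
;;   for (int i = 0; i < n; ++i)
;;     sum += x[i];
;;
;; goes through this expander, emitting something like
;; "uaddv d0, p0, z0.s" followed by a lowpart move of the 64-bit result
;; for element types narrower than DImode.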
7241
7242 ;; Predicated integer add reduction. The result is always 64 bits.
7243 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7244 [(set (match_operand:DI 0 "register_operand" "=w")
7245 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7246 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7247 SVE_INT_ADDV))]
7248 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7249 "<su>addv\t%d0, %1, %2.<Vetype>"
7250 )
7251
7252 ;; Unpredicated integer reductions.
7253 (define_expand "reduc_<optab>_scal_<mode>"
7254 [(set (match_operand:<VEL> 0 "register_operand")
7255 (unspec:<VEL> [(match_dup 2)
7256 (match_operand:SVE_FULL_I 1 "register_operand")]
7257 SVE_INT_REDUCTION))]
7258 "TARGET_SVE"
7259 {
7260 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7261 }
7262 )
7263
7264 ;; Predicated integer reductions.
7265 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7266 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7267 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7268 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7269 SVE_INT_REDUCTION))]
7270 "TARGET_SVE"
7271 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7272 )
7273
7274 ;; -------------------------------------------------------------------------
7275 ;; ---- [FP] Tree reductions
7276 ;; -------------------------------------------------------------------------
7277 ;; Includes:
7278 ;; - FADDV
7279 ;; - FMAXNMV
7280 ;; - FMAXV
7281 ;; - FMINNMV
7282 ;; - FMINV
7283 ;; -------------------------------------------------------------------------
7284
7285 ;; Unpredicated floating-point tree reductions.
7286 (define_expand "reduc_<optab>_scal_<mode>"
7287 [(set (match_operand:<VEL> 0 "register_operand")
7288 (unspec:<VEL> [(match_dup 2)
7289 (match_operand:SVE_FULL_F 1 "register_operand")]
7290 SVE_FP_REDUCTION))]
7291 "TARGET_SVE"
7292 {
7293 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7294 }
7295 )
7296
7297 ;; Predicated floating-point tree reductions.
7298 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7299 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7300 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7301 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7302 SVE_FP_REDUCTION))]
7303 "TARGET_SVE"
7304 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7305 )
7306
7307 ;; -------------------------------------------------------------------------
7308 ;; ---- [FP] Left-to-right reductions
7309 ;; -------------------------------------------------------------------------
7310 ;; Includes:
7311 ;; - FADDA
7312 ;; -------------------------------------------------------------------------
7313
7314 ;; Unpredicated in-order FP reductions.
7315 (define_expand "fold_left_plus_<mode>"
7316 [(set (match_operand:<VEL> 0 "register_operand")
7317 (unspec:<VEL> [(match_dup 3)
7318 (match_operand:<VEL> 1 "register_operand")
7319 (match_operand:SVE_FULL_F 2 "register_operand")]
7320 UNSPEC_FADDA))]
7321 "TARGET_SVE"
7322 {
7323 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7324 }
7325 )
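
;; A hedged example of why this form exists: without -ffast-math,
;; floating-point addition cannot be reassociated, so
;;
;;   double s = 0.0;
;;   for (int i = 0; i < n; ++i)
;;     s += x[i];
;;
;; must be vectorized with the strictly in-order
;; "fadda d0, p0, d0, z0.d" rather than with a tree reduction.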
7326
7327 ;; Predicated in-order FP reductions.
7328 (define_insn "mask_fold_left_plus_<mode>"
7329 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7330 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7331 (match_operand:<VEL> 1 "register_operand" "0")
7332 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7333 UNSPEC_FADDA))]
7334 "TARGET_SVE"
7335 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7336 )
7337
7338 ;; =========================================================================
7339 ;; == Permutes
7340 ;; =========================================================================
7341
7342 ;; -------------------------------------------------------------------------
7343 ;; ---- [INT,FP] General permutes
7344 ;; -------------------------------------------------------------------------
7345 ;; Includes:
7346 ;; - TBL
7347 ;; -------------------------------------------------------------------------
7348
7349 (define_expand "vec_perm<mode>"
7350 [(match_operand:SVE_FULL 0 "register_operand")
7351 (match_operand:SVE_FULL 1 "register_operand")
7352 (match_operand:SVE_FULL 2 "register_operand")
7353 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7354 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7355 {
7356 aarch64_expand_sve_vec_perm (operands[0], operands[1],
7357 operands[2], operands[3]);
7358 DONE;
7359 }
7360 )
7361
7362 (define_insn "@aarch64_sve_tbl<mode>"
7363 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7364 (unspec:SVE_FULL
7365 [(match_operand:SVE_FULL 1 "register_operand" "w")
7366 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7367 UNSPEC_TBL))]
7368 "TARGET_SVE"
7369 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7370 )
7371
7372 ;; -------------------------------------------------------------------------
7373 ;; ---- [INT,FP] Special-purpose unary permutes
7374 ;; -------------------------------------------------------------------------
7375 ;; Includes:
7376 ;; - COMPACT
7377 ;; - DUP
7378 ;; - REV
7379 ;; -------------------------------------------------------------------------
7380
7381 ;; Compact active elements and pad with zeros.
7382 (define_insn "@aarch64_sve_compact<mode>"
7383 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7384 (unspec:SVE_FULL_SD
7385 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7386 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7387 UNSPEC_SVE_COMPACT))]
7388 "TARGET_SVE"
7389 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7390 )
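
;; The ACLE svcompact intrinsic maps directly onto this pattern. A minimal
;; sketch (function and variable names are illustrative only):
;;
;;   #include <arm_sve.h>
;;   svfloat64_t keep_active (svbool_t pg, svfloat64_t x)
;;   {
;;     return svcompact (pg, x);   /* compact z0.d, p0, z0.d  */
;;   }
;;
;; COMPACT only exists for .s and .d element sizes, hence the SVE_FULL_SD
;; restriction above.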
7391
7392 ;; Duplicate one element of a vector.
7393 (define_insn "@aarch64_sve_dup_lane<mode>"
7394 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7395 (vec_duplicate:SVE_FULL
7396 (vec_select:<VEL>
7397 (match_operand:SVE_FULL 1 "register_operand" "w")
7398 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7399 "TARGET_SVE
7400 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7401 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7402 )
7403
7404 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7405 ;;
7406 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7407 ;; number op2 + N of op1. (We don't need to distinguish between memory
7408 ;; and architectural register lane numbering for op1 or op0, since the
7409 ;; two numbering schemes are the same for SVE.)
7410 ;;
7411 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7412 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7413 ;; of op0. We therefore get the correct result for both endiannesses.
7414 ;;
7415 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7416 ;; is in the opposite order to architectural register lane numbering.
7417 ;; Thus if we were to do this operation via a V128 temporary register,
7418 ;; the vec_select and vec_duplicate would both involve a reverse operation
7419 ;; for big-endian targets. In this fused pattern the two reverses cancel
7420 ;; each other out.
7421 (define_insn "@aarch64_sve_dupq_lane<mode>"
7422 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7423 (vec_duplicate:SVE_FULL
7424 (vec_select:<V128>
7425 (match_operand:SVE_FULL 1 "register_operand" "w")
7426 (match_operand 2 "ascending_int_parallel"))))]
7427 "TARGET_SVE
7428 && (INTVAL (XVECEXP (operands[2], 0, 0))
7429 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7430 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7431 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7432 {
7433 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7434 * GET_MODE_SIZE (<VEL>mode));
7435 operands[2] = gen_int_mode (byte / 16, DImode);
7436 return "dup\t%0.q, %1.q[%2]";
7437 }
7438 )
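
;; A worked example of the index arithmetic above: for VNx4SI (.s elements,
;; 4 bytes each), a vec_select parallel of [4 5 6 7] has first lane 4, so
;; byte = 4 * 4 = 16. That satisfies both conditions (16 % 16 == 0 and
;; 16 <= 63) and the output is "dup z0.q, z1.q[1]", since 16 / 16 == 1.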
7439
7440 ;; Reverse the order of elements within a full vector.
7441 (define_insn "@aarch64_sve_rev<mode>"
7442 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7443 (unspec:SVE_FULL
7444 [(match_operand:SVE_FULL 1 "register_operand" "w")]
7445 UNSPEC_REV))]
7446 "TARGET_SVE"
7447 "rev\t%0.<Vetype>, %1.<Vetype>")
7448
7449 ;; -------------------------------------------------------------------------
7450 ;; ---- [INT,FP] Special-purpose binary permutes
7451 ;; -------------------------------------------------------------------------
7452 ;; Includes:
7453 ;; - SPLICE
7454 ;; - TRN1
7455 ;; - TRN2
7456 ;; - UZP1
7457 ;; - UZP2
7458 ;; - ZIP1
7459 ;; - ZIP2
7460 ;; -------------------------------------------------------------------------
7461
7462 ;; Like EXT, but start at the first active element.
7463 (define_insn "@aarch64_sve_splice<mode>"
7464 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7465 (unspec:SVE_FULL
7466 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7467 (match_operand:SVE_FULL 2 "register_operand" "0, w")
7468 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7469 UNSPEC_SVE_SPLICE))]
7470 "TARGET_SVE"
7471 "@
7472 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7473 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7474 [(set_attr "movprfx" "*, yes")]
7475 )
7476
7477 ;; Permutes that take half the elements from one vector and half the
7478 ;; elements from the other.
7479 (define_insn "@aarch64_sve_<perm_insn><mode>"
7480 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7481 (unspec:SVE_FULL
7482 [(match_operand:SVE_FULL 1 "register_operand" "w")
7483 (match_operand:SVE_FULL 2 "register_operand" "w")]
7484 PERMUTE))]
7485 "TARGET_SVE"
7486 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7487 )
7488
7489 ;; Concatenate two vectors and extract a subvector. Note that the
7490 ;; immediate (third) operand is the lane index, not the byte index.
7491 (define_insn "@aarch64_sve_ext<mode>"
7492 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7493 (unspec:SVE_FULL
7494 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7495 (match_operand:SVE_FULL 2 "register_operand" "w, w")
7496 (match_operand:SI 3 "const_int_operand")]
7497 UNSPEC_EXT))]
7498 "TARGET_SVE
7499 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7500 {
7501 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7502 return (which_alternative == 0
7503 ? "ext\\t%0.b, %0.b, %2.b, #%3"
7504 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7505 }
7506 [(set_attr "movprfx" "*,yes")]
7507 )
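
;; A worked example of the lane-to-byte conversion above: for VNx8HI
;; (.h elements, 2 bytes each) and lane index 3, operands[3] becomes
;; 3 * 2 = 6, giving "ext z0.b, z0.b, z2.b, #6" in the first alternative.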
7508
7509 ;; -------------------------------------------------------------------------
7510 ;; ---- [PRED] Special-purpose unary permutes
7511 ;; -------------------------------------------------------------------------
7512 ;; Includes:
7513 ;; - REV
7514 ;; -------------------------------------------------------------------------
7515
7516 (define_insn "@aarch64_sve_rev<mode>"
7517 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7518 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7519 UNSPEC_REV))]
7520 "TARGET_SVE"
7521 "rev\t%0.<Vetype>, %1.<Vetype>")
7522
7523 ;; -------------------------------------------------------------------------
7524 ;; ---- [PRED] Special-purpose binary permutes
7525 ;; -------------------------------------------------------------------------
7526 ;; Includes:
7527 ;; - TRN1
7528 ;; - TRN2
7529 ;; - UZP1
7530 ;; - UZP2
7531 ;; - ZIP1
7532 ;; - ZIP2
7533 ;; -------------------------------------------------------------------------
7534
7535 ;; Permutes that take half the elements from one vector and half the
7536 ;; elements from the other.
7537 (define_insn "@aarch64_sve_<perm_insn><mode>"
7538 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7539 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7540 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7541 PERMUTE))]
7542 "TARGET_SVE"
7543 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7544 )
7545
7546 ;; =========================================================================
7547 ;; == Conversions
7548 ;; =========================================================================
7549
7550 ;; -------------------------------------------------------------------------
7551 ;; ---- [INT<-INT] Packs
7552 ;; -------------------------------------------------------------------------
7553 ;; Includes:
7554 ;; - UZP1
7555 ;; -------------------------------------------------------------------------
7556
7557 ;; Integer pack. Use UZP1 on the narrower type, which discards
7558 ;; the high part of each wide element.
7559 (define_insn "vec_pack_trunc_<Vwide>"
7560 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7561 (unspec:SVE_FULL_BHSI
7562 [(match_operand:<VWIDE> 1 "register_operand" "w")
7563 (match_operand:<VWIDE> 2 "register_operand" "w")]
7564 UNSPEC_PACK))]
7565 "TARGET_SVE"
7566 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7567 )
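
;; For illustration, assuming a 128-bit vector length and little-endian
;; order: with z1.s = { a, b } and z2.s = { c, d },
;; "uzp1 z0.h, z1.h, z2.h" selects the even-numbered .h elements of the
;; concatenation, i.e. { lo(a), lo(b), lo(c), lo(d) } -- exactly the
;; truncation of each wide element that vec_pack_trunc requires.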
7568
7569 ;; -------------------------------------------------------------------------
7570 ;; ---- [INT<-INT] Unpacks
7571 ;; -------------------------------------------------------------------------
7572 ;; Includes:
7573 ;; - SUNPKHI
7574 ;; - SUNPKLO
7575 ;; - UUNPKHI
7576 ;; - UUNPKLO
7577 ;; -------------------------------------------------------------------------
7578
7579 ;; Unpack the low or high half of a vector, where "high" refers to
7580 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7581 ;; for little-endian.
7582 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7583 [(match_operand:<VWIDE> 0 "register_operand")
7584 (unspec:<VWIDE>
7585 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7586 "TARGET_SVE"
7587 {
7588 emit_insn ((<hi_lanes_optab>
7589 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7590 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7591 (operands[0], operands[1]));
7592 DONE;
7593 }
7594 )
7595
7596 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7598 (unspec:<VWIDE>
7599 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7600 UNPACK))]
7601 "TARGET_SVE"
7602 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7603 )
7604
7605 ;; -------------------------------------------------------------------------
7606 ;; ---- [INT<-FP] Conversions
7607 ;; -------------------------------------------------------------------------
7608 ;; Includes:
7609 ;; - FCVTZS
7610 ;; - FCVTZU
7611 ;; -------------------------------------------------------------------------
7612
7613 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7614 ;; SF to SI or DF to DI).
7615 (define_expand "<optab><mode><v_int_equiv>2"
7616 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7617 (unspec:<V_INT_EQUIV>
7618 [(match_dup 2)
7619 (const_int SVE_RELAXED_GP)
7620 (match_operand:SVE_FULL_F 1 "register_operand")]
7621 SVE_COND_FCVTI))]
7622 "TARGET_SVE"
7623 {
7624 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7625 }
7626 )
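
;; An illustrative example (register choices will vary): the loop
;;
;;   for (int i = 0; i < n; ++i)
;;     out[i] = (int32_t) in[i];   /* float in[], int32_t out[] */
;;
;; goes through this expander with an all-true predicate and is
;; implemented by "fcvtzs z0.s, p0/m, z1.s".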
7627
7628 ;; Predicated float-to-integer conversion, either to the same width or wider.
7629 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7630 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7631 (unspec:SVE_FULL_HSDI
7632 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7633 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7634 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7635 SVE_COND_FCVTI))]
7636 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7637 "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7638 )
7639
7640 ;; Predicated narrowing float-to-integer conversion.
7641 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7642 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7643 (unspec:VNx4SI_ONLY
7644 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7645 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7646 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7647 SVE_COND_FCVTI))]
7648 "TARGET_SVE"
7649 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7650 )
7651
7652 ;; Predicated float-to-integer conversion with merging, either to the same
7653 ;; width or wider.
7654 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7655 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7656 (unspec:SVE_FULL_HSDI
7657 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7658 (unspec:SVE_FULL_HSDI
7659 [(match_dup 1)
7660 (const_int SVE_STRICT_GP)
7661 (match_operand:SVE_FULL_F 2 "register_operand")]
7662 SVE_COND_FCVTI)
7663 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7664 UNSPEC_SEL))]
7665 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7666 )
7667
7668 ;; The first alternative doesn't need the earlyclobber, but the only case
7669 ;; it would help is the uninteresting one in which operands 2 and 3 are
7670 ;; the same register (despite having different modes). Making all the
7671 ;; alternatives earlyclobber makes things more consistent for the
7672 ;; register allocator.
7673 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7674 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7675 (unspec:SVE_FULL_HSDI
7676 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7677 (unspec:SVE_FULL_HSDI
7678 [(match_operand 4)
7679 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7680 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7681 SVE_COND_FCVTI)
7682 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7683 UNSPEC_SEL))]
7684 "TARGET_SVE
7685 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7686 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7687 "@
7688 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7689 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7690 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7691 "&& !rtx_equal_p (operands[1], operands[4])"
7692 {
7693 operands[4] = copy_rtx (operands[1]);
7694 }
7695 [(set_attr "movprfx" "*,yes,yes")]
7696 )
7697
7698 ;; Predicated narrowing float-to-integer conversion with merging.
7699 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7700 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7701 (unspec:VNx4SI_ONLY
7702 [(match_operand:VNx2BI 1 "register_operand")
7703 (unspec:VNx4SI_ONLY
7704 [(match_dup 1)
7705 (const_int SVE_STRICT_GP)
7706 (match_operand:VNx2DF_ONLY 2 "register_operand")]
7707 SVE_COND_FCVTI)
7708 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
7709 UNSPEC_SEL))]
7710 "TARGET_SVE"
7711 )
7712
7713 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7714 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7715 (unspec:VNx4SI_ONLY
7716 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7717 (unspec:VNx4SI_ONLY
7718 [(match_dup 1)
7719 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7720 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7721 SVE_COND_FCVTI)
7722 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7723 UNSPEC_SEL))]
7724 "TARGET_SVE"
7725 "@
7726 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7727 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7728 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7729 [(set_attr "movprfx" "*,yes,yes")]
7730 )
7731
7732 ;; -------------------------------------------------------------------------
7733 ;; ---- [INT<-FP] Packs
7734 ;; -------------------------------------------------------------------------
7735 ;; The patterns in this section are synthetic.
7736 ;; -------------------------------------------------------------------------
7737
7738 ;; Convert two vectors of DF to SI and pack the results into a single vector.
7739 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7740 [(set (match_dup 4)
7741 (unspec:VNx4SI
7742 [(match_dup 3)
7743 (const_int SVE_RELAXED_GP)
7744 (match_operand:VNx2DF 1 "register_operand")]
7745 SVE_COND_FCVTI))
7746 (set (match_dup 5)
7747 (unspec:VNx4SI
7748 [(match_dup 3)
7749 (const_int SVE_RELAXED_GP)
7750 (match_operand:VNx2DF 2 "register_operand")]
7751 SVE_COND_FCVTI))
7752 (set (match_operand:VNx4SI 0 "register_operand")
7753 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7754 "TARGET_SVE"
7755 {
7756 operands[3] = aarch64_ptrue_reg (VNx2BImode);
7757 operands[4] = gen_reg_rtx (VNx4SImode);
7758 operands[5] = gen_reg_rtx (VNx4SImode);
7759 }
7760 )
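
;; For illustration, double-to-int32 truncation such as
;;
;;   for (int i = 0; i < n; ++i)
;;     out[i] = (int32_t) in[i];   /* double in[], int32_t out[] */
;;
;; uses this synthetic expansion: two "fcvtzs z?.s, p0/m, z?.d"
;; conversions of the wide inputs, then "uzp1 z0.s, z?.s, z?.s" to pack
;; the two partial results (register numbers are placeholders).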
7761
7762 ;; -------------------------------------------------------------------------
7763 ;; ---- [INT<-FP] Unpacks
7764 ;; -------------------------------------------------------------------------
7765 ;; No patterns here yet!
7766 ;; -------------------------------------------------------------------------
7767
7768 ;; -------------------------------------------------------------------------
7769 ;; ---- [FP<-INT] Conversions
7770 ;; -------------------------------------------------------------------------
7771 ;; Includes:
7772 ;; - SCVTF
7773 ;; - UCVTF
7774 ;; -------------------------------------------------------------------------
7775
7776 ;; Unpredicated conversion of integers to floats of the same size
7777 ;; (HI to HF, SI to SF or DI to DF).
7778 (define_expand "<optab><v_int_equiv><mode>2"
7779 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7780 (unspec:SVE_FULL_F
7781 [(match_dup 2)
7782 (const_int SVE_RELAXED_GP)
7783 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
7784 SVE_COND_ICVTF))]
7785 "TARGET_SVE"
7786 {
7787 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7788 }
7789 )
7790
7791 ;; Predicated integer-to-float conversion, either to the same width or
7792 ;; narrower.
7793 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7794 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
7795 (unspec:SVE_FULL_F
7796 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7797 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7798 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
7799 SVE_COND_ICVTF))]
7800 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7801 "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7802 )
7803
7804 ;; Predicated widening integer-to-float conversion.
7805 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7806 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
7807 (unspec:VNx2DF_ONLY
7808 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7809 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7810 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
7811 SVE_COND_ICVTF))]
7812 "TARGET_SVE"
7813 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
7814 )
7815
7816 ;; Predicated integer-to-float conversion with merging, either to the same
7817 ;; width or narrower.
7818 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7819 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7820 (unspec:SVE_FULL_F
7821 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7822 (unspec:SVE_FULL_F
7823 [(match_dup 1)
7824 (const_int SVE_STRICT_GP)
7825 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
7826 SVE_COND_ICVTF)
7827 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
7828 UNSPEC_SEL))]
7829 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7830 )
7831
7832 ;; The first alternative doesn't need the earlyclobber, but the only case
7833 ;; it would help is the uninteresting one in which operands 2 and 3 are
7834 ;; the same register (despite having different modes). Making all the
7835 ;; alternatives earlyclobber makes things more consistent for the
7836 ;; register allocator.
7837 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7838 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
7839 (unspec:SVE_FULL_F
7840 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7841 (unspec:SVE_FULL_F
7842 [(match_operand 4)
7843 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7844 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
7845 SVE_COND_ICVTF)
7846 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7847 UNSPEC_SEL))]
7848 "TARGET_SVE
7849 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7850 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7851 "@
7852 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7853 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7854 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7855 "&& !rtx_equal_p (operands[1], operands[4])"
7856 {
7857 operands[4] = copy_rtx (operands[1]);
7858 }
7859 [(set_attr "movprfx" "*,yes,yes")]
7860 )
7861
7862 ;; Predicated widening integer-to-float conversion with merging.
7863 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7864 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
7865 (unspec:VNx2DF_ONLY
7866 [(match_operand:VNx2BI 1 "register_operand")
7867 (unspec:VNx2DF_ONLY
7868 [(match_dup 1)
7869 (const_int SVE_STRICT_GP)
7870 (match_operand:VNx4SI_ONLY 2 "register_operand")]
7871 SVE_COND_ICVTF)
7872 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
7873 UNSPEC_SEL))]
7874 "TARGET_SVE"
7875 )
7876
7877 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7878 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
7879 (unspec:VNx2DF_ONLY
7880 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7881 (unspec:VNx2DF_ONLY
7882 [(match_dup 1)
7883 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7884 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
7885 SVE_COND_ICVTF)
7886 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7887 UNSPEC_SEL))]
7888 "TARGET_SVE"
7889 "@
7890 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7891 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7892 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
7893 [(set_attr "movprfx" "*,yes,yes")]
7894 )
7895
7896 ;; -------------------------------------------------------------------------
7897 ;; ---- [FP<-INT] Packs
7898 ;; -------------------------------------------------------------------------
7899 ;; No patterns here yet!
7900 ;; -------------------------------------------------------------------------
7901
7902 ;; -------------------------------------------------------------------------
7903 ;; ---- [FP<-INT] Unpacks
7904 ;; -------------------------------------------------------------------------
7905 ;; The patterns in this section are synthetic.
7906 ;; -------------------------------------------------------------------------
7907
7908 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
7909 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
7910 ;; unpacked VNx4SI to VNx2DF.
7911 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
7912 [(match_operand:VNx2DF 0 "register_operand")
7913 (FLOATUORS:VNx2DF
7914 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
7915 UNPACK_UNSIGNED))]
7916 "TARGET_SVE"
7917 {
7918 /* Use ZIP to do the unpack, since we don't care about the upper halves
7919 and since it has the nice property of not needing any subregs.
7920 If using UUNPK* turns out to be preferable, we could model it as
7921 a ZIP whose first operand is zero. */
7922 rtx temp = gen_reg_rtx (VNx4SImode);
7923 emit_insn ((<hi_lanes_optab>
7924 ? gen_aarch64_sve_zip2vnx4si
7925 : gen_aarch64_sve_zip1vnx4si)
7926 (temp, operands[1], operands[1]));
7927 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
7928 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
7929 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
7930 (operands[0], ptrue, temp, strictness));
7931 DONE;
7932 }
7933 )
7934
7935 ;; -------------------------------------------------------------------------
7936 ;; ---- [FP<-FP] Packs
7937 ;; -------------------------------------------------------------------------
7938 ;; Includes:
7939 ;; - FCVT
7940 ;; -------------------------------------------------------------------------
7941
7942 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
7943 ;; the results into a single vector.
7944 (define_expand "vec_pack_trunc_<Vwide>"
7945 [(set (match_dup 4)
7946 (unspec:SVE_FULL_HSF
7947 [(match_dup 3)
7948 (const_int SVE_RELAXED_GP)
7949 (match_operand:<VWIDE> 1 "register_operand")]
7950 UNSPEC_COND_FCVT))
7951 (set (match_dup 5)
7952 (unspec:SVE_FULL_HSF
7953 [(match_dup 3)
7954 (const_int SVE_RELAXED_GP)
7955 (match_operand:<VWIDE> 2 "register_operand")]
7956 UNSPEC_COND_FCVT))
7957 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
7958 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7959 "TARGET_SVE"
7960 {
7961 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
7962 operands[4] = gen_reg_rtx (<MODE>mode);
7963 operands[5] = gen_reg_rtx (<MODE>mode);
7964 }
7965 )
7966
7967 ;; Predicated float-to-float truncation.
7968 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7969 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
7970 (unspec:SVE_FULL_HSF
7971 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
7972 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7973 (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
7974 SVE_COND_FCVT))]
7975 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7976 "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
7977 )
7978
7979 ;; Predicated float-to-float truncation with merging.
7980 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7981 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7982 (unspec:SVE_FULL_HSF
7983 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
7984 (unspec:SVE_FULL_HSF
7985 [(match_dup 1)
7986 (const_int SVE_STRICT_GP)
7987 (match_operand:SVE_FULL_SDF 2 "register_operand")]
7988 SVE_COND_FCVT)
7989 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
7990 UNSPEC_SEL))]
7991 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7992 )
7993
7994 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7995 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
7996 (unspec:SVE_FULL_HSF
7997 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7998 (unspec:SVE_FULL_HSF
7999 [(match_dup 1)
8000 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8001 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
8002 SVE_COND_FCVT)
8003 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8004 UNSPEC_SEL))]
8005 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8006 "@
8007 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8008 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8009 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8010 [(set_attr "movprfx" "*,yes,yes")]
8011 )
8012
8013 ;; -------------------------------------------------------------------------
8014 ;; ---- [FP<-FP] Unpacks
8015 ;; -------------------------------------------------------------------------
8016 ;; Includes:
8017 ;; - FCVT
8018 ;; -------------------------------------------------------------------------
8019
8020 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
8021 ;; First unpack the source without conversion, then float-convert the
8022 ;; unpacked source.
8023 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
8024 [(match_operand:<VWIDE> 0 "register_operand")
8025 (unspec:SVE_FULL_HSF
8026 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8027 UNPACK_UNSIGNED)]
8028 "TARGET_SVE"
8029 {
8030 /* Use ZIP to do the unpack, since we don't care about the upper halves
8031 and since it has the nice property of not needing any subregs.
8032 If using UUNPK* turns out to be preferable, we could model it as
8033 a ZIP whose first operand is zero. */
8034 rtx temp = gen_reg_rtx (<MODE>mode);
8035 emit_insn ((<hi_lanes_optab>
8036 ? gen_aarch64_sve_zip2<mode>
8037 : gen_aarch64_sve_zip1<mode>)
8038 (temp, operands[1], operands[1]));
8039 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8040 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8041 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8042 (operands[0], ptrue, temp, strictness));
8043 DONE;
8044 }
8045 )
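
;; A sketch of the expansion for float-to-double widening such as
;;
;;   for (int i = 0; i < n; ++i)
;;     out[i] = in[i];   /* float in[], double out[] */
;;
;; on little-endian, the lo half becomes "zip1 z1.s, z0.s, z0.s" followed
;; by "fcvt z2.d, p0/m, z1.s", and the hi half uses zip2 instead of zip1.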
8046
8047 ;; Predicated float-to-float extension.
8048 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8049 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8050 (unspec:SVE_FULL_SDF
8051 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8052 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8053 (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8054 SVE_COND_FCVT))]
8055 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8056 "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8057 )
8058
8059 ;; Predicated float-to-float extension with merging.
8060 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8061 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8062 (unspec:SVE_FULL_SDF
8063 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8064 (unspec:SVE_FULL_SDF
8065 [(match_dup 1)
8066 (const_int SVE_STRICT_GP)
8067 (match_operand:SVE_FULL_HSF 2 "register_operand")]
8068 SVE_COND_FCVT)
8069 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8070 UNSPEC_SEL))]
8071 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8072 )
8073
8074 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8075 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8076 (unspec:SVE_FULL_SDF
8077 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8078 (unspec:SVE_FULL_SDF
8079 [(match_dup 1)
8080 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8081 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8082 SVE_COND_FCVT)
8083 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8084 UNSPEC_SEL))]
8085 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8086 "@
8087 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8088 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8089 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8090 [(set_attr "movprfx" "*,yes,yes")]
8091 )
8092
8093 ;; -------------------------------------------------------------------------
8094 ;; ---- [PRED<-PRED] Packs
8095 ;; -------------------------------------------------------------------------
8096 ;; Includes:
8097 ;; - UZP1
8098 ;; -------------------------------------------------------------------------
8099
8100 ;; Predicate pack. Use UZP1 on the narrower type, which discards
8101 ;; the high part of each wide element.
8102 (define_insn "vec_pack_trunc_<Vwide>"
8103 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8104 (unspec:PRED_BHS
8105 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8106 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8107 UNSPEC_PACK))]
8108 "TARGET_SVE"
8109 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8110 )
8111
8112 ;; -------------------------------------------------------------------------
8113 ;; ---- [PRED<-PRED] Unpacks
8114 ;; -------------------------------------------------------------------------
8115 ;; Includes:
8116 ;; - PUNPKHI
8117 ;; - PUNPKLO
8118 ;; -------------------------------------------------------------------------
8119
8120 ;; Unpack the low or high half of a predicate, where "high" refers to
8121 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8122 ;; for little-endian.
8123 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8124 [(match_operand:<VWIDE> 0 "register_operand")
8125 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8126 UNPACK)]
8127 "TARGET_SVE"
8128 {
8129 emit_insn ((<hi_lanes_optab>
8130 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8131 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8132 (operands[0], operands[1]));
8133 DONE;
8134 }
8135 )
8136
8137 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8138 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8139 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8140 UNPACK_UNSIGNED))]
8141 "TARGET_SVE"
8142 "punpk<perm_hilo>\t%0.h, %1.b"
8143 )
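
;; A worked example: with a 128-bit vector length, p1.b has 16 lanes and
;; the VNx8BI result has 8. "punpklo p0.h, p1.b" copies p1's low eight .b
;; lanes to the eight .h lanes of p0, so active byte lanes become active
;; halfword lanes; punpkhi does the same for the high eight lanes.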
8144
8145 ;; =========================================================================
8146 ;; == Vector partitioning
8147 ;; =========================================================================
8148
8149 ;; -------------------------------------------------------------------------
8150 ;; ---- [PRED] Unary partitioning
8151 ;; -------------------------------------------------------------------------
8152 ;; Includes:
8153 ;; - BRKA
8154 ;; - BRKAS
8155 ;; - BRKB
8156 ;; - BRKBS
8157 ;; -------------------------------------------------------------------------
8158
8159 ;; Note that unlike most other instructions that have both merging and
8160 ;; zeroing forms, these instructions don't operate elementwise and so
8161 ;; don't fit the IFN_COND model.
8162 (define_insn "@aarch64_brk<brk_op>"
8163 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8164 (unspec:VNx16BI
8165 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8166 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8167 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8168 SVE_BRK_UNARY))]
8169 "TARGET_SVE"
8170 "@
8171 brk<brk_op>\t%0.b, %1/z, %2.b
8172 brk<brk_op>\t%0.b, %1/m, %2.b"
8173 )
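
;; A worked example of the non-elementwise behaviour: with an all-true
;; governing predicate and source p2.b = {0,0,1,0,1,...} (first active
;; lane 2), "brka p0.b, p1/z, p2.b" sets lanes 0..2 of p0 and clears the
;; rest, while brkb would set only lanes 0..1. Inactive governing lanes
;; are zeroed or merged from operand 3, depending on the alternative.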
8174
8175 ;; Same, but also producing a flags result.
8176 (define_insn "*aarch64_brk<brk_op>_cc"
8177 [(set (reg:CC_NZC CC_REGNUM)
8178 (unspec:CC_NZC
8179 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8180 (match_dup 1)
8181 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8182 (unspec:VNx16BI
8183 [(match_dup 1)
8184 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8185 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8186 SVE_BRK_UNARY)]
8187 UNSPEC_PTEST))
8188 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8189 (unspec:VNx16BI
8190 [(match_dup 1)
8191 (match_dup 2)
8192 (match_dup 3)]
8193 SVE_BRK_UNARY))]
8194 "TARGET_SVE"
8195 "@
8196 brk<brk_op>s\t%0.b, %1/z, %2.b
8197 brk<brk_op>s\t%0.b, %1/m, %2.b"
8198 )
8199
8200 ;; Same, but with only the flags result being interesting.
8201 (define_insn "*aarch64_brk<brk_op>_ptest"
8202 [(set (reg:CC_NZC CC_REGNUM)
8203 (unspec:CC_NZC
8204 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8205 (match_dup 1)
8206 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8207 (unspec:VNx16BI
8208 [(match_dup 1)
8209 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8210 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8211 SVE_BRK_UNARY)]
8212 UNSPEC_PTEST))
8213 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8214 "TARGET_SVE"
8215 "@
8216 brk<brk_op>s\t%0.b, %1/z, %2.b
8217 brk<brk_op>s\t%0.b, %1/m, %2.b"
8218 )
8219
8220 ;; -------------------------------------------------------------------------
8221 ;; ---- [PRED] Binary partitioning
8222 ;; -------------------------------------------------------------------------
8223 ;; Includes:
8224 ;; - BRKN
8225 ;; - BRKNS
8226 ;; - BRKPA
8227 ;; - BRKPAS
8228 ;; - BRKPB
8229 ;; - BRKPBS
8230 ;; -------------------------------------------------------------------------
8231
8232 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
8233 (define_insn "@aarch64_brk<brk_op>"
8234 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8235 (unspec:VNx16BI
8236 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8237 (match_operand:VNx16BI 2 "register_operand" "Upa")
8238 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8239 SVE_BRK_BINARY))]
8240 "TARGET_SVE"
8241 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8242 )
8243
8244 ;; Same, but also producing a flags result.
8245 (define_insn "*aarch64_brk<brk_op>_cc"
8246 [(set (reg:CC_NZC CC_REGNUM)
8247 (unspec:CC_NZC
8248 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8249 (match_dup 1)
8250 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8251 (unspec:VNx16BI
8252 [(match_dup 1)
8253 (match_operand:VNx16BI 2 "register_operand" "Upa")
8254 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8255 SVE_BRK_BINARY)]
8256 UNSPEC_PTEST))
8257 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8258 (unspec:VNx16BI
8259 [(match_dup 1)
8260 (match_dup 2)
8261 (match_dup 3)]
8262 SVE_BRK_BINARY))]
8263 "TARGET_SVE"
8264 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8265 )
8266
8267 ;; Same, but with only the flags result being interesting.
8268 (define_insn "*aarch64_brk<brk_op>_ptest"
8269 [(set (reg:CC_NZC CC_REGNUM)
8270 (unspec:CC_NZC
8271 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8272 (match_dup 1)
8273 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8274 (unspec:VNx16BI
8275 [(match_dup 1)
8276 (match_operand:VNx16BI 2 "register_operand" "Upa")
8277 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8278 SVE_BRK_BINARY)]
8279 UNSPEC_PTEST))
8280 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
8281 "TARGET_SVE"
8282 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8283 )
8284
8285 ;; -------------------------------------------------------------------------
8286 ;; ---- [PRED] Scalarization
8287 ;; -------------------------------------------------------------------------
8288 ;; Includes:
8289 ;; - PFIRST
8290 ;; - PNEXT
8291 ;; -------------------------------------------------------------------------
8292
8293 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8294 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8295 (unspec:PRED_ALL
8296 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8297 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8298 (match_operand:PRED_ALL 3 "register_operand" "0")]
8299 SVE_PITER))
8300 (clobber (reg:CC_NZC CC_REGNUM))]
8301 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8302 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8303 )
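
;; A hedged usage sketch via the ACLE intrinsics (the loop body is
;; illustrative):
;;
;;   svbool_t cur = svpfirst (pg, svpfalse ()); /* pfirst p0.b, pg, p0.b */
;;   while (svptest_any (pg, cur))
;;     {
;;       /* ...process the single lane selected by CUR... */
;;       cur = svpnext_b8 (pg, cur);            /* pnext p0.b, pg, p0.b */
;;     }
;;
;; iterates over the active lanes of PG one at a time.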
8304
8305 ;; Same, but also producing a flags result.
8306 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8307 [(set (reg:CC_NZC CC_REGNUM)
8308 (unspec:CC_NZC
8309 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8310 (match_operand 2)
8311 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8312 (unspec:PRED_ALL
8313 [(match_operand 4)
8314 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8315 (match_operand:PRED_ALL 6 "register_operand" "0")]
8316 SVE_PITER)]
8317 UNSPEC_PTEST))
8318 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8319 (unspec:PRED_ALL
8320 [(match_dup 4)
8321 (match_dup 5)
8322 (match_dup 6)]
8323 SVE_PITER))]
8324 "TARGET_SVE
8325 && <max_elem_bits> >= <elem_bits>
8326 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8327 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8328 "&& !rtx_equal_p (operands[2], operands[4])"
8329 {
8330 operands[4] = operands[2];
8331 operands[5] = operands[3];
8332 }
8333 )
8334
8335 ;; Same, but with only the flags result being interesting.
8336 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8337 [(set (reg:CC_NZC CC_REGNUM)
8338 (unspec:CC_NZC
8339 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8340 (match_operand 2)
8341 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8342 (unspec:PRED_ALL
8343 [(match_operand 4)
8344 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8345 (match_operand:PRED_ALL 6 "register_operand" "0")]
8346 SVE_PITER)]
8347 UNSPEC_PTEST))
8348 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8349 "TARGET_SVE
8350 && <max_elem_bits> >= <elem_bits>
8351 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8352 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8353 "&& !rtx_equal_p (operands[2], operands[4])"
8354 {
8355 operands[4] = operands[2];
8356 operands[5] = operands[3];
8357 }
8358 )
8359
8360 ;; =========================================================================
8361 ;; == Counting elements
8362 ;; =========================================================================
8363
8364 ;; -------------------------------------------------------------------------
8365 ;; ---- [INT] Count elements in a pattern (scalar)
8366 ;; -------------------------------------------------------------------------
8367 ;; Includes:
8368 ;; - CNTB
8369 ;; - CNTD
8370 ;; - CNTH
8371 ;; - CNTW
8372 ;; -------------------------------------------------------------------------
8373
8374 ;; Count the number of elements in an svpattern. Operand 1 is the pattern,
8375 ;; operand 2 is the number of elements that fit in a 128-bit block, and
8376 ;; operand 3 is a multiplier in the range [1, 16].
8377 ;;
8378 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
8379 (define_insn "aarch64_sve_cnt_pat"
8380 [(set (match_operand:DI 0 "register_operand" "=r")
8381 (zero_extend:DI
8382 (unspec:SI [(match_operand:DI 1 "const_int_operand")
8383 (match_operand:DI 2 "const_int_operand")
8384 (match_operand:DI 3 "const_int_operand")]
8385 UNSPEC_SVE_CNT_PAT)))]
8386 "TARGET_SVE"
8387 {
8388 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
8389 }
8390 )
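
;; For example (an illustrative mapping, not an exhaustive one), the ACLE
;; call svcntd_pat (SV_POW2) counts .d elements using the POW2 pattern and
;; assembles to "cntd x0, pow2"; a multiplier of 2 would append ", mul #2".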
8391
8392 ;; -------------------------------------------------------------------------
8393 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
8394 ;; -------------------------------------------------------------------------
8395 ;; Includes:
8396 ;; - INC
8397 ;; - SQINC
8398 ;; - UQINC
8399 ;; -------------------------------------------------------------------------
8400
8401 ;; Increment a DImode register by the number of elements in an svpattern.
8402 ;; See aarch64_sve_cnt_pat for the counting behavior.
8403 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8404 [(set (match_operand:DI 0 "register_operand" "=r")
8405 (ANY_PLUS:DI (zero_extend:DI
8406 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8407 (match_operand:DI 3 "const_int_operand")
8408 (match_operand:DI 4 "const_int_operand")]
8409 UNSPEC_SVE_CNT_PAT))
8410 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8411 "TARGET_SVE"
8412 {
8413 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8414 operands + 2);
8415 }
8416 )
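
;; For example, with <inc_dec> == inc, the SV_ALL pattern, four elements
;; per 128-bit block and a multiplier of 2, this emits something like
;; "incw x0, all, mul #2", i.e. x0 += 2 * (number of .s elements); the
;; saturating forms print sqincw/uqincw instead.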
8417
8418 ;; Increment an SImode register by the number of elements in an svpattern
8419 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
8420 ;; behavior.
8421 (define_insn "*aarch64_sve_incsi_pat"
8422 [(set (match_operand:SI 0 "register_operand" "=r")
8423 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
8424 (match_operand:DI 3 "const_int_operand")
8425 (match_operand:DI 4 "const_int_operand")]
8426 UNSPEC_SVE_CNT_PAT)
8427 (match_operand:SI 1 "register_operand" "0")))]
8428 "TARGET_SVE"
8429 {
8430 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
8431 }
8432 )
8433
8434 ;; Increment an SImode register by the number of elements in an svpattern
8435 ;; using saturating arithmetic, extending the result to 64 bits.
8436 ;;
8437 ;; See aarch64_sve_cnt_pat for the counting behavior.
8438 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8439 [(set (match_operand:DI 0 "register_operand" "=r")
8440 (<paired_extend>:DI
8441 (SAT_PLUS:SI
8442 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8443 (match_operand:DI 3 "const_int_operand")
8444 (match_operand:DI 4 "const_int_operand")]
8445 UNSPEC_SVE_CNT_PAT)
8446 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8447 "TARGET_SVE"
8448 {
8449 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
8450 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8451 operands + 2);
8452 }
8453 )
8454
8455 ;; -------------------------------------------------------------------------
8456 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
8457 ;; -------------------------------------------------------------------------
8458 ;; Includes:
8459 ;; - INC
8460 ;; - SQINC
8461 ;; - UQINC
8462 ;; -------------------------------------------------------------------------
8463
8464 ;; Increment a vector of DIs by the number of elements in an svpattern.
8465 ;; See aarch64_sve_cnt_pat for the counting behavior.
8466 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8467 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8468 (ANY_PLUS:VNx2DI
8469 (vec_duplicate:VNx2DI
8470 (zero_extend:DI
8471 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8472 (match_operand:DI 3 "const_int_operand")
8473 (match_operand:DI 4 "const_int_operand")]
8474 UNSPEC_SVE_CNT_PAT)))
8475 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8476 "TARGET_SVE"
8477 {
8478 if (which_alternative == 1)
8479 output_asm_insn ("movprfx\t%0, %1", operands);
8480 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8481 operands + 2);
8482 }
8483 [(set_attr "movprfx" "*,yes")]
8484 )
8485
8486 ;; Increment a vector of SIs by the number of elements in an svpattern.
8487 ;; See aarch64_sve_cnt_pat for the counting behavior.
8488 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8489 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8490 (ANY_PLUS:VNx4SI
8491 (vec_duplicate:VNx4SI
8492 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8493 (match_operand:DI 3 "const_int_operand")
8494 (match_operand:DI 4 "const_int_operand")]
8495 UNSPEC_SVE_CNT_PAT))
8496 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8497 "TARGET_SVE"
8498 {
8499 if (which_alternative == 1)
8500 output_asm_insn ("movprfx\t%0, %1", operands);
8501 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8502 operands + 2);
8503 }
8504 [(set_attr "movprfx" "*,yes")]
8505 )
8506
8507 ;; Increment a vector of HIs by the number of elements in an svpattern.
8508 ;; See aarch64_sve_cnt_pat for the counting behavior.
8509 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8510 [(set (match_operand:VNx8HI 0 "register_operand")
8511 (ANY_PLUS:VNx8HI
8512 (vec_duplicate:VNx8HI
8513 (truncate:HI
8514 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8515 (match_operand:DI 3 "const_int_operand")
8516 (match_operand:DI 4 "const_int_operand")]
8517 UNSPEC_SVE_CNT_PAT)))
8518 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
8519 "TARGET_SVE"
8520 )
8521
8522 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8523 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8524 (ANY_PLUS:VNx8HI
8525 (vec_duplicate:VNx8HI
8526 (match_operator:HI 5 "subreg_lowpart_operator"
8527 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8528 (match_operand:DI 3 "const_int_operand")
8529 (match_operand:DI 4 "const_int_operand")]
8530 UNSPEC_SVE_CNT_PAT)]))
8531 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8532 "TARGET_SVE"
8533 {
8534 if (which_alternative == 1)
8535 output_asm_insn ("movprfx\t%0, %1", operands);
8536 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8537 operands + 2);
8538 }
8539 [(set_attr "movprfx" "*,yes")]
8540 )
8541
8542 ;; -------------------------------------------------------------------------
8543 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
8544 ;; -------------------------------------------------------------------------
8545 ;; Includes:
8546 ;; - DEC
8547 ;; - SQDEC
8548 ;; - UQDEC
8549 ;; -------------------------------------------------------------------------
8550
8551 ;; Decrement a DImode register by the number of elements in an svpattern.
8552 ;; See aarch64_sve_cnt_pat for the counting behavior.
8553 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8554 [(set (match_operand:DI 0 "register_operand" "=r")
8555 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
8556 (zero_extend:DI
8557 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8558 (match_operand:DI 3 "const_int_operand")
8559 (match_operand:DI 4 "const_int_operand")]
8560 UNSPEC_SVE_CNT_PAT))))]
8561 "TARGET_SVE"
8562 {
8563 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8564 operands + 2);
8565 }
8566 )
8567
8568 ;; Decrement an SImode register by the number of elements in an svpattern
8569 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
8570 ;; behavior.
8571 (define_insn "*aarch64_sve_decsi_pat"
8572 [(set (match_operand:SI 0 "register_operand" "=r")
8573 (minus:SI (match_operand:SI 1 "register_operand" "0")
8574 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8575 (match_operand:DI 3 "const_int_operand")
8576 (match_operand:DI 4 "const_int_operand")]
8577 UNSPEC_SVE_CNT_PAT)))]
8578 "TARGET_SVE"
8579 {
8580 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
8581 }
8582 )
8583
8584 ;; Decrement an SImode register by the number of elements in an svpattern
8585 ;; using saturating arithmetic, extending the result to 64 bits.
8586 ;;
8587 ;; See aarch64_sve_cnt_pat for the counting behavior.
8588 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8589 [(set (match_operand:DI 0 "register_operand" "=r")
8590 (<paired_extend>:DI
8591 (SAT_MINUS:SI
8592 (match_operand:SI_ONLY 1 "register_operand" "0")
8593 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8594 (match_operand:DI 3 "const_int_operand")
8595 (match_operand:DI 4 "const_int_operand")]
8596 UNSPEC_SVE_CNT_PAT))))]
8597 "TARGET_SVE"
8598 {
8599 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
8600 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8601 operands + 2);
8602 }
8603 )
8604
8605 ;; -------------------------------------------------------------------------
8606 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
8607 ;; -------------------------------------------------------------------------
8608 ;; Includes:
8609 ;; - DEC
8610 ;; - SQDEC
8611 ;; - UQDEC
8612 ;; -------------------------------------------------------------------------
8613
8614 ;; Decrement a vector of DIs by the number of elements in an svpattern.
8615 ;; See aarch64_sve_cnt_pat for the counting behavior.
8616 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8617 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8618 (ANY_MINUS:VNx2DI
8619 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
8620 (vec_duplicate:VNx2DI
8621 (zero_extend:DI
8622 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8623 (match_operand:DI 3 "const_int_operand")
8624 (match_operand:DI 4 "const_int_operand")]
8625 UNSPEC_SVE_CNT_PAT)))))]
8626 "TARGET_SVE"
8627 {
8628 if (which_alternative == 1)
8629 output_asm_insn ("movprfx\t%0, %1", operands);
8630 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8631 operands + 2);
8632 }
8633 [(set_attr "movprfx" "*,yes")]
8634 )
8635
;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

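;; Note: the expander above creates the element count as a truncate:HI
;; of the SImode unspec, while the insn matches subreg_lowpart_operator,
;; which accepts both a truncate and an equivalent lowpart subreg; the
;; insn therefore keeps matching if later RTL passes canonicalize one
;; form into the other.  Either way the output is the halfword form of
;; the instruction, e.g. (illustrative only):
;;
;;	dech	z0.h, all, mul #3
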
;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate.  Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
		      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")

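;; For example (an illustrative sketch using the matching ACLE intrinsic
;; from <arm_sve.h>; register allocation is arbitrary):
;;
;;   uint64_t f (svbool_t pg, svbool_t p) { return svcntp_b32 (pg, p); }
;;
;; could assemble to:
;;
;;	cntp	x0, p0, p1.s
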
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

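;; For example (illustrative only), the three flavors produce:
;;
;;	incp	x0, p0.b	(modular)
;;	sqincp	x0, p0.b	(signed saturating)
;;	uqincp	x0, p0.b	(unsigned saturating)
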
;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)
	  (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

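;; Note: the non-saturating INCP only has an X-register form, but since
;; modular SImode arithmetic only needs the low 32 bits of the result,
;; printing the destination as %x0 is still correct, e.g.
;; (illustrative only):
;;
;;	incp	x0, p0.s
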
;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand"))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_PLUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

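;; For example (illustrative only): the signed form names both the X and
;; W views of the destination, while the unsigned form relies on the
;; implicit zero-extension of a W-register write:
;;
;;	sqincp	x0, p0.h, w0
;;	uqincp	w0, p0.h
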
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

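;; For example (illustrative only):
;;
;;	incp	z0.d, p0
;;
;; or, when the destination is not tied to the input:
;;
;;	movprfx	z0, z1
;;	incp	z0.d, p0
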
;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

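;; Note: in the VNx8HI pattern above, operand 3 is consumed by the
;; subreg_lowpart_operator match_operator, so the predicate operand that
;; the rewrite step replaces is operand 4 rather than operand 3 as in
;; the sibling patterns.
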
;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand")
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand" "0")
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

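;; For example (illustrative only), the three flavors produce:
;;
;;	decp	x0, p0.d	(modular)
;;	sqdecp	x0, p0.d	(signed saturating)
;;	uqdecp	x0, p0.d	(unsigned saturating)
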
;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI
	  (match_operand:SI 1 "register_operand" "0")
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand")
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_MINUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

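;; For example (illustrative only), mirroring the saturating increment
;; forms above:
;;
;;	sqdecp	x0, p0.b, w0
;;	uqdecp	w0, p0.b
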
;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

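;; For example (illustrative only):
;;
;;	decp	z0.d, p0
;;
;; with a MOVPRFX prefix in the untied alternative, as for the increment
;; patterns above.
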
;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)