1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves relating to the FFR
39 ;; ---- Normal contiguous loads
40 ;; ---- Extending contiguous loads
41 ;; ---- First-faulting contiguous loads
42 ;; ---- First-faulting extending contiguous loads
43 ;; ---- Non-temporal contiguous loads
44 ;; ---- Normal gather loads
45 ;; ---- Extending gather loads
46 ;; ---- First-faulting gather loads
47 ;; ---- First-faulting extending gather loads
50 ;; ---- Contiguous prefetches
51 ;; ---- Gather prefetches
54 ;; ---- Normal contiguous stores
55 ;; ---- Truncating contiguous stores
56 ;; ---- Non-temporal contiguous stores
57 ;; ---- Normal scatter stores
58 ;; ---- Truncating scatter stores
61 ;; ---- [INT,FP] Duplicate element
62 ;; ---- [INT,FP] Initialize from individual elements
63 ;; ---- [INT] Linear series
64 ;; ---- [PRED] Duplicate element
66 ;; == Vector decomposition
67 ;; ---- [INT,FP] Extract index
68 ;; ---- [INT,FP] Extract active element
69 ;; ---- [PRED] Extract index
71 ;; == Unary arithmetic
72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
74 ;; ---- [INT] Sign and zero extension
75 ;; ---- [INT] Truncation
76 ;; ---- [INT] Logical inverse
77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
79 ;; ---- [PRED] Inverse
81 ;; == Binary arithmetic
82 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
83 ;; ---- [INT] Addition
84 ;; ---- [INT] Subtraction
85 ;; ---- [INT] Take address
86 ;; ---- [INT] Absolute difference
87 ;; ---- [INT] Saturating addition and subtraction
88 ;; ---- [INT] Highpart multiplication
89 ;; ---- [INT] Division
90 ;; ---- [INT] Binary logical operations
91 ;; ---- [INT] Binary logical operations (inverted second input)
92 ;; ---- [INT] Shifts (rounding towards -Inf)
93 ;; ---- [INT] Shifts (rounding towards 0)
94 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
95 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
96 ;; ---- [FP] General binary arithmetic corresponding to unspecs
98 ;; ---- [FP] Complex addition
99 ;; ---- [FP] Subtraction
100 ;; ---- [FP] Absolute difference
101 ;; ---- [FP] Multiplication
102 ;; ---- [FP] Binary logical operations
103 ;; ---- [FP] Sign copying
104 ;; ---- [FP] Maximum and minimum
105 ;; ---- [PRED] Binary logical operations
106 ;; ---- [PRED] Binary logical operations (inverted second input)
107 ;; ---- [PRED] Binary logical operations (inverted result)
109 ;; == Ternary arithmetic
110 ;; ---- [INT] MLA and MAD
111 ;; ---- [INT] MLS and MSB
112 ;; ---- [INT] Dot product
113 ;; ---- [INT] Sum of absolute differences
114 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
115 ;; ---- [FP] Complex multiply-add
116 ;; ---- [FP] Trigonometric multiply-add
118 ;; == Comparisons and selects
119 ;; ---- [INT,FP] Select based on predicates
120 ;; ---- [INT,FP] Compare and select
121 ;; ---- [INT] Comparisons
122 ;; ---- [INT] While tests
123 ;; ---- [FP] Direct comparisons
124 ;; ---- [FP] Absolute comparisons
125 ;; ---- [PRED] Select
126 ;; ---- [PRED] Test bits
129 ;; ---- [INT,FP] Conditional reductions
130 ;; ---- [INT] Tree reductions
131 ;; ---- [FP] Tree reductions
132 ;; ---- [FP] Left-to-right reductions
135 ;; ---- [INT,FP] General permutes
136 ;; ---- [INT,FP] Special-purpose unary permutes
137 ;; ---- [INT,FP] Special-purpose binary permutes
138 ;; ---- [PRED] Special-purpose unary permutes
139 ;; ---- [PRED] Special-purpose binary permutes
142 ;; ---- [INT<-INT] Packs
143 ;; ---- [INT<-INT] Unpacks
144 ;; ---- [INT<-FP] Conversions
145 ;; ---- [INT<-FP] Packs
146 ;; ---- [INT<-FP] Unpacks
147 ;; ---- [FP<-INT] Conversions
148 ;; ---- [FP<-INT] Packs
149 ;; ---- [FP<-INT] Unpacks
150 ;; ---- [FP<-FP] Packs
151 ;; ---- [FP<-FP] Unpacks
152 ;; ---- [PRED<-PRED] Packs
153 ;; ---- [PRED<-PRED] Unpacks
155 ;; == Vector partitioning
156 ;; ---- [PRED] Unary partitioning
157 ;; ---- [PRED] Binary partitioning
158 ;; ---- [PRED] Scalarization
160 ;; == Counting elements
161 ;; ---- [INT] Count elements in a pattern (scalar)
162 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
163 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
164 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
165 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
166 ;; ---- [INT] Count elements in a predicate (scalar)
167 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
168 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
169 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
170 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
172 ;; =========================================================================
174 ;; =========================================================================
176 ;; -------------------------------------------------------------------------
177 ;; ---- Note on the handling of big-endian SVE
178 ;; -------------------------------------------------------------------------
180 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
181 ;; same way as movdi or movti would: the first byte of memory goes
182 ;; into the most significant byte of the register and the last byte
183 ;; of memory goes into the least significant byte of the register.
184 ;; This is the most natural ordering for Advanced SIMD and matches
185 ;; the ABI layout for 64-bit and 128-bit vector types.
187 ;; As a result, the order of bytes within the register is what GCC
188 ;; expects for a big-endian target, and subreg offsets therefore work
189 ;; as expected, with the first element in memory having subreg offset 0
190 ;; and the last element in memory having the subreg offset associated
191 ;; with a big-endian lowpart. However, this ordering also means that
192 ;; GCC's lane numbering does not match the architecture's numbering:
193 ;; GCC always treats the element at the lowest address in memory
194 ;; (subreg offset 0) as element 0, while the architecture treats
195 ;; the least significant end of the register as element 0.
197 ;; The situation for SVE is different. We want the layout of the
198 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
199 ;; logically, a mov<mode> load must be indistinguishable from a
200 ;; maskload<mode> whose mask is all true. We therefore need the
201 ;; register layout to match LD1 rather than LDR. The ABI layout of
202 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
204 ;; As a result, the architecture lane numbering matches GCC's lane
205 ;; numbering, with element 0 always being the first in memory.
208 ;; - Applying a subreg offset to a register does not give the element
209 ;; that GCC expects: the first element in memory has the subreg offset
210 ;; associated with a big-endian lowpart while the last element in memory
211 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
213 ;; - We cannot use LDR and STR for spill slots that might be accessed
214 ;; via subregs, since although the elements have the order GCC expects,
215 ;; the order of the bytes within the elements is different. We instead
216 ;; access spill slots via LD1 and ST1, using secondary reloads to
217 ;; reserve a predicate register.
219 ;; -------------------------------------------------------------------------
220 ;; ---- Description of UNSPEC_PTEST
221 ;; -------------------------------------------------------------------------
223 ;; SVE provides a PTEST instruction for testing the active lanes of a
224 ;; predicate and setting the flags based on the result. The associated
225 ;; condition code tests are:
227 ;; - any (= ne): at least one active bit is set
228 ;; - none (= eq): all active bits are clear (*)
229 ;; - first (= mi): the first active bit is set
230 ;; - nfrst (= pl): the first active bit is clear (*)
231 ;; - last (= cc): the last active bit is set
232 ;; - nlast (= cs): the last active bit is clear (*)
234 ;; where the conditions marked (*) are also true when there are no active
235 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
236 ;; of a PTEST use the condition code mode CC_NZC.
238 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
239 ;; This means that for other predicate modes, we need a governing predicate
240 ;; in which all bits are defined.
242 ;; For example, most predicated .H operations ignore the odd bits of the
243 ;; governing predicate, so that an active lane is represented by the
244 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
245 ;; any value. To test a .H predicate, we instead need "10" and "00"
246 ;; respectively, so that the condition only tests the even bits of the
249 ;; Several instructions set the flags as a side-effect, in the same way
250 ;; that a separate PTEST would. It's important for code quality that we
251 ;; use these flags results as often as possible, particularly in the case
252 ;; of WHILE* and RDFFR.
254 ;; Also, some of the instructions that set the flags are unpredicated
255 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
256 ;; they were predicated on a PTRUE of that size. For example, a .S
257 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
260 ;; We therefore need to represent PTEST operations in a way that
261 ;; makes it easy to combine them with both predicated and unpredicated
262 ;; operations, while using a VNx16BI governing predicate for all
263 ;; predicate modes. We do this using:
265 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
269 ;; - GP is the real VNx16BI governing predicate
271 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
272 ;; GP to CAST_GP are guaranteed to be clear in GP.
274 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
275 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
276 ;; SVE_MAYBE_NOT_PTRUE otherwise.
278 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
280 ;; -------------------------------------------------------------------------
281 ;; ---- Description of UNSPEC_PRED_Z
282 ;; -------------------------------------------------------------------------
284 ;; SVE integer comparisons are predicated and return zero for inactive
285 ;; lanes. Sometimes we use them with predicates that are all-true and
286 ;; sometimes we use them with general predicates.
288 ;; The integer comparisons also set the flags and so build-in the effect
289 ;; of a PTEST. We therefore want to be able to combine integer comparison
290 ;; patterns with PTESTs of the result. One difficulty with doing this is
291 ;; that (as noted above) the PTEST is always a .B operation and so can place
292 ;; stronger requirements on the governing predicate than the comparison does.
294 ;; For example, when applying a separate PTEST to the result of a full-vector
295 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
296 ;; .B PTRUE. In contrast, the comparison might be predicated on either
297 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
298 ;; bits don't matter for .H operations.
300 ;; We therefore can't rely on a full-vector comparison using the same
301 ;; predicate register as a following PTEST. We instead need to remember
302 ;; whether a comparison is known to be a full-vector comparison and use
303 ;; this information in addition to a check for equal predicate registers.
304 ;; At the same time, it's useful to have a common representation for all
305 ;; integer comparisons, so that they can be handled by a single set of
308 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
310 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
314 ;; - GP is the governing predicate, of mode <M:VPRED>
316 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
317 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
320 ;; - CODE is the comparison code
322 ;; - OP0 and OP1 are the values being compared, of mode M
324 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
326 ;; -------------------------------------------------------------------------
327 ;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
328 ;; -------------------------------------------------------------------------
330 ;; Many SVE integer operations are predicated. We can generate them
331 ;; from four sources:
333 ;; (1) Using normal unpredicated optabs. In this case we need to create
334 ;; an all-true predicate register to act as the governing predicate
335 ;; for the SVE instruction. There are no inactive lanes, and thus
336 ;; the values of inactive lanes don't matter.
338 ;; (2) Using _x ACLE functions. In this case the function provides a
339 ;; specific predicate and some lanes might be inactive. However,
340 ;; as for (1), the values of the inactive lanes don't matter.
341 ;; We can make extra lanes active without changing the behavior
342 ;; (although for code-quality reasons we should avoid doing so
345 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
346 ;; These optabs have a predicate operand that specifies which lanes are
347 ;; active and another operand that provides the values of inactive lanes.
349 ;; (4) Using _m and _z ACLE functions. These functions map to the same
350 ;; patterns as (3), with the _z functions setting inactive lanes to zero
351 ;; and the _m functions setting the inactive lanes to one of the function
354 ;; For (1) and (2) we need a way of attaching the predicate to a normal
355 ;; unpredicated integer operation. We do this using:
357 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
359 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
360 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
361 ;; it always is for (1), but might not be for (2).
363 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
364 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
365 ;; but as noted above, we should only do so if there's a specific benefit.
367 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
369 ;; For (3) and (4) we can simply use the SVE port's normal representation
370 ;; of a predicate-based select:
372 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
374 ;; where INACTIVE specifies the values of inactive lanes.
376 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
377 ;; than inserting the integer operation directly. This is mostly useful
378 ;; if we want the combine pass to merge an integer operation with an explicit
379 ;; vcond_mask (in other words, with a following SEL instruction). However,
380 ;; it's generally better to merge such operations at the gimple level
383 ;; -------------------------------------------------------------------------
384 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
385 ;; -------------------------------------------------------------------------
387 ;; Most SVE floating-point operations are predicated. We can generate
388 ;; them from four sources:
390 ;; (1) Using normal unpredicated optabs. In this case we need to create
391 ;; an all-true predicate register to act as the governing predicate
392 ;; for the SVE instruction. There are no inactive lanes, and thus
393 ;; the values of inactive lanes don't matter.
395 ;; (2) Using _x ACLE functions. In this case the function provides a
396 ;; specific predicate and some lanes might be inactive. However,
397 ;; as for (1), the values of the inactive lanes don't matter.
399 ;; The instruction must have the same exception behavior as the
400 ;; function call unless things like command-line flags specifically
401 ;; allow otherwise. For example, with -ffast-math, it is OK to
402 ;; raise exceptions for inactive lanes, but normally it isn't.
404 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
405 ;; These optabs have a predicate operand that specifies which lanes are
406 ;; active and another operand that provides the values of inactive lanes.
408 ;; (4) Using _m and _z ACLE functions. These functions map to the same
409 ;; patterns as (3), with the _z functions setting inactive lanes to zero
410 ;; and the _m functions setting the inactive lanes to one of the function
415 ;; - In (1), the predicate is known to be all true and the pattern can use
416 ;; unpredicated operations where available.
418 ;; - In (2), the predicate might or might not be all true. The pattern can
419 ;; use unpredicated instructions if the predicate is all-true or if things
420 ;; like command-line flags allow exceptions for inactive lanes.
422 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
423 ;; might be inactive and inactive lanes of the result must have specific
424 ;; values. There is no scope for using unpredicated instructions (and no
425 ;; reason to want to), so the question about command-line flags doesn't
428 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
429 ;; in combination with a separate predicate operand, e.g.
431 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
432 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
435 ;; because (sqrt ...) can raise an exception for any lane, including
436 ;; inactive ones. We therefore need to use an unspec instead.
438 ;; Also, (2) requires some way of distinguishing the case in which the
439 ;; predicate might have inactive lanes and cannot be changed from the
440 ;; case in which the predicate has no inactive lanes or can be changed.
441 ;; This information is also useful when matching combined FP patterns
442 ;; in which the predicates might not be equal.
444 ;; We therefore model FP operations as an unspec of the form:
446 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
450 ;; - PRED is the governing predicate.
452 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
453 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
454 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
456 ;; - OP0 OP1 ... are the normal input operands to the operation.
458 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
460 ;; -------------------------------------------------------------------------
461 ;; ---- Note on FFR handling
462 ;; -------------------------------------------------------------------------
464 ;; Logically we want to divide FFR-related instructions into regions
465 ;; that contain exactly one of:
467 ;; - a single write to the FFR
468 ;; - any number of reads from the FFR (but only one read is likely)
469 ;; - any number of LDFF1 and LDNF1 instructions
471 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
472 ;; normal loads as far as possible. This means that they should be
473 ;; schedulable within a region in the same way that LD1 would be,
474 ;; and they should be deleted as dead if the result is unused. The loads
475 ;; should therefore not write to the FFR, since that would both serialize
476 ;; the loads with respect to each other and keep the loads live for any
479 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
480 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
481 ;; while using the FFRT keeps the instruction within its region.
484 ;; - Writes start a new FFRT region as well as setting the FFR:
486 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
488 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
489 ;; loads stay within the same FFRT region:
491 ;; L1: load data while using the FFRT
493 ;; In addition, any FFRT region that includes a load also has at least one
496 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
498 ;; to make it clear that the region both reads from and writes to the FFR.
500 ;; - Reads do the following:
502 ;; R1: FFRT = FFR [type == no_insn]
503 ;; R2: read from the FFRT
504 ;; R3: FFRT = update(FFRT) [type == no_insn]
506 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
507 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
508 ;; cannot move backwards across R3.
510 ;; This way, writes are only kept alive by later loads or reads,
511 ;; and write/read pairs fold normally. For two consecutive reads,
512 ;; the first R3 is made dead by the second R1, which in turn becomes
513 ;; redundant with the first R1. We then have:
515 ;; first R1: FFRT = FFR
516 ;; first read from the FFRT
517 ;; second read from the FFRT
518 ;; second R3: FFRT = update(FFRT)
520 ;; i.e. the two FFRT regions collapse into a single one with two
521 ;; independent reads.
523 ;; The model still prevents some valid optimizations though. For example,
524 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
525 ;; the L2 instructions.
527 ;; =========================================================================
529 ;; =========================================================================
531 ;; -------------------------------------------------------------------------
532 ;; ---- Moves of single vectors
533 ;; -------------------------------------------------------------------------
535 ;; - MOV (including aliases)
536 ;; - LD1B (contiguous form)
541 ;; - ST1B (contiguous form)
546 ;; -------------------------------------------------------------------------
;; Move expander for single SVE vectors.  Memory moves are routed through
;; aarch64_expand_sve_mem_move (the predicated LD1/ST1 path) whenever a
;; pseudo can still be created; constants go through
;; aarch64_expand_mov_immediate; and on big-endian targets a subreg-style
;; move may instead be expanded by aarch64_maybe_expand_sve_subreg_move.
548 (define_expand "mov<mode>"
549 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
550 (match_operand:SVE_ALL 1 "general_operand"))]
553 /* Use the predicated load and store patterns where possible.
554 This is required for big-endian targets (see the comment at the
555 head of the file) and increases the addressing choices for
557 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
558 && can_create_pseudo_p ())
560 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
564 if (CONSTANT_P (operands[1]))
566 aarch64_expand_mov_immediate (operands[0], operands[1]);
570 /* Optimize subregs on big-endian targets: we can use REV[BHW]
571 instead of going through memory. */
573 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
;; Misaligned vector moves are handled exactly like ordinary moves:
;; the expander simply re-emits the move via emit_move_insn.
578 (define_expand "movmisalign<mode>"
579 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
580 (match_operand:SVE_ALL 1 "general_operand"))]
583 /* Equivalent to a normal move for our purposes. */
584 emit_move_insn (operands[0], operands[1]);
589 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
590 ;; little-endian ordering is acceptable. Only allow memory operations during
591 ;; and after RA; before RA we want the predicated load and store patterns to
;; LDR/STR-capable unpredicated move: restricted to VNx16QI or
;; little-endian targets, and to register operands before RA (memory
;; alternatives only become available once lra_in_progress or
;; reload_completed).  The final alternative materializes a vector
;; immediate via aarch64_output_sve_mov_immediate.
593 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
594 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
595 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
597 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
598 && ((lra_in_progress || reload_completed)
599 || (register_operand (operands[0], <MODE>mode)
600 && nonmemory_operand (operands[1], <MODE>mode)))"
605 * return aarch64_output_sve_mov_immediate (operands[1]);"
608 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
609 ;; or vectors for which little-endian ordering isn't acceptable. Memory
610 ;; accesses require secondary reloads.
;; Only register (w) and immediate (Dn) alternatives here: memory
;; operands are deliberately excluded and go through secondary reloads
;; instead.  Immediates are emitted by aarch64_output_sve_mov_immediate.
611 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
612 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
613 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
615 && <MODE>mode != VNx16QImode
617 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
620 * return aarch64_output_sve_mov_immediate (operands[1]);"
623 ;; Handle memory reloads for modes that can't use LDR and STR. We use
624 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
625 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
;; Operand 2 is the scratch VNx16BI register in which the all-true
;; predicate is built; it is then viewed in the predicate mode that
;; matches operand 0's mode, and the actual load or store is emitted
;; by aarch64_emit_sve_pred_move.
626 (define_expand "aarch64_sve_reload_mem"
628 [(set (match_operand 0)
630 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
633 /* Create a PTRUE. */
634 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
636 /* Refer to the PTRUE in the appropriate mode for this move. */
637 machine_mode mode = GET_MODE (operands[0]);
638 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
640 /* Emit a predicated load or store. */
641 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
646 ;; A predicated move in which the predicate is known to be all-true.
647 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
648 ;; so changes to this pattern will need changes there as well.
;; Three alternatives: register-to-register, predicated load (LD1/z)
;; and predicated store (ST1).  At most one operand may be memory.
;; When both operands turn out to be registers, the insn splits to a
;; plain unpredicated move (the predicate is known all-true here).
649 (define_insn_and_split "@aarch64_pred_mov<mode>"
650 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
652 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
653 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
656 && (register_operand (operands[0], <MODE>mode)
657 || register_operand (operands[2], <MODE>mode))"
660 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
661 st1<Vesize>\t%2.<Vctype>, %1, %0"
662 "&& register_operand (operands[0], <MODE>mode)
663 && register_operand (operands[2], <MODE>mode)"
664 [(set (match_dup 0) (match_dup 2))]
667 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
668 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
669 ;; for details. We use a special predicate for operand 2 to reduce
670 ;; the number of patterns.
;; Big-endian only.  Operand 2 intentionally has no fixed mode (it uses
;; aarch64_any_register_operand) so that one pattern covers all source
;; modes; the split after reload hands the pieces to
;; aarch64_split_sve_subreg_move.
671 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
672 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
674 [(match_operand:VNx16BI 1 "register_operand" "Upl")
675 (match_operand 2 "aarch64_any_register_operand" "w")]
677 "TARGET_SVE && BYTES_BIG_ENDIAN"
679 "&& reload_completed"
682 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
687 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
688 ;; This is equivalent to a subreg on little-endian targets but not for
689 ;; big-endian; see the comment at the head of the file for details.
;; If the target allows the mode change directly (can_change_mode_class
;; on FP_REGS), the reinterpret collapses to a simple lowpart move;
;; otherwise the UNSPEC_REINTERPRET pattern below is used instead.
690 (define_expand "@aarch64_sve_reinterpret<mode>"
691 [(set (match_operand:SVE_ALL 0 "register_operand")
693 [(match_operand 1 "aarch64_any_register_operand")]
694 UNSPEC_REINTERPRET))]
697 machine_mode src_mode = GET_MODE (operands[1]);
698 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
700 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
706 ;; A pattern for handling type punning on big-endian targets. We use a
707 ;; special predicate for operand 1 to reduce the number of patterns.
;; After reload this becomes a plain register move: operand 1 is simply
;; re-expressed in the destination mode via aarch64_replace_reg_mode.
708 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
709 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
711 [(match_operand 1 "aarch64_any_register_operand" "w")]
712 UNSPEC_REINTERPRET))]
715 "&& reload_completed"
716 [(set (match_dup 0) (match_dup 1))]
718 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
722 ;; -------------------------------------------------------------------------
723 ;; ---- Moves of multiple vectors
724 ;; -------------------------------------------------------------------------
725 ;; All patterns in this section are synthetic and split to real
726 ;; instructions after reload.
727 ;; -------------------------------------------------------------------------
;; Move expander for SVE structure modes.  Memory moves go through the
;; predicated path (aarch64_expand_sve_mem_move) — required on
;; big-endian, and the gcc_assert documents that pseudos must still be
;; available there — while constants use aarch64_expand_mov_immediate.
729 (define_expand "mov<mode>"
730 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
731 (match_operand:SVE_STRUCT 1 "general_operand"))]
734 /* Big-endian loads and stores need to be done via LD1 and ST1;
735 see the comment at the head of the file for details. */
736 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
739 gcc_assert (can_create_pseudo_p ());
740 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
744 if (CONSTANT_P (operands[1]))
746 aarch64_expand_mov_immediate (operands[0], operands[1]);
752 ;; Unpredicated structure moves (little-endian).
;; Little-endian only (!BYTES_BIG_ENDIAN); the length attribute reflects
;; that a structure move expands to multiple single-vector instructions.
753 (define_insn "*aarch64_sve_mov<mode>_le"
754 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
755 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
756 "TARGET_SVE && !BYTES_BIG_ENDIAN"
758 [(set_attr "length" "<insn_length>")]
761 ;; Unpredicated structure moves (big-endian). Memory accesses require
762 ;; secondary reloads.
;; Big-endian variant: register and immediate alternatives only; memory
;; accesses are handled through secondary reloads (see comment above).
763 (define_insn "*aarch64_sve_mov<mode>_be"
764 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
765 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
766 "TARGET_SVE && BYTES_BIG_ENDIAN"
768 [(set_attr "length" "<insn_length>")]
771 ;; Split unpredicated structure moves into pieces. This is the same
772 ;; for both big-endian and little-endian code, although it only needs
773 ;; to handle memory operands for little-endian code.
;; Register-to-register structure moves are delegated to
;; aarch64_simd_emit_reg_reg_move; all other cases are decomposed into
;; one single-vector move per constituent, using subregs at successive
;; BYTES_PER_SVE_VECTOR offsets.
775 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
776 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
777 "TARGET_SVE && reload_completed"
780 rtx dest = operands[0];
781 rtx src = operands[1];
782 if (REG_P (dest) && REG_P (src))
783 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
785 for (unsigned int i = 0; i < <vector_count>; ++i)
787 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
788 i * BYTES_PER_SVE_VECTOR);
789 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
790 i * BYTES_PER_SVE_VECTOR);
791 emit_insn (gen_rtx_SET (subdest, subsrc));
797 ;; Predicated structure moves. This works for both endiannesses but in
798 ;; practice is only useful for big-endian.
;; At most one of operands 0 and 2 may be memory.  After reload the
;; structure move splits into one predicated single-vector move per
;; constituent (via aarch64_emit_sve_pred_move), all sharing the same
;; governing predicate in operand 1.
799 (define_insn_and_split "@aarch64_pred_mov<mode>"
800 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
802 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
803 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
806 && (register_operand (operands[0], <MODE>mode)
807 || register_operand (operands[2], <MODE>mode))"
809 "&& reload_completed"
812 for (unsigned int i = 0; i < <vector_count>; ++i)
814 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
816 i * BYTES_PER_SVE_VECTOR);
817 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
819 i * BYTES_PER_SVE_VECTOR);
820 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
824 [(set_attr "length" "<insn_length>")]
827 ;; -------------------------------------------------------------------------
828 ;; ---- Moves of predicates
829 ;; -------------------------------------------------------------------------
837 ;; -------------------------------------------------------------------------
839 (define_expand "mov<mode>"
840 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
841 (match_operand:PRED_ALL 1 "general_operand"))]
844 if (GET_CODE (operands[0]) == MEM)
845 operands[1] = force_reg (<MODE>mode, operands[1]);
847 if (CONSTANT_P (operands[1]))
849 aarch64_expand_mov_immediate (operands[0], operands[1]);
855 (define_insn "*aarch64_sve_mov<mode>"
856 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
857 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
859 && (register_operand (operands[0], <MODE>mode)
860 || register_operand (operands[1], <MODE>mode))"
865 * return aarch64_output_sve_mov_immediate (operands[1]);"
868 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
869 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
870 [(set (reg:CC_NZC CC_REGNUM)
874 (const_int SVE_KNOWN_PTRUE)
875 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
877 [(match_operand:SI 4 "const_int_operand")
878 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
881 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
885 return aarch64_output_sve_ptrues (operands[1]);
887 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
889 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
893 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
894 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
895 [(set (reg:CC_NZC CC_REGNUM)
899 (const_int SVE_KNOWN_PTRUE)
901 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
903 [(match_operand:SI 4 "const_int_operand")
904 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
907 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
911 return aarch64_output_sve_ptrues (operands[1]);
913 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
915 operands[2] = CONSTM1_RTX (VNx16BImode);
916 operands[3] = CONSTM1_RTX (<MODE>mode);
920 ;; Match PTRUES Pn.B when only the flags result is useful (which is
921 ;; a way of testing VL).
922 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
923 [(set (reg:CC_NZC CC_REGNUM)
927 (const_int SVE_KNOWN_PTRUE)
928 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
930 [(match_operand:SI 4 "const_int_operand")
931 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
934 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
937 return aarch64_output_sve_ptrues (operands[1]);
939 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
941 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
945 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
946 ;; a way of testing VL).
947 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
948 [(set (reg:CC_NZC CC_REGNUM)
952 (const_int SVE_KNOWN_PTRUE)
954 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
956 [(match_operand:SI 4 "const_int_operand")
957 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
960 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
963 return aarch64_output_sve_ptrues (operands[1]);
965 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
967 operands[2] = CONSTM1_RTX (VNx16BImode);
968 operands[3] = CONSTM1_RTX (<MODE>mode);
972 ;; -------------------------------------------------------------------------
973 ;; ---- Moves relating to the FFR
974 ;; -------------------------------------------------------------------------
979 ;; -------------------------------------------------------------------------
981 ;; [W1 in the block comment above about FFR handling]
983 ;; Write to the FFR and start a new FFRT scheduling region.
984 (define_insn "aarch64_wrffr"
985 [(set (reg:VNx16BI FFR_REGNUM)
986 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
987 (set (reg:VNx16BI FFRT_REGNUM)
988 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
995 ;; [L2 in the block comment above about FFR handling]
997 ;; Introduce a read from and write to the FFR in the current FFRT region,
998 ;; so that the FFR value is live on entry to the region and so that the FFR
999 ;; value visibly changes within the region. This is used (possibly multiple
1000 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1001 (define_insn "aarch64_update_ffr_for_load"
1002 [(set (reg:VNx16BI FFR_REGNUM)
1003 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1004 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1007 [(set_attr "type" "no_insn")]
1010 ;; [R1 in the block comment above about FFR handling]
1012 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1013 ;; can be read from there by the RDFFR instructions below. This acts
1014 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1015 ;; creates a natural dependency with earlier writes.
1016 (define_insn "aarch64_copy_ffr_to_ffrt"
1017 [(set (reg:VNx16BI FFRT_REGNUM)
1018 (reg:VNx16BI FFR_REGNUM))]
1021 [(set_attr "type" "no_insn")]
1024 ;; [R2 in the block comment above about FFR handling]
1026 ;; Read the FFR via the FFRT.
1027 (define_insn "aarch64_rdffr"
1028 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1029 (reg:VNx16BI FFRT_REGNUM))]
1034 ;; Likewise with zero predication.
1035 (define_insn "aarch64_rdffr_z"
1036 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1038 (reg:VNx16BI FFRT_REGNUM)
1039 (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1044 ;; Read the FFR to test for a fault, without using the predicate result.
1045 (define_insn "*aarch64_rdffr_z_ptest"
1046 [(set (reg:CC_NZC CC_REGNUM)
1048 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1050 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1052 (reg:VNx16BI FFRT_REGNUM)
1055 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1057 "rdffrs\t%0.b, %1/z"
1060 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1061 (define_insn "*aarch64_rdffr_ptest"
1062 [(set (reg:CC_NZC CC_REGNUM)
1064 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1066 (const_int SVE_KNOWN_PTRUE)
1067 (reg:VNx16BI FFRT_REGNUM)]
1069 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1071 "rdffrs\t%0.b, %1/z"
1074 ;; Read the FFR with zero predication and test the result.
1075 (define_insn "*aarch64_rdffr_z_cc"
1076 [(set (reg:CC_NZC CC_REGNUM)
1078 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1080 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1082 (reg:VNx16BI FFRT_REGNUM)
1085 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1087 (reg:VNx16BI FFRT_REGNUM)
1090 "rdffrs\t%0.b, %1/z"
1093 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1094 (define_insn "*aarch64_rdffr_cc"
1095 [(set (reg:CC_NZC CC_REGNUM)
1097 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1099 (const_int SVE_KNOWN_PTRUE)
1100 (reg:VNx16BI FFRT_REGNUM)]
1102 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1103 (reg:VNx16BI FFRT_REGNUM))]
1105 "rdffrs\t%0.b, %1/z"
1108 ;; [R3 in the block comment above about FFR handling]
1110 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1111 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1112 (define_insn "aarch64_update_ffrt"
1113 [(set (reg:VNx16BI FFRT_REGNUM)
1114 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1117 [(set_attr "type" "no_insn")]
1120 ;; =========================================================================
1122 ;; =========================================================================
1124 ;; -------------------------------------------------------------------------
1125 ;; ---- Normal contiguous loads
1126 ;; -------------------------------------------------------------------------
1127 ;; Includes contiguous forms of:
1144 ;; -------------------------------------------------------------------------
1147 (define_insn "maskload<mode><vpred>"
1148 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1150 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1151 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1154 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1157 ;; Unpredicated LD[234].
1158 (define_expand "vec_load_lanes<mode><vsingle>"
1159 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1162 (match_operand:SVE_STRUCT 1 "memory_operand")]
1166 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1170 ;; Predicated LD[234].
1171 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1172 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1174 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1175 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1178 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1181 ;; -------------------------------------------------------------------------
1182 ;; ---- Extending contiguous loads
1183 ;; -------------------------------------------------------------------------
1184 ;; Includes contiguous forms of:
1191 ;; -------------------------------------------------------------------------
1193 ;; Predicated load and extend, with 8 elements per 128-bit block.
1194 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1195 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1197 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1198 (ANY_EXTEND:SVE_HSDI
1199 (unspec:SVE_PARTIAL_I
1200 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1201 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1204 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1205 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1206 "&& !CONSTANT_P (operands[3])"
1208 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1212 ;; -------------------------------------------------------------------------
1213 ;; ---- First-faulting contiguous loads
1214 ;; -------------------------------------------------------------------------
1215 ;; Includes contiguous forms of:
1224 ;; -------------------------------------------------------------------------
1226 ;; Contiguous non-extending first-faulting or non-faulting loads.
1227 (define_insn "@aarch64_ld<fn>f1<mode>"
1228 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1230 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1231 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1232 (reg:VNx16BI FFRT_REGNUM)]
1235 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1238 ;; -------------------------------------------------------------------------
1239 ;; ---- First-faulting extending contiguous loads
1240 ;; -------------------------------------------------------------------------
1241 ;; Includes contiguous forms of:
1254 ;; -------------------------------------------------------------------------
1256 ;; Predicated first-faulting or non-faulting load and extend.
1257 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1258 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1260 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1261 (ANY_EXTEND:SVE_HSDI
1262 (unspec:SVE_PARTIAL_I
1263 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1264 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1265 (reg:VNx16BI FFRT_REGNUM)]
1268 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1269 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1270 "&& !CONSTANT_P (operands[3])"
1272 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1276 ;; -------------------------------------------------------------------------
1277 ;; ---- Non-temporal contiguous loads
1278 ;; -------------------------------------------------------------------------
1284 ;; -------------------------------------------------------------------------
1286 ;; Predicated contiguous non-temporal load.
1287 (define_insn "@aarch64_ldnt1<mode>"
1288 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1290 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1291 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1294 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1297 ;; -------------------------------------------------------------------------
1298 ;; ---- Normal gather loads
1299 ;; -------------------------------------------------------------------------
1300 ;; Includes gather forms of:
1303 ;; -------------------------------------------------------------------------
1305 ;; Unpredicated gather loads.
1306 (define_expand "gather_load<mode><v_int_container>"
1307 [(set (match_operand:SVE_24 0 "register_operand")
1310 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1311 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1312 (match_operand:DI 3 "const_int_operand")
1313 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1314 (mem:BLK (scratch))]
1315 UNSPEC_LD1_GATHER))]
1318 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1322 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1323 ;; unsigned extension and false for signed extension.
1324 (define_insn "mask_gather_load<mode><v_int_container>"
1325 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1327 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1328 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1329 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1330 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1331 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1332 (mem:BLK (scratch))]
1333 UNSPEC_LD1_GATHER))]
1336 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1337 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1338 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1339 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1340 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1341 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1344 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1345 ;; doesn't matter in this case.
1346 (define_insn "mask_gather_load<mode><v_int_container>"
1347 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1349 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1350 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1351 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1352 (match_operand:DI 3 "const_int_operand")
1353 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1354 (mem:BLK (scratch))]
1355 UNSPEC_LD1_GATHER))]
1358 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1359 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1360 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1361 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1364 ;; Likewise, but with the offset being extended from 32 bits.
1365 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1366 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1368 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1369 (match_operand:DI 1 "register_operand" "rk, rk")
1373 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1375 (match_operand:DI 3 "const_int_operand")
1376 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1377 (mem:BLK (scratch))]
1378 UNSPEC_LD1_GATHER))]
1381 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1382 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1383 "&& !CONSTANT_P (operands[6])"
1385 operands[6] = CONSTM1_RTX (VNx2BImode);
1389 ;; Likewise, but with the offset being truncated to 32 bits and then sign-extended.
1391 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1392 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1394 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1395 (match_operand:DI 1 "register_operand" "rk, rk")
1400 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1402 (match_operand:DI 3 "const_int_operand")
1403 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1404 (mem:BLK (scratch))]
1405 UNSPEC_LD1_GATHER))]
1408 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1409 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1410 "&& !CONSTANT_P (operands[6])"
1412 operands[6] = CONSTM1_RTX (VNx2BImode);
1416 ;; Likewise, but with the offset being truncated to 32 bits and then zero-extended.
1418 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1419 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1421 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1422 (match_operand:DI 1 "register_operand" "rk, rk")
1424 (match_operand:VNx2DI 2 "register_operand" "w, w")
1425 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1426 (match_operand:DI 3 "const_int_operand")
1427 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1428 (mem:BLK (scratch))]
1429 UNSPEC_LD1_GATHER))]
1432 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1433 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1436 ;; -------------------------------------------------------------------------
1437 ;; ---- Extending gather loads
1438 ;; -------------------------------------------------------------------------
1439 ;; Includes gather forms of:
1446 ;; -------------------------------------------------------------------------
1448 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1449 ;; true for unsigned extension and false for signed extension.
1450 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1451 [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1453 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1454 (ANY_EXTEND:SVE_4HSI
1456 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1457 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1458 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1459 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1460 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1461 (mem:BLK (scratch))]
1462 UNSPEC_LD1_GATHER))]
1464 "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1466 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1467 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1468 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1469 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1470 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1471 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1472 "&& !CONSTANT_P (operands[6])"
1474 operands[6] = CONSTM1_RTX (VNx4BImode);
1478 ;; Predicated extending gather loads for 64-bit elements. The value of
1479 ;; operand 3 doesn't matter in this case.
1480 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1481 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1483 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1484 (ANY_EXTEND:SVE_2HSDI
1486 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1487 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1488 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1489 (match_operand:DI 3 "const_int_operand")
1490 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1491 (mem:BLK (scratch))]
1492 UNSPEC_LD1_GATHER))]
1494 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1496 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1497 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1498 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1499 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1500 "&& !CONSTANT_P (operands[6])"
1502 operands[6] = CONSTM1_RTX (VNx2BImode);
1506 ;; Likewise, but with the offset being extended from 32 bits.
1507 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1508 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1511 (ANY_EXTEND:SVE_2HSDI
1513 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1514 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1518 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1520 (match_operand:DI 3 "const_int_operand")
1521 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1522 (mem:BLK (scratch))]
1523 UNSPEC_LD1_GATHER))]
1525 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1527 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1528 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1529 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1531 operands[6] = CONSTM1_RTX (VNx2BImode);
1532 operands[7] = CONSTM1_RTX (VNx2BImode);
1536 ;; Likewise, but with the offset being truncated to 32 bits and then sign-extended.
1538 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1539 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1542 (ANY_EXTEND:SVE_2HSDI
1544 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1545 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1550 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1552 (match_operand:DI 3 "const_int_operand")
1553 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1554 (mem:BLK (scratch))]
1555 UNSPEC_LD1_GATHER))]
1557 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1559 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1560 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1561 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1563 operands[6] = CONSTM1_RTX (VNx2BImode);
1564 operands[7] = CONSTM1_RTX (VNx2BImode);
1568 ;; Likewise, but with the offset being truncated to 32 bits and then zero-extended.
1570 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1571 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1574 (ANY_EXTEND:SVE_2HSDI
1576 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1577 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1579 (match_operand:VNx2DI 2 "register_operand" "w, w")
1580 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1581 (match_operand:DI 3 "const_int_operand")
1582 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1583 (mem:BLK (scratch))]
1584 UNSPEC_LD1_GATHER))]
1586 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1588 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1589 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1590 "&& !CONSTANT_P (operands[7])"
1592 operands[7] = CONSTM1_RTX (VNx2BImode);
1596 ;; -------------------------------------------------------------------------
1597 ;; ---- First-faulting gather loads
1598 ;; -------------------------------------------------------------------------
1599 ;; Includes gather forms of:
1602 ;; -------------------------------------------------------------------------
1604 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1605 ;; 3 is true for unsigned extension and false for signed extension.
1606 (define_insn "@aarch64_ldff1_gather<mode>"
1607 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1609 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1610 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1611 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1612 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1613 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1615 (reg:VNx16BI FFRT_REGNUM)]
1616 UNSPEC_LDFF1_GATHER))]
1619 ldff1w\t%0.s, %5/z, [%2.s]
1620 ldff1w\t%0.s, %5/z, [%2.s, #%1]
1621 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1622 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1623 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1624 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1627 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1628 ;; of operand 3 doesn't matter in this case.
1629 (define_insn "@aarch64_ldff1_gather<mode>"
1630 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1632 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1633 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1634 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1635 (match_operand:DI 3 "const_int_operand")
1636 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1638 (reg:VNx16BI FFRT_REGNUM)]
1639 UNSPEC_LDFF1_GATHER))]
1642 ldff1d\t%0.d, %5/z, [%2.d]
1643 ldff1d\t%0.d, %5/z, [%2.d, #%1]
1644 ldff1d\t%0.d, %5/z, [%1, %2.d]
1645 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1648 ;; Likewise, but with the offset being sign-extended from 32 bits.
1649 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1650 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1652 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1653 (match_operand:DI 1 "register_operand" "rk, rk")
1658 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1660 (match_operand:DI 3 "const_int_operand")
1661 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1663 (reg:VNx16BI FFRT_REGNUM)]
1664 UNSPEC_LDFF1_GATHER))]
1667 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1668 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1669 "&& !CONSTANT_P (operands[6])"
1671 operands[6] = CONSTM1_RTX (VNx2BImode);
1675 ;; Likewise, but with the offset being zero-extended from 32 bits.
1676 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1677 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1679 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1680 (match_operand:DI 1 "register_operand" "rk, rk")
1682 (match_operand:VNx2DI 2 "register_operand" "w, w")
1683 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1684 (match_operand:DI 3 "const_int_operand")
1685 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1687 (reg:VNx16BI FFRT_REGNUM)]
1688 UNSPEC_LDFF1_GATHER))]
1691 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1692 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1695 ;; -------------------------------------------------------------------------
1696 ;; ---- First-faulting extending gather loads
1697 ;; -------------------------------------------------------------------------
1698 ;; Includes gather forms of:
1705 ;; -------------------------------------------------------------------------
1707 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1708 ;; Operand 3 is true for unsigned extension and false for signed extension.
1709 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1710 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1712 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1713 (ANY_EXTEND:VNx4_WIDE
1715 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1716 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1717 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1718 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1719 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1721 (reg:VNx16BI FFRT_REGNUM)]
1722 UNSPEC_LDFF1_GATHER))]
1726 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1727 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1728 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1729 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1730 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1731 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1732 "&& !CONSTANT_P (operands[6])"
1734 operands[6] = CONSTM1_RTX (VNx4BImode);
1738 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1739 ;; The value of operand 3 doesn't matter in this case.
1740 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1741 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1743 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1744 (ANY_EXTEND:VNx2_WIDE
1746 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1747 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1748 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1749 (match_operand:DI 3 "const_int_operand")
1750 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1752 (reg:VNx16BI FFRT_REGNUM)]
1753 UNSPEC_LDFF1_GATHER))]
1757 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1758 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1759 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1760 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1761 "&& !CONSTANT_P (operands[6])"
1763 operands[6] = CONSTM1_RTX (VNx2BImode);
1767 ;; Likewise, but with the offset being sign-extended from 32 bits.
1768 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1769 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1772 (ANY_EXTEND:VNx2_WIDE
1774 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1775 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1780 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1782 (match_operand:DI 3 "const_int_operand")
1783 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1785 (reg:VNx16BI FFRT_REGNUM)]
1786 UNSPEC_LDFF1_GATHER))]
1790 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1791 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1792 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1794 operands[6] = CONSTM1_RTX (VNx2BImode);
1795 operands[7] = CONSTM1_RTX (VNx2BImode);
1799 ;; Likewise, but with the offset being zero-extended from 32 bits.
1800 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1801 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1804 (ANY_EXTEND:VNx2_WIDE
1806 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1807 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1809 (match_operand:VNx2DI 2 "register_operand" "w, w")
1810 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1811 (match_operand:DI 3 "const_int_operand")
1812 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1814 (reg:VNx16BI FFRT_REGNUM)]
1815 UNSPEC_LDFF1_GATHER))]
1819 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1820 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1821 "&& !CONSTANT_P (operands[7])"
1823 operands[7] = CONSTM1_RTX (VNx2BImode);
1827 ;; =========================================================================
1829 ;; =========================================================================
1831 ;; -------------------------------------------------------------------------
1832 ;; ---- Contiguous prefetches
1833 ;; -------------------------------------------------------------------------
1834 ;; Includes contiguous forms of:
1839 ;; -------------------------------------------------------------------------
1841 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
1842 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
1844 (define_insn "@aarch64_sve_prefetch<mode>"
1845 [(prefetch (unspec:DI
1846 [(match_operand:<VPRED> 0 "register_operand" "Upl")
1847 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1848 (match_operand:DI 2 "const_int_operand")]
1849 UNSPEC_SVE_PREFETCH)
1850 (match_operand:DI 3 "const_int_operand")
1851 (match_operand:DI 4 "const_int_operand"))]
1854 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1855 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1859 ;; -------------------------------------------------------------------------
1860 ;; ---- Gather prefetches
1861 ;; -------------------------------------------------------------------------
1862 ;; Includes gather forms of:
1867 ;; -------------------------------------------------------------------------
1869 ;; Predicated gather prefetches for 32-bit bases and offsets. The operands
1871 ;; 0: the governing predicate
1872 ;; 1: the scalar component of the address
1873 ;; 2: the vector component of the address
1874 ;; 3: 1 for zero extension, 0 for sign extension
1875 ;; 4: the scale multiplier
1876 ;; 5: a vector zero that identifies the mode of data being accessed
1877 ;; 6: the prefetch operator (an svprfop)
1878 ;; 7: the normal RTL prefetch rw flag
1879 ;; 8: the normal RTL prefetch locality value
1880 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1881 [(prefetch (unspec:DI
1882 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1883 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1884 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1885 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1886 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1887 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1888 (match_operand:DI 6 "const_int_operand")]
1889 UNSPEC_SVE_PREFETCH_GATHER)
1890 (match_operand:DI 7 "const_int_operand")
1891 (match_operand:DI 8 "const_int_operand"))]
1894 static const char *const insns[][2] = {
1895 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1896 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1897 "prfb", "%0, [%1, %2.s, sxtw]",
1898 "prfb", "%0, [%1, %2.s, uxtw]",
1899 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1900 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1902 const char *const *parts = insns[which_alternative];
1903 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1907 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
1908 ;; doesn't matter in this case.
1909 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
1910 [(prefetch (unspec:DI
1911 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
1912 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
1913 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
1914 (match_operand:DI 3 "const_int_operand")
1915 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
1916 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1917 (match_operand:DI 6 "const_int_operand")]
1918 UNSPEC_SVE_PREFETCH_GATHER)
1919 (match_operand:DI 7 "const_int_operand")
1920 (match_operand:DI 8 "const_int_operand"))]
1923 static const char *const insns[][2] = {
1924 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
1925 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
1926 "prfb", "%0, [%1, %2.d]",
1927 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
1929 const char *const *parts = insns[which_alternative];
1930 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1934 ;; Likewise, but with the offset being sign-extended from 32 bits.
1935 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
1936 [(prefetch (unspec:DI
1937 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1938 (match_operand:DI 1 "register_operand" "rk, rk")
1943 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1945 (match_operand:DI 3 "const_int_operand")
1946 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1947 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1948 (match_operand:DI 6 "const_int_operand")]
1949 UNSPEC_SVE_PREFETCH_GATHER)
1950 (match_operand:DI 7 "const_int_operand")
1951 (match_operand:DI 8 "const_int_operand"))]
1954 static const char *const insns[][2] = {
1955 "prfb", "%0, [%1, %2.d, sxtw]",
1956 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
1958 const char *const *parts = insns[which_alternative];
1959 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1961 "&& !rtx_equal_p (operands[0], operands[9])"
1963 operands[9] = copy_rtx (operands[0]);
1967 ;; Likewise, but with the offset being zero-extended from 32 bits.
1968 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
1969 [(prefetch (unspec:DI
1970 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1971 (match_operand:DI 1 "register_operand" "rk, rk")
1973 (match_operand:VNx2DI 2 "register_operand" "w, w")
1974 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
1975 (match_operand:DI 3 "const_int_operand")
1976 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1977 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1978 (match_operand:DI 6 "const_int_operand")]
1979 UNSPEC_SVE_PREFETCH_GATHER)
1980 (match_operand:DI 7 "const_int_operand")
1981 (match_operand:DI 8 "const_int_operand"))]
1984 static const char *const insns[][2] = {
1985 "prfb", "%0, [%1, %2.d, uxtw]",
1986 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
1988 const char *const *parts = insns[which_alternative];
1989 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1993 ;; =========================================================================
1995 ;; =========================================================================
1997 ;; -------------------------------------------------------------------------
1998 ;; ---- Normal contiguous stores
1999 ;; -------------------------------------------------------------------------
2000 ;; Includes contiguous forms of:
2017 ;; -------------------------------------------------------------------------
2020 (define_insn "maskstore<mode><vpred>"
2021 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2023 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2024 (match_operand:SVE_ALL 1 "register_operand" "w")
2028 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2031 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2032 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2033 ;; There doesn't seem to be any obvious benefit to treating the all-true
2034 ;; case differently though. In particular, it's very unlikely that we'll
2035 ;; only find out during RTL that a store_lanes is dead.
2036 (define_expand "vec_store_lanes<mode><vsingle>"
2037 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2040 (match_operand:SVE_STRUCT 1 "register_operand")
2045 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2049 ;; Predicated ST[234].
2050 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2051 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2053 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2054 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2058 "st<vector_count><Vesize>\t%1, %2, %0"
2061 ;; -------------------------------------------------------------------------
2062 ;; ---- Truncating contiguous stores
2063 ;; -------------------------------------------------------------------------
2068 ;; -------------------------------------------------------------------------
2070 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2071 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2072 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2074 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2075 (truncate:VNx8_NARROW
2076 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2080 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2083 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2084 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2085 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2087 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2088 (truncate:VNx4_NARROW
2089 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2093 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2096 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2097 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2098 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2100 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2101 (truncate:VNx2_NARROW
2102 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2106 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2109 ;; -------------------------------------------------------------------------
2110 ;; ---- Non-temporal contiguous stores
2111 ;; -------------------------------------------------------------------------
2117 ;; -------------------------------------------------------------------------
2119 (define_insn "@aarch64_stnt1<mode>"
2120 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2122 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2123 (match_operand:SVE_FULL 1 "register_operand" "w")
2127 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2130 ;; -------------------------------------------------------------------------
2131 ;; ---- Normal scatter stores
2132 ;; -------------------------------------------------------------------------
2133 ;; Includes scatter forms of:
2136 ;; -------------------------------------------------------------------------
2138 ;; Unpredicated scatter stores.
2139 (define_expand "scatter_store<mode><v_int_container>"
2140 [(set (mem:BLK (scratch))
2143 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2144 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2145 (match_operand:DI 2 "const_int_operand")
2146 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2147 (match_operand:SVE_24 4 "register_operand")]
2148 UNSPEC_ST1_SCATTER))]
2151 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2155 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2156 ;; unsigned extension and false for signed extension.
2157 (define_insn "mask_scatter_store<mode><v_int_container>"
2158 [(set (mem:BLK (scratch))
2160 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2161 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2162 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2163 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2164 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2165 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2166 UNSPEC_ST1_SCATTER))]
2169 st1<Vesize>\t%4.s, %5, [%1.s]
2170 st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2171 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2172 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2173 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2174 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2177 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2178 ;; doesn't matter in this case.
2179 (define_insn "mask_scatter_store<mode><v_int_container>"
2180 [(set (mem:BLK (scratch))
2182 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2183 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2184 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2185 (match_operand:DI 2 "const_int_operand")
2186 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2187 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2188 UNSPEC_ST1_SCATTER))]
2191 st1<Vesize>\t%4.d, %5, [%1.d]
2192 st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2193 st1<Vesize>\t%4.d, %5, [%0, %1.d]
2194 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2197 ;; Likewise, but with the offset being extended from 32 bits.
2198 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2199 [(set (mem:BLK (scratch))
2201 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2202 (match_operand:DI 0 "register_operand" "rk, rk")
2206 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2208 (match_operand:DI 2 "const_int_operand")
2209 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2210 (match_operand:SVE_2 4 "register_operand" "w, w")]
2211 UNSPEC_ST1_SCATTER))]
2214 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2215 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2216 "&& !CONSTANT_P (operands[6])"
2218 operands[6] = CONSTM1_RTX (<VPRED>mode);
2222 ;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
2224 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2225 [(set (mem:BLK (scratch))
2227 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2228 (match_operand:DI 0 "register_operand" "rk, rk")
2233 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2235 (match_operand:DI 2 "const_int_operand")
2236 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2237 (match_operand:SVE_2 4 "register_operand" "w, w")]
2238 UNSPEC_ST1_SCATTER))]
2241 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2242 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2243 "&& !CONSTANT_P (operands[6])"
2245 operands[6] = CONSTM1_RTX (<VPRED>mode);
2249 ;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
2251 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2252 [(set (mem:BLK (scratch))
2254 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2255 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2257 (match_operand:VNx2DI 1 "register_operand" "w, w")
2258 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2259 (match_operand:DI 2 "const_int_operand")
2260 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2261 (match_operand:SVE_2 4 "register_operand" "w, w")]
2262 UNSPEC_ST1_SCATTER))]
2265 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2266 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2269 ;; -------------------------------------------------------------------------
2270 ;; ---- Truncating scatter stores
2271 ;; -------------------------------------------------------------------------
2272 ;; Includes scatter forms of:
2276 ;; -------------------------------------------------------------------------
2278 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2279 ;; true for unsigned extension and false for signed extension.
2280 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2281 [(set (mem:BLK (scratch))
2283 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2284 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2285 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2286 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2287 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2288 (truncate:VNx4_NARROW
2289 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2290 UNSPEC_ST1_SCATTER))]
2293 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2294 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2295 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2296 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2297 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2298 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2301 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2302 ;; operand 2 doesn't matter in this case.
2303 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2304 [(set (mem:BLK (scratch))
2306 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2307 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2308 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2309 (match_operand:DI 2 "const_int_operand")
2310 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2311 (truncate:VNx2_NARROW
2312 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2313 UNSPEC_ST1_SCATTER))]
2316 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2317 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2318 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2319 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2322 ;; Likewise, but with the offset being sign-extended from 32 bits.
2323 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2324 [(set (mem:BLK (scratch))
2326 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2327 (match_operand:DI 0 "register_operand" "rk, rk")
2332 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2334 (match_operand:DI 2 "const_int_operand")
2335 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2336 (truncate:VNx2_NARROW
2337 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2338 UNSPEC_ST1_SCATTER))]
2341 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2342 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2343 "&& !rtx_equal_p (operands[5], operands[6])"
2345 operands[6] = copy_rtx (operands[5]);
2349 ;; Likewise, but with the offset being zero-extended from 32 bits.
2350 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2351 [(set (mem:BLK (scratch))
2353 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2354 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2356 (match_operand:VNx2DI 1 "register_operand" "w, w")
2357 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2358 (match_operand:DI 2 "const_int_operand")
2359 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2360 (truncate:VNx2_NARROW
2361 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2362 UNSPEC_ST1_SCATTER))]
2365 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2366 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2369 ;; =========================================================================
2370 ;; == Vector creation
2371 ;; =========================================================================
2373 ;; -------------------------------------------------------------------------
2374 ;; ---- [INT,FP] Duplicate element
2375 ;; -------------------------------------------------------------------------
2387 ;; -------------------------------------------------------------------------
2389 (define_expand "vec_duplicate<mode>"
2391 [(set (match_operand:SVE_ALL 0 "register_operand")
2392 (vec_duplicate:SVE_ALL
2393 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2394 (clobber (scratch:VNx16BI))])]
2397 if (MEM_P (operands[1]))
2399 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2400 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2401 CONST0_RTX (<MODE>mode)));
2407 ;; Accept memory operands for the benefit of combine, and also in case
2408 ;; the scalar input gets spilled to memory during RA. We want to split
2409 ;; the load at the first opportunity in order to allow the PTRUE to be
2410 ;; optimized with surrounding code.
2411 (define_insn_and_split "*vec_duplicate<mode>_reg"
2412 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2413 (vec_duplicate:SVE_ALL
2414 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2415 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2418 mov\t%0.<Vetype>, %<vwcore>1
2419 mov\t%0.<Vetype>, %<Vetype>1
2421 "&& MEM_P (operands[1])"
2424 if (GET_CODE (operands[2]) == SCRATCH)
2425 operands[2] = gen_reg_rtx (VNx16BImode);
2426 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2427 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2428 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2429 CONST0_RTX (<MODE>mode)));
2432 [(set_attr "length" "4,4,8")]
2435 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2436 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2437 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2438 (vec_duplicate:SVE_FULL
2439 (match_operand:<V128> 1 "register_operand" "w")))]
2440 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2442 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2443 return "dup\t%0.q, %1.q[0]";
2447 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2448 ;; The SVE register layout puts memory lane N into (architectural)
2449 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2450 ;; lsb into the register lsb. We therefore have to describe this in rtl
2451 ;; terms as a reverse of the V128 vector followed by a duplicate.
2452 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2453 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2454 (vec_duplicate:SVE_FULL
2456 (match_operand:<V128> 1 "register_operand" "w")
2457 (match_operand 2 "descending_int_parallel"))))]
2460 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2461 GET_MODE_NUNITS (<V128>mode) - 1)"
2463 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2464 return "dup\t%0.q, %1.q[0]";
2468 ;; This is used for vec_duplicate<mode>s from memory, but can also
2469 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2471 (define_insn "sve_ld1r<mode>"
2472 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2474 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2475 (vec_duplicate:SVE_ALL
2476 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2477 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2480 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2483 ;; Load 128 bits from memory under predicate control and duplicate to
2485 (define_insn "@aarch64_sve_ld1rq<mode>"
2486 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2488 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2489 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2493 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2494 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2498 ;; -------------------------------------------------------------------------
2499 ;; ---- [INT,FP] Initialize from individual elements
2500 ;; -------------------------------------------------------------------------
2503 ;; -------------------------------------------------------------------------
2505 (define_expand "vec_init<mode><Vel>"
2506 [(match_operand:SVE_FULL 0 "register_operand")
2507 (match_operand 1 "")]
2510 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2515 ;; Shift an SVE vector left and insert a scalar into element 0.
2516 (define_insn "vec_shl_insert_<mode>"
2517 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2519 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2520 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2524 insr\t%0.<Vetype>, %<vwcore>2
2525 insr\t%0.<Vetype>, %<Vetype>2
2526 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2527 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2528 [(set_attr "movprfx" "*,*,yes,yes")]
2531 ;; -------------------------------------------------------------------------
2532 ;; ---- [INT] Linear series
2533 ;; -------------------------------------------------------------------------
2536 ;; -------------------------------------------------------------------------
2538 (define_insn "vec_series<mode>"
2539 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2541 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2542 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2545 index\t%0.<Vctype>, #%1, %<vccore>2
2546 index\t%0.<Vctype>, %<vccore>1, #%2
2547 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2550 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2551 ;; of an INDEX instruction.
2552 (define_insn "*vec_series<mode>_plus"
2553 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2555 (vec_duplicate:SVE_I
2556 (match_operand:<VEL> 1 "register_operand" "r"))
2557 (match_operand:SVE_I 2 "immediate_operand")))]
2558 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2560 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2561 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2565 ;; -------------------------------------------------------------------------
2566 ;; ---- [PRED] Duplicate element
2567 ;; -------------------------------------------------------------------------
2568 ;; The patterns in this section are synthetic.
2569 ;; -------------------------------------------------------------------------
2571 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2572 ;; input into the top bit and using a WHILELO. An alternative would be to
2573 ;; duplicate the input and do a compare with zero.
2574 (define_expand "vec_duplicate<mode>"
2575 [(set (match_operand:PRED_ALL 0 "register_operand")
2576 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2579 rtx tmp = gen_reg_rtx (DImode);
2580 rtx op1 = gen_lowpart (DImode, operands[1]);
2581 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2582 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2587 ;; =========================================================================
2588 ;; == Vector decomposition
2589 ;; =========================================================================
2591 ;; -------------------------------------------------------------------------
2592 ;; ---- [INT,FP] Extract index
2593 ;; -------------------------------------------------------------------------
2595 ;; - DUP (Advanced SIMD)
2598 ;; - ST1 (Advanced SIMD)
2599 ;; - UMOV (Advanced SIMD)
2600 ;; -------------------------------------------------------------------------
2602 (define_expand "vec_extract<mode><Vel>"
2603 [(set (match_operand:<VEL> 0 "register_operand")
2605 (match_operand:SVE_FULL 1 "register_operand")
2606 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2610 if (poly_int_rtx_p (operands[2], &val)
2611 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2613 /* The last element can be extracted with a LASTB and a false
2615 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2616 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2619 if (!CONST_INT_P (operands[2]))
2621 /* Create an index with operand[2] as the base and -1 as the step.
2622 It will then be zero for the element we care about. */
2623 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2624 index = force_reg (<VEL_INT>mode, index);
2625 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2626 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2628 /* Get a predicate that is true for only that element. */
2629 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2630 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2631 rtx sel = gen_reg_rtx (<VPRED>mode);
2632 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2634 /* Select the element using LASTB. */
2635 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2641 ;; Extract element zero. This is a special case because we want to force
2642 ;; the registers to be the same for the second alternative, and then
2643 ;; split the instruction into nothing after RA.
2644 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2645 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2647 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2648 (parallel [(const_int 0)])))]
2651 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2652 switch (which_alternative)
2655 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2659 return "st1\\t{%1.<Vetype>}[0], %0";
2664 "&& reload_completed
2665 && REG_P (operands[0])
2666 && REGNO (operands[0]) == REGNO (operands[1])"
2669 emit_note (NOTE_INSN_DELETED);
2672 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2675 ;; Extract an element from the Advanced SIMD portion of the register.
2676 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2677 ;; want any change in lane number on big-endian targets.
2678 (define_insn "*vec_extract<mode><Vel>_v128"
2679 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2681 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2682 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2684 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2686 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2687 switch (which_alternative)
2690 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2692 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2694 return "st1\\t{%1.<Vetype>}[%2], %0";
2699 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2702 ;; Extract an element in the range of DUP. This pattern allows the
2703 ;; source and destination to be different.
2704 (define_insn "*vec_extract<mode><Vel>_dup"
2705 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2707 (match_operand:SVE_FULL 1 "register_operand" "w")
2708 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2710 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2712 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2713 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2717 ;; Extract an element outside the range of DUP. This pattern requires the
2718 ;; source and destination to be the same.
2719 (define_insn "*vec_extract<mode><Vel>_ext"
2720 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2722 (match_operand:SVE_FULL 1 "register_operand" "0, w")
2723 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2724 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2726 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2727 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2728 return (which_alternative == 0
2729 ? "ext\t%0.b, %0.b, %0.b, #%2"
2730 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2732 [(set_attr "movprfx" "*,yes")]
2735 ;; -------------------------------------------------------------------------
2736 ;; ---- [INT,FP] Extract active element
2737 ;; -------------------------------------------------------------------------
2741 ;; -------------------------------------------------------------------------
2743 ;; Extract the last active element of operand 1 into operand 0.
2744 ;; If no elements are active, extract the last inactive element instead.
2745 (define_insn "@extract_<last_op>_<mode>"
2746 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2748 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2749 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2753 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2754 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2757 ;; -------------------------------------------------------------------------
2758 ;; ---- [PRED] Extract index
2759 ;; -------------------------------------------------------------------------
2760 ;; The patterns in this section are synthetic.
2761 ;; -------------------------------------------------------------------------
2763 ;; Handle extractions from a predicate by converting to an integer vector
2764 ;; and extracting from there.
;; Strategy: materialize tmp = (pred ? 1 : 0) element-wise via vcond_mask,
;; then reuse the ordinary integer vec_extract on tmp with the same index.
2765 (define_expand "vec_extract<vpred><Vel>"
2766 [(match_operand:<VEL> 0 "register_operand")
2767 (match_operand:<VPRED> 1 "register_operand")
2768 (match_operand:SI 2 "nonmemory_operand")
2769 ;; Dummy operand to which we can attach the iterator.
2770 (reg:SVE_FULL_I V0_REGNUM)]
2773 rtx tmp = gen_reg_rtx (<MODE>mode);
2774 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2775 CONST1_RTX (<MODE>mode),
2776 CONST0_RTX (<MODE>mode)));
2777 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2782 ;; =========================================================================
2783 ;; == Unary arithmetic
2784 ;; =========================================================================
2786 ;; -------------------------------------------------------------------------
2787 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2788 ;; -------------------------------------------------------------------------
2793 ;; - CNT (= popcount)
2796 ;; -------------------------------------------------------------------------
2798 ;; Unpredicated integer unary arithmetic.
;; The expander creates a PTRUE (stored in operand 2) and maps onto the
;; PTRUE-predicated pattern below.
2799 (define_expand "<optab><mode>2"
2800 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2803 (SVE_INT_UNARY:SVE_FULL_I
2804 (match_operand:SVE_FULL_I 1 "register_operand"))]
2808 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2812 ;; Integer unary arithmetic predicated with a PTRUE.
2813 (define_insn "@aarch64_pred_<optab><mode>"
2814 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2816 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2817 (SVE_INT_UNARY:SVE_FULL_I
2818 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
2821 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2824 ;; Predicated integer unary arithmetic with merging.
2825 (define_expand "@cond_<optab><mode>"
2826 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2828 [(match_operand:<VPRED> 1 "register_operand")
2829 (SVE_INT_UNARY:SVE_FULL_I
2830 (match_operand:SVE_FULL_I 2 "register_operand"))
2831 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2836 ;; Predicated integer unary arithmetic, merging with the first input.
;; Alternative 0 operates in place (operand 2 tied to 0); alternative 1
;; MOVPRFX-copies operand 2 first.
2837 (define_insn "*cond_<optab><mode>_2"
2838 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2840 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2841 (SVE_INT_UNARY:SVE_FULL_I
2842 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2847 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2848 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2849 [(set_attr "movprfx" "*,yes")]
2852 ;; Predicated integer unary arithmetic, merging with an independent value.
2854 ;; The earlyclobber isn't needed for the first alternative, but omitting
2855 ;; it would only help the case in which operands 2 and 3 are the same,
2856 ;; which is handled above rather than here. Marking all the alternatives
2857 ;; as earlyclobber helps to make the instruction more regular to the
2858 ;; register allocator.
2859 (define_insn "*cond_<optab><mode>_any"
2860 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2862 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2863 (SVE_INT_UNARY:SVE_FULL_I
2864 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2865 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2867 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2869 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2870 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2871 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2872 [(set_attr "movprfx" "*,yes,yes")]
2875 ;; -------------------------------------------------------------------------
2876 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2877 ;; -------------------------------------------------------------------------
2883 ;; -------------------------------------------------------------------------
2885 ;; Predicated integer unary operations.
;; The <elem_bits> >= <min_elem_bits> condition restricts each unspec
;; operation to the element widths that support it.
2886 (define_insn "@aarch64_pred_<optab><mode>"
2887 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2889 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2891 [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2894 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2895 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2898 ;; Predicated integer unary operations with merging.
;; Alternatives: merge into operand 3 in place / zero-merge via MOVPRFX %1/z
;; / copy the independent merge value (operand 3) first.
2899 (define_insn "@cond_<optab><mode>"
2900 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2902 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2904 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2906 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2908 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2910 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2911 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2912 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2913 [(set_attr "movprfx" "*,yes,yes")]
2916 ;; -------------------------------------------------------------------------
2917 ;; ---- [INT] Sign and zero extension
2918 ;; -------------------------------------------------------------------------
2926 ;; -------------------------------------------------------------------------
2928 ;; Unpredicated sign and zero extension from a narrower mode.
;; The narrower_mask/self_mask test ensures the source's container size is
;; one the destination mode can extend from.
2929 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2930 [(set (match_operand:SVE_HSDI 0 "register_operand")
2933 (ANY_EXTEND:SVE_HSDI
2934 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2936 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2938 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2942 ;; Predicated sign and zero extension from a narrower mode.
2943 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2944 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2946 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2947 (ANY_EXTEND:SVE_HSDI
2948 (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2950 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2951 "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2954 ;; Predicated truncate-and-sign-extend operations.
2955 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2956 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2957 (unspec:SVE_FULL_HSDI
2958 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2959 (sign_extend:SVE_FULL_HSDI
2960 (truncate:SVE_PARTIAL_I
2961 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2964 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2965 "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2968 ;; Predicated truncate-and-sign-extend operations with merging.
2969 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2970 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2971 (unspec:SVE_FULL_HSDI
2972 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2973 (sign_extend:SVE_FULL_HSDI
2974 (truncate:SVE_PARTIAL_I
2975 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
2976 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2979 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2981 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2982 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
2983 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2984 [(set_attr "movprfx" "*,yes,yes")]
2987 ;; Predicated truncate-and-zero-extend operations, merging with the
2990 ;; The canonical form of this operation is an AND of a constant rather
2991 ;; than (zero_extend (truncate ...)).
;; %e3 presumably prints the b/h/w width suffix derived from the UXT
;; immediate in operand 3 — TODO confirm against aarch64 print_operand.
2992 (define_insn "*cond_uxt<mode>_2"
2993 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2995 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2997 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
2998 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3003 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3004 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3005 [(set_attr "movprfx" "*,yes")]
3008 ;; Predicated truncate-and-zero-extend operations, merging with an
3009 ;; independent value.
3011 ;; The earlyclobber isn't needed for the first alternative, but omitting
3012 ;; it would only help the case in which operands 2 and 4 are the same,
3013 ;; which is handled above rather than here. Marking all the alternatives
3014 ;; as early-clobber helps to make the instruction more regular to the
3015 ;; register allocator.
3016 (define_insn "*cond_uxt<mode>_any"
3017 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3019 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3021 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3022 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3023 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3025 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3027 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3028 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3029 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3030 [(set_attr "movprfx" "*,yes,yes")]
3033 ;; -------------------------------------------------------------------------
3034 ;; ---- [INT] Truncation
3035 ;; -------------------------------------------------------------------------
3036 ;; The patterns in this section are synthetic.
3037 ;; -------------------------------------------------------------------------
3039 ;; Truncate to a partial SVE vector from either a full vector or a
3040 ;; wider partial vector. This is a no-op, because we can just ignore
3041 ;; the unused upper bits of the source.
;; After reload the insn splits into a plain register copy, with the source
;; re-expressed in the narrower destination mode (same hard register).
3042 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3043 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3044 (truncate:SVE_PARTIAL_I
3045 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3046 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3048 "&& reload_completed"
3049 [(set (match_dup 0) (match_dup 1))]
3051 operands[1] = aarch64_replace_reg_mode (operands[1],
3052 <SVE_PARTIAL_I:MODE>mode);
3056 ;; -------------------------------------------------------------------------
3057 ;; ---- [INT] Logical inverse
3058 ;; -------------------------------------------------------------------------
3061 ;; -------------------------------------------------------------------------
3063 ;; Predicated logical inverse.
;; The expander fills in zero (operand 4) and one (operand 5) vector
;; constants; CNOT computes (x == 0 ? 1 : 0) per active element.
3064 (define_expand "@aarch64_pred_cnot<mode>"
3065 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3068 [(match_operand:<VPRED> 1 "register_operand")
3069 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3071 (match_operand:SVE_FULL_I 3 "register_operand")
3079 operands[4] = CONST0_RTX (<MODE>mode);
3080 operands[5] = CONST1_RTX (<MODE>mode);
;; Matcher for the compare-with-zero-then-select-1/0 form above, emitted
;; as a single CNOT.
3084 (define_insn "*cnot<mode>"
3085 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3088 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3089 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3091 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3092 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3094 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3098 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3101 ;; Predicated logical inverse with merging.
3102 (define_expand "@cond_cnot<mode>"
3103 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3105 [(match_operand:<VPRED> 1 "register_operand")
3109 (const_int SVE_KNOWN_PTRUE)
3111 (match_operand:SVE_FULL_I 2 "register_operand")
3117 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3121 operands[4] = CONSTM1_RTX (<VPRED>mode);
3122 operands[5] = CONST0_RTX (<MODE>mode);
3123 operands[6] = CONST1_RTX (<MODE>mode);
3127 ;; Predicated logical inverse, merging with the first input.
3128 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3129 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3131 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3132 ;; Logical inverse of operand 2 (as above).
3136 (const_int SVE_KNOWN_PTRUE)
3138 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3139 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3141 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3148 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3149 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3150 "&& !CONSTANT_P (operands[5])"
3152 operands[5] = CONSTM1_RTX (<VPRED>mode);
3154 [(set_attr "movprfx" "*,yes")]
3157 ;; Predicated logical inverse, merging with an independent value.
3159 ;; The earlyclobber isn't needed for the first alternative, but omitting
3160 ;; it would only help the case in which operands 2 and 6 are the same,
3161 ;; which is handled above rather than here. Marking all the alternatives
3162 ;; as earlyclobber helps to make the instruction more regular to the
3163 ;; register allocator.
3164 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3165 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3167 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3168 ;; Logical inverse of operand 2 (as above).
3172 (const_int SVE_KNOWN_PTRUE)
3174 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3175 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3177 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3180 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3182 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3184 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3185 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3186 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3187 "&& !CONSTANT_P (operands[5])"
3189 operands[5] = CONSTM1_RTX (<VPRED>mode);
3191 [(set_attr "movprfx" "*,yes,yes")]
3194 ;; -------------------------------------------------------------------------
3195 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3196 ;; -------------------------------------------------------------------------
3199 ;; -------------------------------------------------------------------------
3201 ;; Unpredicated unary operations that take an integer and return a float.
;; Operand 1 is the integer vector of equal width (<V_INT_EQUIV>).
3202 (define_insn "@aarch64_sve_<optab><mode>"
3203 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3205 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3208 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3211 ;; -------------------------------------------------------------------------
3212 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3213 ;; -------------------------------------------------------------------------
3228 ;; -------------------------------------------------------------------------
3230 ;; Unpredicated floating-point unary operations.
3231 (define_insn "@aarch64_sve_<optab><mode>"
3232 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3234 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3237 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3240 ;; Unpredicated floating-point unary operations.
;; Expands to the predicated form with a PTRUE (operand 2) and a relaxed
;; governing-predicate requirement (SVE_RELAXED_GP).
3241 (define_expand "<optab><mode>2"
3242 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3245 (const_int SVE_RELAXED_GP)
3246 (match_operand:SVE_FULL_F 1 "register_operand")]
3247 SVE_COND_FP_UNARY))]
3250 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3254 ;; Predicated floating-point unary operations.
3255 (define_insn "@aarch64_pred_<optab><mode>"
3256 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3258 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3259 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3260 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3261 SVE_COND_FP_UNARY))]
3263 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3266 ;; Predicated floating-point unary arithmetic with merging.
3267 (define_expand "@cond_<optab><mode>"
3268 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3270 [(match_operand:<VPRED> 1 "register_operand")
3273 (const_int SVE_STRICT_GP)
3274 (match_operand:SVE_FULL_F 2 "register_operand")]
3276 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3281 ;; Predicated floating-point unary arithmetic, merging with the first input.
;; Once the outer predicate is known to dominate the inner one, the rewrite
;; replaces the inner predicate (operand 3) with a copy of operand 1.
3282 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3283 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3285 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3288 (match_operand:SI 4 "aarch64_sve_gp_strictness")
3289 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3293 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3295 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3296 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3297 "&& !rtx_equal_p (operands[1], operands[3])"
3299 operands[3] = copy_rtx (operands[1]);
3301 [(set_attr "movprfx" "*,yes")]
3304 ;; Predicated floating-point unary arithmetic, merging with an independent
3307 ;; The earlyclobber isn't needed for the first alternative, but omitting
3308 ;; it would only help the case in which operands 2 and 3 are the same,
3309 ;; which is handled above rather than here. Marking all the alternatives
3310 ;; as earlyclobber helps to make the instruction more regular to the
3311 ;; register allocator.
3312 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3313 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3315 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3318 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3319 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3321 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3324 && !rtx_equal_p (operands[2], operands[3])
3325 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3327 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3328 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3329 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3330 "&& !rtx_equal_p (operands[1], operands[4])"
3332 operands[4] = copy_rtx (operands[1]);
3334 [(set_attr "movprfx" "*,yes,yes")]
3337 ;; -------------------------------------------------------------------------
3338 ;; ---- [PRED] Inverse
3339 ;; -------------------------------------------------------------------------
3342 ;; -------------------------------------------------------------------------
3344 ;; Unpredicated predicate inverse.
;; The expander supplies a PTRUE (operand 2) so the predicated NOT pattern
;; below can be used.
3345 (define_expand "one_cmpl<mode>2"
3346 [(set (match_operand:PRED_ALL 0 "register_operand")
3348 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3352 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3356 ;; Predicated predicate inverse.
;; NOT with zeroing predication (%1/z): inactive lanes of the result are 0.
3357 (define_insn "*one_cmpl<mode>3"
3358 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3360 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3361 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3363 "not\t%0.b, %1/z, %2.b"
3366 ;; =========================================================================
3367 ;; == Binary arithmetic
3368 ;; =========================================================================
3370 ;; -------------------------------------------------------------------------
3371 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3372 ;; -------------------------------------------------------------------------
3374 ;; - ADD (merging form only)
3375 ;; - AND (merging form only)
3376 ;; - ASR (merging form only)
3377 ;; - EOR (merging form only)
3378 ;; - LSL (merging form only)
3379 ;; - LSR (merging form only)
3381 ;; - ORR (merging form only)
3384 ;; - SUB (merging form only)
3387 ;; -------------------------------------------------------------------------
3389 ;; Unpredicated integer binary operations that have an immediate form.
;; The expander creates a PTRUE (operand 3) for the predicated pattern.
3390 (define_expand "<optab><mode>3"
3391 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3394 (SVE_INT_BINARY_IMM:SVE_FULL_I
3395 (match_operand:SVE_FULL_I 1 "register_operand")
3396 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3400 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3404 ;; Integer binary operations that have an immediate form, predicated
3405 ;; with a PTRUE. We don't actually need the predicate for the first
3406 ;; and third alternatives, but using Upa or X isn't likely to gain much
3407 ;; and would make the instruction seem less uniform to the register
3409 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3410 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3412 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3413 (SVE_INT_BINARY_IMM:SVE_FULL_I
3414 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3415 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3420 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3422 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3423 ; Split the unpredicated form after reload, so that we don't have
3424 ; the unnecessary PTRUE.
3425 "&& reload_completed
3426 && !register_operand (operands[3], <MODE>mode)"
3428 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3430 [(set_attr "movprfx" "*,*,yes,yes")]
3433 ;; Unpredicated binary operations with a constant (post-RA only).
3434 ;; These are generated by splitting a predicated instruction whose
3435 ;; predicate is unused.
3436 (define_insn "*post_ra_<optab><mode>3"
3437 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3438 (SVE_INT_BINARY_IMM:SVE_FULL_I
3439 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3440 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3441 "TARGET_SVE && reload_completed"
3443 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3444 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3445 [(set_attr "movprfx" "*,yes")]
3448 ;; Predicated integer operations with merging.
3449 (define_expand "@cond_<optab><mode>"
3450 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3452 [(match_operand:<VPRED> 1 "register_operand")
3453 (SVE_INT_BINARY:SVE_FULL_I
3454 (match_operand:SVE_FULL_I 2 "register_operand")
3455 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3456 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3461 ;; Predicated integer operations, merging with the first input.
3462 (define_insn "*cond_<optab><mode>_2"
3463 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3465 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3466 (SVE_INT_BINARY:SVE_FULL_I
3467 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3468 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3473 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3474 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3475 [(set_attr "movprfx" "*,yes")]
3478 ;; Predicated integer operations, merging with the second input.
;; Uses the reversed mnemonic (<sve_int_op_rev>) since the accumulator is
;; the second rtx operand here.
3479 (define_insn "*cond_<optab><mode>_3"
3480 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3482 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3483 (SVE_INT_BINARY:SVE_FULL_I
3484 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3485 (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3490 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3491 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3492 [(set_attr "movprfx" "*,yes")]
3495 ;; Predicated integer operations, merging with an independent value.
;; Rewrite: after reload, when operand 4 is a register distinct from
;; operand 0, first emit a vector select of (2, 4) under predicate 1 into
;; operand 0, then operate on operand 0 in place.
3496 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3497 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3499 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3500 (SVE_INT_BINARY:SVE_FULL_I
3501 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3502 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3503 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3506 && !rtx_equal_p (operands[2], operands[4])
3507 && !rtx_equal_p (operands[3], operands[4])"
3509 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3510 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3511 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3512 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3514 "&& reload_completed
3515 && register_operand (operands[4], <MODE>mode)
3516 && !rtx_equal_p (operands[0], operands[4])"
3518 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3519 operands[4], operands[1]));
3520 operands[4] = operands[2] = operands[0];
3522 [(set_attr "movprfx" "yes")]
3525 ;; -------------------------------------------------------------------------
3526 ;; ---- [INT] Addition
3527 ;; -------------------------------------------------------------------------
3539 ;; -------------------------------------------------------------------------
;; Alternatives (matching the vsa/vsn/vsi constraints in order): ADD
;; immediate, SUB of the negated immediate, INC/DEC (vector inc/dec
;; constants), the two MOVPRFX-prefixed immediate forms, and the
;; register-register ADD.
3541 (define_insn "add<mode>3"
3542 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3544 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3545 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3548 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3549 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3550 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3551 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3552 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3553 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3554 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3557 ;; Merging forms are handled through SVE_INT_BINARY.
3559 ;; -------------------------------------------------------------------------
3560 ;; ---- [INT] Subtraction
3561 ;; -------------------------------------------------------------------------
3565 ;; -------------------------------------------------------------------------
;; Alternative 0 is the register-register SUB; alternatives 1-2 handle an
;; immediate minuend via SUBR (reversed subtract), with MOVPRFX in the
;; untied case.
3567 (define_insn "sub<mode>3"
3568 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
3570 (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3571 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))]
3574 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3575 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3576 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3577 [(set_attr "movprfx" "*,*,yes")]
3580 ;; Merging forms are handled through SVE_INT_BINARY.
3582 ;; -------------------------------------------------------------------------
3583 ;; ---- [INT] Take address
3584 ;; -------------------------------------------------------------------------
3587 ;; -------------------------------------------------------------------------
3589 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
3590 ;; but the svadrb intrinsics should preserve the user's choice.
3591 (define_insn "@aarch64_adr<mode>"
3592 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3593 (unspec:SVE_FULL_SDI
3594 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3595 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3598 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3601 ;; Same, but with the offset being sign-extended from the low 32 bits.
;; The rewrite replaces a non-constant inner predicate (operand 3) with an
;; all-ones predicate once it no longer matters.
3602 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3603 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3605 [(match_operand:VNx2DI 1 "register_operand" "w")
3610 (match_operand:VNx2DI 2 "register_operand" "w")))]
3614 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3615 "&& !CONSTANT_P (operands[3])"
3617 operands[3] = CONSTM1_RTX (VNx2BImode);
3621 ;; Same, but with the offset being zero-extended from the low 32 bits.
3622 (define_insn "*aarch64_adr_uxtw_unspec"
3623 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3625 [(match_operand:VNx2DI 1 "register_operand" "w")
3627 (match_operand:VNx2DI 2 "register_operand" "w")
3628 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3631 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3634 ;; Same, matching as a PLUS rather than unspec.
3635 (define_insn "*aarch64_adr_uxtw_and"
3636 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3639 (match_operand:VNx2DI 2 "register_operand" "w")
3640 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3641 (match_operand:VNx2DI 1 "register_operand" "w")))]
3643 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3646 ;; ADR with a nonzero shift.
;; The expander creates the all-ones predicate (operand 4) consumed by the
;; matching insn below.
3647 (define_expand "@aarch64_adr<mode>_shift"
3648 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3650 (unspec:SVE_FULL_SDI
3652 (ashift:SVE_FULL_SDI
3653 (match_operand:SVE_FULL_SDI 2 "register_operand")
3654 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3656 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3659 operands[4] = CONSTM1_RTX (<VPRED>mode);
3663 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3664 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3666 (unspec:SVE_FULL_SDI
3668 (ashift:SVE_FULL_SDI
3669 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3670 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3672 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3674 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3675 "&& !CONSTANT_P (operands[4])"
3677 operands[4] = CONSTM1_RTX (<VPRED>mode);
3681 ;; Same, but with the index being sign-extended from the low 32 bits.
3682 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3683 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3692 (match_operand:VNx2DI 2 "register_operand" "w")))]
3694 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3696 (match_operand:VNx2DI 1 "register_operand" "w")))]
3698 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3699 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3701 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3705 ;; Same, but with the index being zero-extended from the low 32 bits.
3706 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3707 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3713 (match_operand:VNx2DI 2 "register_operand" "w")
3714 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3715 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3717 (match_operand:VNx2DI 1 "register_operand" "w")))]
3719 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3720 "&& !CONSTANT_P (operands[5])"
3722 operands[5] = CONSTM1_RTX (VNx2BImode);
3726 ;; -------------------------------------------------------------------------
3727 ;; ---- [INT] Absolute difference
3728 ;; -------------------------------------------------------------------------
3732 ;; -------------------------------------------------------------------------
3734 ;; Unpredicated integer absolute difference.
;; NOTE(review): abs-diff appears to be modelled as max(x,y) - min(x,y)
;; (the <max_opp> pairing below) — the minus lines are elided in this
;; listing; confirm against the upstream file.
3735 (define_expand "<su>abd<mode>_3"
3736 [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3738 (match_operand:SVE_FULL_I 1 "register_operand")
3739 (match_operand:SVE_FULL_I 2 "register_operand"))]
3742 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3743 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3749 ;; Predicated integer absolute difference.
3750 (define_insn "@aarch64_pred_<su>abd<mode>"
3751 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3753 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3756 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3757 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3758 (<max_opp>:SVE_FULL_I
3764 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3765 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3766 [(set_attr "movprfx" "*,yes")]
;; Merging expansion; canonicalizes so that a merge value equal to
;; operand 3 becomes the tied first input (SABD/UABD are commutative in
;; their two data inputs, hence the swap is safe).
3769 (define_expand "@aarch64_cond_<su>abd<mode>"
3770 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3772 [(match_operand:<VPRED> 1 "register_operand")
3777 (match_operand:SVE_FULL_I 2 "register_operand")
3778 (match_operand:SVE_FULL_I 3 "register_operand"))]
3782 (<max_opp>:SVE_FULL_I
3786 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3790 if (rtx_equal_p (operands[3], operands[4]))
3791 std::swap (operands[2], operands[3]);
3794 ;; Predicated integer absolute difference, merging with the first input.
3795 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3796 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3798 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3803 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3804 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3808 (<max_opp>:SVE_FULL_I
3816 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3817 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3818 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3820 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3822 [(set_attr "movprfx" "*,yes")]
3825 ;; Predicated integer absolute difference, merging with an independent value.
3826 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3827 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3829 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3834 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3835 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3839 (<max_opp>:SVE_FULL_I
3843 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3846 && !rtx_equal_p (operands[2], operands[4])
3847 && !rtx_equal_p (operands[3], operands[4])"
3849 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3850 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3851 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3852 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3856 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3857 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3858 else if (reload_completed
3859 && register_operand (operands[4], <MODE>mode)
3860 && !rtx_equal_p (operands[0], operands[4]))
3862 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3863 operands[4], operands[1]));
3864 operands[4] = operands[2] = operands[0];
3869 [(set_attr "movprfx" "yes")]
3872 ;; -------------------------------------------------------------------------
3873 ;; ---- [INT] Saturating addition and subtraction
3874 ;; -------------------------------------------------------------------------
3879 ;; -------------------------------------------------------------------------
3881 ;; Unpredicated saturating signed addition and subtraction.
;; Immediate alternatives come in pairs: the operation as written (#%D2)
;; and the reversed operation on the negated immediate (#%N2), each with
;; a tied-register and a MOVPRFX variant; the last alternative is the
;; all-register form.
3882 (define_insn "@aarch64_<su_optab><optab><mode>"
3883 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3884 (SBINQOPS:SVE_FULL_I
3885 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3886 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3889 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3890 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3891 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3892 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3893 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3894 [(set_attr "movprfx" "*,*,yes,yes,*")]
3897 ;; Unpredicated saturating unsigned addition and subtraction.
;; Unsigned ops have no negated-immediate form, so only three
;; alternatives are needed: immediate, MOVPRFX + immediate, register.
3898 (define_insn "@aarch64_<su_optab><optab><mode>"
3899 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3900 (UBINQOPS:SVE_FULL_I
3901 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3902 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3905 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3906 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3907 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3908 [(set_attr "movprfx" "*,yes,*")]
3911 ;; -------------------------------------------------------------------------
3912 ;; ---- [INT] Highpart multiplication
3913 ;; -------------------------------------------------------------------------
3917 ;; -------------------------------------------------------------------------
3919 ;; Unpredicated highpart multiplication.
;; Expands to the predicated form with a PTRUE governing predicate
;; (operand 3).
3920 (define_expand "<su>mul<mode>3_highpart"
3921 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3925 [(match_operand:SVE_FULL_I 1 "register_operand")
3926 (match_operand:SVE_FULL_I 2 "register_operand")]
3931 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3935 ;; Predicated highpart multiplication.
;; "%0" marks the inputs as commutative; the second alternative uses
;; MOVPRFX instead of tying operand 2 to the destination.
3936 (define_insn "@aarch64_pred_<optab><mode>"
3937 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3939 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3941 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3942 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3947 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3948 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3949 [(set_attr "movprfx" "*,yes")]
3952 ;; Predicated highpart multiplications with merging.
3953 (define_expand "@cond_<optab><mode>"
3954 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3956 [(match_operand:<VPRED> 1 "register_operand")
3958 [(match_operand:SVE_FULL_I 2 "register_operand")
3959 (match_operand:SVE_FULL_I 3 "register_operand")]
3961 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3965 /* Only target code is aware of these operations, so we don't need
3966 to handle the fully-general case. */
;; The assertion documents the supported shapes: merge with the first
;; input, or merge with a constant (e.g. zero).
3967 gcc_assert (rtx_equal_p (operands[2], operands[4])
3968 || CONSTANT_P (operands[4]));
3971 ;; Predicated highpart multiplications, merging with the first input.
3972 (define_insn "*cond_<optab><mode>_2"
3973 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3975 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3977 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3978 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3984 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3985 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3986 [(set_attr "movprfx" "*,yes")])
3988 ;; Predicated highpart multiplications, merging with zero.
;; Both alternatives need a zeroing MOVPRFX, hence the earlyclobber
;; destinations and the unconditional "movprfx yes" attribute.
3989 (define_insn "*cond_<optab><mode>_z"
3990 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
3992 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3994 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3995 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
3997 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4001 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4002 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4003 [(set_attr "movprfx" "yes")])
4005 ;; -------------------------------------------------------------------------
4006 ;; ---- [INT] Division
4007 ;; -------------------------------------------------------------------------
4013 ;; -------------------------------------------------------------------------
4015 ;; Unpredicated integer division.
;; SVE division only exists for 32-bit and 64-bit elements, hence the
;; SVE_FULL_SDI mode iterator throughout this section.
4016 (define_expand "<optab><mode>3"
4017 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4018 (unspec:SVE_FULL_SDI
4020 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4021 (match_operand:SVE_FULL_SDI 1 "register_operand")
4022 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4026 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4030 ;; Integer division predicated with a PTRUE.
;; Division is not commutative, so a reversed form (<sve_int_op>r) is
;; used when the second input is tied to the destination.
4031 (define_insn "@aarch64_pred_<optab><mode>"
4032 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4033 (unspec:SVE_FULL_SDI
4034 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4035 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4036 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4037 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4041 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4042 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4043 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4044 [(set_attr "movprfx" "*,*,yes")]
4047 ;; Predicated integer division with merging.
4048 (define_expand "@cond_<optab><mode>"
4049 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4050 (unspec:SVE_FULL_SDI
4051 [(match_operand:<VPRED> 1 "register_operand")
4052 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4053 (match_operand:SVE_FULL_SDI 2 "register_operand")
4054 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4055 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4060 ;; Predicated integer division, merging with the first input.
4061 (define_insn "*cond_<optab><mode>_2"
4062 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4063 (unspec:SVE_FULL_SDI
4064 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4065 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4066 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4067 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4072 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4073 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4074 [(set_attr "movprfx" "*,yes")]
4077 ;; Predicated integer division, merging with the second input.
;; Uses the reversed instruction so the tied register can be operand 3.
4078 (define_insn "*cond_<optab><mode>_3"
4079 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4080 (unspec:SVE_FULL_SDI
4081 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4082 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4083 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4084 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4089 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4090 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4091 [(set_attr "movprfx" "*,yes")]
4094 ;; Predicated integer division, merging with an independent value.
;; Alternatives cover a zero merge value (Dz), a merge value tied to the
;; destination ("0") and a general register ("w"); the condition below
;; excludes cases already handled by the _2/_3 patterns above.
4095 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4096 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4097 (unspec:SVE_FULL_SDI
4098 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4099 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4100 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4101 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4102 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4105 && !rtx_equal_p (operands[2], operands[4])
4106 && !rtx_equal_p (operands[3], operands[4])"
4108 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4109 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4110 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4111 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; Post-RA rewrite: when the merge value is a register distinct from the
;; destination, do the merge with an explicit vcond_mask and retarget the
;; division to work entirely on the destination register.
4113 "&& reload_completed
4114 && register_operand (operands[4], <MODE>mode)
4115 && !rtx_equal_p (operands[0], operands[4])"
4117 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4118 operands[4], operands[1]));
4119 operands[4] = operands[2] = operands[0];
4121 [(set_attr "movprfx" "yes")]
4124 ;; -------------------------------------------------------------------------
4125 ;; ---- [INT] Binary logical operations
4126 ;; -------------------------------------------------------------------------
4131 ;; -------------------------------------------------------------------------
4133 ;; Unpredicated integer binary logical operations.
;; The register-register form is encoded element-size agnostically as a
;; .d operation; the immediate forms (#%C2) operate per element type.
4134 (define_insn "<optab><mode>3"
4135 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w")
4137 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w")
4138 (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4141 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4142 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4143 <logical>\t%0.d, %1.d, %2.d"
4144 [(set_attr "movprfx" "*,yes,*")]
4147 ;; Merging forms are handled through SVE_INT_BINARY.
4149 ;; -------------------------------------------------------------------------
4150 ;; ---- [INT] Binary logical operations (inverted second input)
4151 ;; -------------------------------------------------------------------------
4154 ;; -------------------------------------------------------------------------
4156 ;; Unpredicated BIC.
;; Operand 3 (the predicate) is initialized to an all-ones constant; the
;; insn below rematerializes it if it is later clobbered.
4157 (define_expand "@aarch64_bic<mode>"
4158 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4162 (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))]
4164 (match_operand:SVE_FULL_I 1 "register_operand")))]
4167 operands[3] = CONSTM1_RTX (<VPRED>mode);
4172 (define_insn_and_rewrite "*bic<mode>3"
4173 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4178 (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
4180 (match_operand:SVE_FULL_I 1 "register_operand" "w")))]
4182 "bic\t%0.d, %1.d, %2.d"
;; Rewrite: restore the canonical all-ones predicate when it is no
;; longer a constant.
4183 "&& !CONSTANT_P (operands[3])"
4185 operands[3] = CONSTM1_RTX (<VPRED>mode);
4189 ;; Predicated BIC with merging.
;; Note the operand order: operand 3 is the inverted input, operand 2
;; the plain input.
4190 (define_expand "@cond_bic<mode>"
4191 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4193 [(match_operand:<VPRED> 1 "register_operand")
4195 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4196 (match_operand:SVE_FULL_I 2 "register_operand"))
4197 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4202 ;; Predicated integer BIC, merging with the first input.
4203 (define_insn "*cond_bic<mode>_2"
4204 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4206 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4209 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4210 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4215 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4216 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4217 [(set_attr "movprfx" "*,yes")]
4220 ;; Predicated integer BIC, merging with an independent value.
;; Alternatives: zero merge value (Dz, two variants), merge value tied
;; to the destination ("0"), and a general register ("w").
4221 (define_insn_and_rewrite "*cond_bic<mode>_any"
4222 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4224 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4227 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4228 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4229 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4231 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4233 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4234 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4235 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; Post-RA rewrite: merge via an explicit vcond_mask when the merge
;; value is a register distinct from the destination.
4237 "&& reload_completed
4238 && register_operand (operands[4], <MODE>mode)
4239 && !rtx_equal_p (operands[0], operands[4])"
4241 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4242 operands[4], operands[1]));
4243 operands[4] = operands[2] = operands[0];
4245 [(set_attr "movprfx" "yes")]
4248 ;; -------------------------------------------------------------------------
4249 ;; ---- [INT] Shifts (rounding towards -Inf)
4250 ;; -------------------------------------------------------------------------
4258 ;; -------------------------------------------------------------------------
4260 ;; Unpredicated shift by a scalar, which expands into one of the vector
;; Constant amounts are duplicated into a vector (forced into a register
;; if out of immediate range); variable amounts are broadcast with
;; vec_duplicate after conversion to the element mode.
4262 (define_expand "<ASHIFT:optab><mode>3"
4263 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4265 (match_operand:SVE_FULL_I 1 "register_operand")
4266 (match_operand:<VEL> 2 "general_operand")))]
4270 if (CONST_INT_P (operands[2]))
4272 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4273 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4274 amount = force_reg (<MODE>mode, amount);
4278 amount = gen_reg_rtx (<MODE>mode);
4279 emit_insn (gen_vec_duplicate<mode> (amount,
4280 convert_to_mode (<VEL>mode,
4283 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4288 ;; Unpredicated shift by a vector.
4289 (define_expand "v<optab><mode>3"
4290 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4294 (match_operand:SVE_FULL_I 1 "register_operand")
4295 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4299 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4303 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4304 ;; the predicate for the first alternative, but using Upa or X isn't
4305 ;; likely to gain much and would make the instruction seem less uniform
4306 ;; to the register allocator.
;; The first alternative matches an immediate amount (D<lr>) and splits
;; post-RA into the unpredicated *post_ra pattern below.
4307 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4308 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4310 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4312 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4313 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4318 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4319 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4320 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4321 "&& reload_completed
4322 && !register_operand (operands[3], <MODE>mode)"
4323 [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4325 [(set_attr "movprfx" "*,*,*,yes")]
4328 ;; Unpredicated shift operations by a constant (post-RA only).
4329 ;; These are generated by splitting a predicated instruction whose
4330 ;; predicate is unused.
4331 (define_insn "*post_ra_v<optab><mode>3"
4332 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4334 (match_operand:SVE_FULL_I 1 "register_operand" "w")
4335 (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4336 "TARGET_SVE && reload_completed"
4337 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4340 ;; Predicated integer shift, merging with the first input.
;; The shift amount is an immediate vector here, so only two
;; alternatives are needed: tied destination, or MOVPRFX.
4341 (define_insn "*cond_<optab><mode>_2_const"
4342 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4344 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4346 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4347 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4352 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4353 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4354 [(set_attr "movprfx" "*,yes")]
4357 ;; Predicated integer shift, merging with an independent value.
;; Merge value is zero (Dz), tied to the destination ("0"), or a general
;; register ("w"); the last case is rewritten post-RA via vcond_mask.
4358 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4359 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4361 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4363 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4364 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4365 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4367 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4369 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4370 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4372 "&& reload_completed
4373 && register_operand (operands[4], <MODE>mode)
4374 && !rtx_equal_p (operands[0], operands[4])"
4376 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4377 operands[4], operands[1]));
4378 operands[4] = operands[2] = operands[0];
4380 [(set_attr "movprfx" "yes")]
4383 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
;; Operand 2 is always a VNx2DI vector of shift amounts, regardless of
;; the (byte/half/word) element size being shifted.
4384 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4385 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4386 (unspec:SVE_FULL_BHSI
4387 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4388 (match_operand:VNx2DI 2 "register_operand" "w")]
4391 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4394 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4395 (define_expand "@cond_<sve_int_op><mode>"
4396 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4397 (unspec:SVE_FULL_BHSI
4398 [(match_operand:<VPRED> 1 "register_operand")
4399 (unspec:SVE_FULL_BHSI
4400 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4401 (match_operand:VNx2DI 3 "register_operand")]
4403 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4408 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4410 (define_insn "*cond_<sve_int_op><mode>_m"
4411 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4412 (unspec:SVE_FULL_BHSI
4413 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4414 (unspec:SVE_FULL_BHSI
4415 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4416 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4422 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4423 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4424 [(set_attr "movprfx" "*, yes")])
4426 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
;; Both alternatives use a zeroing MOVPRFX, hence the earlyclobber
;; destinations.
4427 (define_insn "*cond_<sve_int_op><mode>_z"
4428 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4429 (unspec:SVE_FULL_BHSI
4430 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4431 (unspec:SVE_FULL_BHSI
4432 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4433 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4435 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4439 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4440 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4441 [(set_attr "movprfx" "yes")])
4443 ;; -------------------------------------------------------------------------
4444 ;; ---- [INT] Shifts (rounding towards 0)
4445 ;; -------------------------------------------------------------------------
4448 ;; -------------------------------------------------------------------------
4450 ;; Unpredicated ASRD.
;; ASRD implements signed division by a power of 2, hence the expander
;; name; operand 3 is filled in with a PTRUE.
4451 (define_expand "sdiv_pow2<mode>3"
4452 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4456 [(match_operand:SVE_FULL_I 1 "register_operand")
4457 (match_operand 2 "aarch64_simd_rshift_imm")]
4463 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4467 ;; Predicated ASRD with merging.
4468 (define_expand "@cond_asrd<mode>"
4469 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4471 [(match_operand:<VPRED> 1 "register_operand")
4473 [(match_operand:SVE_FULL_I 2 "register_operand")
4474 (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
4476 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4481 ;; Predicated ASRD, merging with the first input.
4482 (define_insn "*cond_asrd<mode>_2"
4483 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4485 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4487 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4488 (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
4494 asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4495 movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4496 [(set_attr "movprfx" "*,yes")])
4498 ;; Predicated ASRD, merging with zero.
;; Single alternative: zeroing MOVPRFX from operand 2, then ASRD in place.
4499 (define_insn "*cond_asrd<mode>_z"
4500 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4502 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4504 [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4505 (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
4507 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4510 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4511 [(set_attr "movprfx" "yes")])
4513 ;; -------------------------------------------------------------------------
4514 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4515 ;; -------------------------------------------------------------------------
4520 ;; -------------------------------------------------------------------------
4522 ;; Unpredicated floating-point binary operations that take an integer as
4523 ;; their second operand.
4524 (define_insn "@aarch64_sve_<optab><mode>"
4525 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4527 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4528 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4529 SVE_FP_BINARY_INT))]
4531 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4534 ;; Predicated floating-point binary operations that take an integer
4535 ;; as their second operand.
;; Operand 4 records the required FP governing-predicate strictness
;; (relaxed vs strict exception behavior).
4536 (define_insn "@aarch64_pred_<optab><mode>"
4537 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4539 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4540 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4541 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4542 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4543 SVE_COND_FP_BINARY_INT))]
4546 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4547 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4548 [(set_attr "movprfx" "*,yes")]
4551 ;; Predicated floating-point binary operations with merging, taking an
4552 ;; integer as their second operand.
;; Merging forms require strict governing-predicate semantics.
4553 (define_expand "@cond_<optab><mode>"
4554 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4556 [(match_operand:<VPRED> 1 "register_operand")
4559 (const_int SVE_STRICT_GP)
4560 (match_operand:SVE_FULL_F 2 "register_operand")
4561 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4562 SVE_COND_FP_BINARY_INT)
4563 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4568 ;; Predicated floating-point binary operations that take an integer as their
4569 ;; second operand, with inactive lanes coming from the first operand.
;; Valid only while the outer predicate dominates the inner one; the
;; rewrite step re-synchronizes operand 4 with operand 1.
4570 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4571 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4573 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4576 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4577 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4578 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4579 SVE_COND_FP_BINARY_INT)
4582 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4584 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4585 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4586 "&& !rtx_equal_p (operands[1], operands[4])"
4588 operands[4] = copy_rtx (operands[1])
4590 [(set_attr "movprfx" "*,yes")]
4593 ;; Predicated floating-point binary operations that take an integer as
4594 ;; their second operand, with the values of inactive lanes being distinct
4595 ;; from the other inputs.
4596 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4597 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4599 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4602 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4603 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4604 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4605 SVE_COND_FP_BINARY_INT)
4606 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4609 && !rtx_equal_p (operands[2], operands[4])
4610 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4612 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4613 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4614 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; Rewrite: post-RA, merge a distinct register value via vcond_mask and
;; retarget the operation; otherwise keep operand 5 in sync with the
;; governing predicate.
4618 if (reload_completed
4619 && register_operand (operands[4], <MODE>mode)
4620 && !rtx_equal_p (operands[0], operands[4]))
4622 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4623 operands[4], operands[1]));
4624 operands[4] = operands[2] = operands[0];
4626 else if (!rtx_equal_p (operands[1], operands[5]))
4627 operands[5] = copy_rtx (operands[1]);
4631 [(set_attr "movprfx" "yes")]
4634 ;; -------------------------------------------------------------------------
4635 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4636 ;; -------------------------------------------------------------------------
4637 ;; Includes post-RA forms of:
4641 ;; -------------------------------------------------------------------------
4643 ;; Unpredicated floating-point binary operations (post-RA only).
4644 ;; These are generated by splitting a predicated instruction whose
4645 ;; predicate is unused.
4646 (define_insn "*post_ra_<sve_fp_op><mode>3"
4647 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4648 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4649 (match_operand:SVE_FULL_F 1 "register_operand" "w")
4650 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4651 "TARGET_SVE && reload_completed"
4652 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4654 ;; -------------------------------------------------------------------------
4655 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4656 ;; -------------------------------------------------------------------------
4657 ;; Includes merging forms of:
4658 ;; - FADD (constant forms handled in the "Addition" section)
4662 ;; - FMAXNM (including #0.0 and #1.0)
4664 ;; - FMINNM (including #0.0 and #1.0)
4665 ;; - FMUL (including #0.5 and #2.0)
4669 ;; - FSUB (constant forms handled in the "Addition" section)
4670 ;; - FSUBR (constant forms handled in the "Subtraction" section)
4671 ;; -------------------------------------------------------------------------
4673 ;; Unpredicated floating-point binary operations.
4674 (define_insn "@aarch64_sve_<optab><mode>"
4675 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4677 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4678 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4681 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4684 ;; Unpredicated floating-point binary operations that need to be predicated
;; Uses a relaxed governing predicate (SVE_RELAXED_GP) and a PTRUE,
;; since the result of inactive lanes does not matter here.
4686 (define_expand "<optab><mode>3"
4687 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4690 (const_int SVE_RELAXED_GP)
4691 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4692 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4693 SVE_COND_FP_BINARY))]
4696 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4700 ;; Predicated floating-point binary operations that have no immediate forms.
;; Non-commutative cases use the reversed mnemonic (<sve_fp_op_rev>)
;; when the second input is tied to the destination.
4701 (define_insn "@aarch64_pred_<optab><mode>"
4702 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4704 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4705 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4706 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4707 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4708 SVE_COND_FP_BINARY_REG))]
4711 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4712 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4713 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4714 [(set_attr "movprfx" "*,*,yes")]
4717 ;; Predicated floating-point operations with merging.
4718 (define_expand "@cond_<optab><mode>"
4719 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4721 [(match_operand:<VPRED> 1 "register_operand")
4724 (const_int SVE_STRICT_GP)
4725 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4726 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4728 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4733 ;; Predicated floating-point operations, merging with the first input.
;; The rewrite keeps operand 4 (the predicate under which inactive
;; lanes are taken from operand 2) identical to operand 1.
4734 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4735 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4737 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4740 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4741 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4742 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4746 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4748 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4749 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4750 "&& !rtx_equal_p (operands[1], operands[4])"
4752 operands[4] = copy_rtx (operands[1]);
4754 [(set_attr "movprfx" "*,yes")]
4757 ;; Same for operations that take a 1-bit constant.
;; Operand 3 is restricted to the operation's immediate form
;; (<sve_pred_fp_rhs2_immediate>), emitted as #%3.
4758 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4759 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4761 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4764 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4765 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4766 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4767 SVE_COND_FP_BINARY_I1)
4770 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4772 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4773 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4774 "&& !rtx_equal_p (operands[1], operands[4])"
4776 operands[4] = copy_rtx (operands[1]);
4778 [(set_attr "movprfx" "*,yes")]
4781 ;; Predicated floating-point operations, merging with the second input.
;; Uses the reversed mnemonic so the tied register can be operand 3.
4782 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4783 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4785 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4788 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4789 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4790 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4794 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4796 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4797 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4798 "&& !rtx_equal_p (operands[1], operands[4])"
4800 operands[4] = copy_rtx (operands[1]);
4802 [(set_attr "movprfx" "*,yes")]
4805 ;; Predicated floating-point operations, merging with an independent value.
4806 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4807 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4809 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4812 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4813 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4814 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4816 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4819 && !rtx_equal_p (operands[2], operands[4])
4820 && !rtx_equal_p (operands[3], operands[4])
4821 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4823 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4824 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4825 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4826 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4830 if (reload_completed
4831 && register_operand (operands[4], <MODE>mode)
4832 && !rtx_equal_p (operands[0], operands[4]))
4834 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4835 operands[4], operands[1]));
4836 operands[4] = operands[2] = operands[0];
4838 else if (!rtx_equal_p (operands[1], operands[5]))
4839 operands[5] = copy_rtx (operands[1]);
4843 [(set_attr "movprfx" "yes")]
4846 ;; Same for operations that take a 1-bit constant.
4847 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4848 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4850 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4853 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4854 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4855 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4856 SVE_COND_FP_BINARY_I1)
4857 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4860 && !rtx_equal_p (operands[2], operands[4])
4861 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4863 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4864 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4868 if (reload_completed
4869 && register_operand (operands[4], <MODE>mode)
4870 && !rtx_equal_p (operands[0], operands[4]))
4872 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4873 operands[4], operands[1]));
4874 operands[4] = operands[2] = operands[0];
4876 else if (!rtx_equal_p (operands[1], operands[5]))
4877 operands[5] = copy_rtx (operands[1]);
4881 [(set_attr "movprfx" "yes")]
4884 ;; -------------------------------------------------------------------------
4885 ;; ---- [FP] Addition
4886 ;; -------------------------------------------------------------------------
4890 ;; -------------------------------------------------------------------------
4892 ;; Predicated floating-point addition.
4893 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4894 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4896 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4897 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4898 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4899 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4903 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4904 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4906 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4907 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4908 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4909 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4910 ; Split the unpredicated form after reload, so that we don't have
4911 ; the unnecessary PTRUE.
4912 "&& reload_completed
4913 && register_operand (operands[3], <MODE>mode)
4914 && INTVAL (operands[4]) == SVE_RELAXED_GP"
4915 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4917 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
4920 ;; Predicated floating-point addition of a constant, merging with the first input.
4922 (define_insn_and_rewrite "*cond_add<mode>_2_const"
4923 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
4925 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4928 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4929 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
4930 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
4934 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4936 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4937 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4938 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4939 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
4940 "&& !rtx_equal_p (operands[1], operands[4])"
4942 operands[4] = copy_rtx (operands[1]);
4944 [(set_attr "movprfx" "*,*,yes,yes")]
4947 ;; Predicated floating-point addition of a constant, merging with an
4948 ;; independent value.
4949 (define_insn_and_rewrite "*cond_add<mode>_any_const"
4950 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
4952 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
4955 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4956 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
4957 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
4959 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
4962 && !rtx_equal_p (operands[2], operands[4])
4963 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4965 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4966 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4967 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4968 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4973 if (reload_completed
4974 && register_operand (operands[4], <MODE>mode)
4975 && !rtx_equal_p (operands[0], operands[4]))
4977 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4978 operands[4], operands[1]));
4979 operands[4] = operands[2] = operands[0];
4981 else if (!rtx_equal_p (operands[1], operands[5]))
4982 operands[5] = copy_rtx (operands[1]);
4986 [(set_attr "movprfx" "yes")]
4989 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
4991 ;; -------------------------------------------------------------------------
4992 ;; ---- [FP] Complex addition
4993 ;; -------------------------------------------------------------------------
4996 ;; -------------------------------------------------------------------------
4998 ;; Predicated FCADD.
4999 (define_insn "@aarch64_pred_<optab><mode>"
5000 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5002 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5003 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5004 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5005 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5009 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5010 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5011 [(set_attr "movprfx" "*,yes")]
5014 ;; Predicated FCADD with merging.
5015 (define_expand "@cond_<optab><mode>"
5016 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5018 [(match_operand:<VPRED> 1 "register_operand")
5021 (const_int SVE_STRICT_GP)
5022 (match_operand:SVE_FULL_F 2 "register_operand")
5023 (match_operand:SVE_FULL_F 3 "register_operand")]
5025 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5030 ;; Predicated FCADD, merging with the first input.
5031 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5032 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5034 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5037 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5038 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5039 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5043 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5045 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5046 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5047 "&& !rtx_equal_p (operands[1], operands[4])"
5049 operands[4] = copy_rtx (operands[1]);
5051 [(set_attr "movprfx" "*,yes")]
5054 ;; Predicated FCADD, merging with an independent value.
5055 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5056 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5058 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5061 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5062 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5063 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5065 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5068 && !rtx_equal_p (operands[2], operands[4])
5069 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5071 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5072 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5073 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5077 if (reload_completed
5078 && register_operand (operands[4], <MODE>mode)
5079 && !rtx_equal_p (operands[0], operands[4]))
5081 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5082 operands[4], operands[1]));
5083 operands[4] = operands[2] = operands[0];
5085 else if (!rtx_equal_p (operands[1], operands[5]))
5086 operands[5] = copy_rtx (operands[1]);
5090 [(set_attr "movprfx" "yes")]
5093 ;; -------------------------------------------------------------------------
5094 ;; ---- [FP] Subtraction
5095 ;; -------------------------------------------------------------------------
5099 ;; -------------------------------------------------------------------------
5101 ;; Predicated floating-point subtraction.
5102 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5103 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5105 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5106 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5107 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5108 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5112 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5114 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5115 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5116 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5117 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5118 ; Split the unpredicated form after reload, so that we don't have
5119 ; the unnecessary PTRUE.
5120 "&& reload_completed
5121 && register_operand (operands[2], <MODE>mode)
5122 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5123 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5125 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5128 ;; Predicated floating-point subtraction from a constant, merging with the first input.
5130 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5131 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5133 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5136 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5137 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5138 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5142 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5144 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5145 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5146 "&& !rtx_equal_p (operands[1], operands[4])"
5148 operands[4] = copy_rtx (operands[1]);
5150 [(set_attr "movprfx" "*,yes")]
5153 ;; Predicated floating-point subtraction from a constant, merging with an
5154 ;; independent value.
5155 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5156 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5158 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5161 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5162 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5163 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5165 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5168 && !rtx_equal_p (operands[3], operands[4])
5169 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5171 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5172 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5176 if (reload_completed
5177 && register_operand (operands[4], <MODE>mode)
5178 && !rtx_equal_p (operands[0], operands[4]))
5180 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5181 operands[4], operands[1]));
5182 operands[4] = operands[3] = operands[0];
5184 else if (!rtx_equal_p (operands[1], operands[5]))
5185 operands[5] = copy_rtx (operands[1]);
5189 [(set_attr "movprfx" "yes")]
5192 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5194 ;; -------------------------------------------------------------------------
5195 ;; ---- [FP] Absolute difference
5196 ;; -------------------------------------------------------------------------
5199 ;; -------------------------------------------------------------------------
5201 ;; Predicated floating-point absolute difference.
5202 (define_expand "@aarch64_pred_abd<mode>"
5203 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5205 [(match_operand:<VPRED> 1 "register_operand")
5206 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5210 (match_operand:SVE_FULL_F 2 "register_operand")
5211 (match_operand:SVE_FULL_F 3 "register_operand")]
5217 ;; Predicated floating-point absolute difference.
5218 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5219 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5221 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5222 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5225 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5226 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5227 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5230 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5232 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5233 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5234 "&& !rtx_equal_p (operands[1], operands[5])"
5236 operands[5] = copy_rtx (operands[1]);
5238 [(set_attr "movprfx" "*,yes")]
5241 (define_expand "@aarch64_cond_abd<mode>"
5242 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5244 [(match_operand:<VPRED> 1 "register_operand")
5247 (const_int SVE_STRICT_GP)
5250 (const_int SVE_STRICT_GP)
5251 (match_operand:SVE_FULL_F 2 "register_operand")
5252 (match_operand:SVE_FULL_F 3 "register_operand")]
5255 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5259 if (rtx_equal_p (operands[3], operands[4]))
5260 std::swap (operands[2], operands[3]);
5263 ;; Predicated floating-point absolute difference, merging with the first input.
5265 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5266 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5268 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5271 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5274 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5275 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5276 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5282 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5283 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5285 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5286 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5287 "&& (!rtx_equal_p (operands[1], operands[4])
5288 || !rtx_equal_p (operands[1], operands[6]))"
5290 operands[4] = copy_rtx (operands[1]);
5291 operands[6] = copy_rtx (operands[1]);
5293 [(set_attr "movprfx" "*,yes")]
5296 ;; Predicated floating-point absolute difference, merging with the second input.
5298 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5299 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5301 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5304 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5307 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5308 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5309 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5315 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5316 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5318 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5319 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5320 "&& (!rtx_equal_p (operands[1], operands[4])
5321 || !rtx_equal_p (operands[1], operands[6]))"
5323 operands[4] = copy_rtx (operands[1]);
5324 operands[6] = copy_rtx (operands[1]);
5326 [(set_attr "movprfx" "*,yes")]
5329 ;; Predicated floating-point absolute difference, merging with an
5330 ;; independent value.
5331 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5332 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5334 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5337 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5340 (match_operand:SI 8 "aarch64_sve_gp_strictness")
5341 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5342 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5345 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5348 && !rtx_equal_p (operands[2], operands[4])
5349 && !rtx_equal_p (operands[3], operands[4])
5350 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5351 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5353 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5354 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5355 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5356 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5360 if (reload_completed
5361 && register_operand (operands[4], <MODE>mode)
5362 && !rtx_equal_p (operands[0], operands[4]))
5364 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5365 operands[4], operands[1]));
5366 operands[4] = operands[3] = operands[0];
5368 else if (!rtx_equal_p (operands[1], operands[5])
5369 || !rtx_equal_p (operands[1], operands[7]))
5371 operands[5] = copy_rtx (operands[1]);
5372 operands[7] = copy_rtx (operands[1]);
5377 [(set_attr "movprfx" "yes")]
5380 ;; -------------------------------------------------------------------------
5381 ;; ---- [FP] Multiplication
5382 ;; -------------------------------------------------------------------------
5385 ;; -------------------------------------------------------------------------
5387 ;; Predicated floating-point multiplication.
5388 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5389 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5391 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5392 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5393 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5394 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5398 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5400 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5401 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5402 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5403 ; Split the unpredicated form after reload, so that we don't have
5404 ; the unnecessary PTRUE.
5405 "&& reload_completed
5406 && register_operand (operands[3], <MODE>mode)
5407 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5408 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5410 [(set_attr "movprfx" "*,*,*,yes,yes")]
5413 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5414 ;; SVE_COND_FP_BINARY_I1.
5416 ;; Unpredicated multiplication by selected lanes.
5417 (define_insn "@aarch64_mul_lane_<mode>"
5418 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5421 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5422 (match_operand:SI 3 "const_int_operand")]
5423 UNSPEC_SVE_LANE_SELECT)
5424 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5426 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5429 ;; -------------------------------------------------------------------------
5430 ;; ---- [FP] Binary logical operations
5431 ;; -------------------------------------------------------------------------
5436 ;; -------------------------------------------------------------------------
5438 ;; Binary logical operations on floating-point modes. We avoid subregs
5439 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5440 ;; aren't defined for floating-point modes.
5441 (define_insn "*<optab><mode>3"
5442 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5444 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5445 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5448 "<logicalf_op>\t%0.d, %1.d, %2.d"
5451 ;; -------------------------------------------------------------------------
5452 ;; ---- [FP] Sign copying
5453 ;; -------------------------------------------------------------------------
5454 ;; The patterns in this section are synthetic.
5455 ;; -------------------------------------------------------------------------
5457 (define_expand "copysign<mode>3"
5458 [(match_operand:SVE_FULL_F 0 "register_operand")
5459 (match_operand:SVE_FULL_F 1 "register_operand")
5460 (match_operand:SVE_FULL_F 2 "register_operand")]
5463 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5464 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5465 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5466 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5468 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5469 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5471 emit_insn (gen_and<v_int_equiv>3
5473 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5476 emit_insn (gen_and<v_int_equiv>3
5478 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5481 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5482 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5487 (define_expand "xorsign<mode>3"
5488 [(match_operand:SVE_FULL_F 0 "register_operand")
5489 (match_operand:SVE_FULL_F 1 "register_operand")
5490 (match_operand:SVE_FULL_F 2 "register_operand")]
5493 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5494 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5495 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5497 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5498 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5500 emit_insn (gen_and<v_int_equiv>3
5502 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5505 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5506 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5511 ;; -------------------------------------------------------------------------
5512 ;; ---- [FP] Maximum and minimum
5513 ;; -------------------------------------------------------------------------
5519 ;; -------------------------------------------------------------------------
5521 ;; Unpredicated fmax/fmin (the libm functions). The optabs for the
5522 ;; smin/smax rtx codes are handled in the generic section above.
5523 (define_expand "<maxmin_uns><mode>3"
5524 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5527 (const_int SVE_RELAXED_GP)
5528 (match_operand:SVE_FULL_F 1 "register_operand")
5529 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5530 SVE_COND_FP_MAXMIN_PUBLIC))]
5533 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5537 ;; Predicated floating-point maximum/minimum.
5538 (define_insn "@aarch64_pred_<optab><mode>"
5539 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5541 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5542 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5543 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5544 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5545 SVE_COND_FP_MAXMIN))]
5548 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5549 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5550 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5551 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5552 [(set_attr "movprfx" "*,*,yes,yes")]
5555 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5556 ;; SVE_COND_FP_BINARY_I1.
5558 ;; -------------------------------------------------------------------------
5559 ;; ---- [PRED] Binary logical operations
5560 ;; -------------------------------------------------------------------------
5568 ;; -------------------------------------------------------------------------
5570 ;; Predicate AND. We can reuse one of the inputs as the GP.
5571 ;; Doubling the second operand is the preferred implementation
5572 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
5573 (define_insn "and<mode>3"
5574 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5575 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5576 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5578 "and\t%0.b, %1/z, %2.b, %2.b"
5581 ;; Unpredicated predicate EOR and ORR.
5582 (define_expand "<optab><mode>3"
5583 [(set (match_operand:PRED_ALL 0 "register_operand")
5585 (LOGICAL_OR:PRED_ALL
5586 (match_operand:PRED_ALL 1 "register_operand")
5587 (match_operand:PRED_ALL 2 "register_operand"))
5591 operands[3] = aarch64_ptrue_reg (<MODE>mode);
5595 ;; Predicated predicate AND, EOR and ORR.
5596 (define_insn "@aarch64_pred_<optab><mode>_z"
5597 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5600 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5601 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5602 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5604 "<logical>\t%0.b, %1/z, %2.b, %3.b"
5607 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5608 ;; the GP. Store the result in operand 0 and set the flags in the same
5609 ;; way as for PTEST.
5610 (define_insn "*<optab><mode>3_cc"
5611 [(set (reg:CC_NZC CC_REGNUM)
5613 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5615 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5618 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5619 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5622 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5623 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5626 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5629 ;; Same with just the flags result.
5630 (define_insn "*<optab><mode>3_ptest"
5631 [(set (reg:CC_NZC CC_REGNUM)
5633 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5635 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5638 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5639 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5642 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5644 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5647 ;; -------------------------------------------------------------------------
5648 ;; ---- [PRED] Binary logical operations (inverted second input)
5649 ;; -------------------------------------------------------------------------
5653 ;; -------------------------------------------------------------------------
5655 ;; Predicated predicate BIC and ORN.
5656 (define_insn "aarch64_pred_<nlogical><mode>_z"
5657 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5660 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5661 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5662 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5664 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
5667 ;; Same, but set the flags as a side-effect.
5668 (define_insn "*<nlogical><mode>3_cc"
5669 [(set (reg:CC_NZC CC_REGNUM)
5671 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5673 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5677 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5678 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5681 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5682 (and:PRED_ALL (NLOGICAL:PRED_ALL
5683 (not:PRED_ALL (match_dup 3))
5687 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5690 ;; Same with just the flags result.
5691 (define_insn "*<nlogical><mode>3_ptest"
5692 [(set (reg:CC_NZC CC_REGNUM)
5694 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5696 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5700 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5701 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5704 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5706 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5709 ;; -------------------------------------------------------------------------
5710 ;; ---- [PRED] Binary logical operations (inverted result)
5711 ;; -------------------------------------------------------------------------
5715 ;; -------------------------------------------------------------------------
5717 ;; Predicated predicate NAND and NOR.
;; Both inputs (operands 2 and 3) are inverted, which by De Morgan gives the
;; inverted-result forms: NAND = IOR of inversions, NOR = AND of inversions.
5718 (define_insn "aarch64_pred_<logical_nn><mode>_z"
5719 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5722 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5723 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5724 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5726 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
5729 ;; Same, but set the flags as a side-effect.
;; "s"-suffixed variant: also writes the NZCV flags via CC_REGNUM.
5730 (define_insn "*<logical_nn><mode>3_cc"
5731 [(set (reg:CC_NZC CC_REGNUM)
5733 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5735 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5739 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5741 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5744 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5745 (and:PRED_ALL (NLOGICAL:PRED_ALL
5746 (not:PRED_ALL (match_dup 2))
5747 (not:PRED_ALL (match_dup 3)))
5750 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5753 ;; Same with just the flags result.
;; Flags-only form: the predicate result goes to a scratch register.
5754 (define_insn "*<logical_nn><mode>3_ptest"
5755 [(set (reg:CC_NZC CC_REGNUM)
5757 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5759 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5763 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5765 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5768 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5770 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5773 ;; =========================================================================
5774 ;; == Ternary arithmetic
5775 ;; =========================================================================
5777 ;; -------------------------------------------------------------------------
5778 ;; ---- [INT] MLA and MAD
5779 ;; -------------------------------------------------------------------------
5783 ;; -------------------------------------------------------------------------
5785 ;; Unpredicated integer addition of product.
;; Expander: operand 2 may be a non-register (nonmemory_operand);
;; aarch64_prepare_sve_int_fma legalizes the operands, and operands[4]
;; receives an all-true governing predicate.
5786 (define_expand "fma<mode>4"
5787 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5792 (match_operand:SVE_FULL_I 1 "register_operand")
5793 (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
5795 (match_operand:SVE_FULL_I 3 "register_operand")))]
5798 if (aarch64_prepare_sve_int_fma (operands, PLUS))
5800 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5804 ;; Predicated integer addition of product.
;; Three alternatives: tie a multiplicand to the destination (MAD form),
;; tie the accumulator to the destination (MLA form), or use MOVPRFX to
;; copy the accumulator first.  "%" on operand 2 marks the multiplication
;; operands as commutative.
5805 (define_insn "@aarch64_pred_fma<mode>"
5806 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5809 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5811 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5812 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5814 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
5817 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5818 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5819 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5820 [(set_attr "movprfx" "*,*,yes")]
5823 ;; Predicated integer addition of product with merging.
;; Operand 5 is the fallback value for inactive lanes.  The expander
;; canonicalizes so that a fallback equal to a multiplication operand ends
;; up as operand 2 (which the tied-register insn alternatives can use).
5824 (define_expand "cond_fma<mode>"
5825 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5827 [(match_operand:<VPRED> 1 "register_operand")
5830 (match_operand:SVE_FULL_I 2 "register_operand")
5831 (match_operand:SVE_FULL_I 3 "general_operand"))
5832 (match_operand:SVE_FULL_I 4 "register_operand"))
5833 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5837 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
5839 /* Swap the multiplication operands if the fallback value is the
5840 second of the two. */
5841 if (rtx_equal_p (operands[3], operands[5]))
5842 std::swap (operands[2], operands[3]);
5846 ;; Predicated integer addition of product, merging with the first input.
;; Inactive lanes keep operand 2, so the MAD form (destination tied to the
;; multiplicand) applies directly; otherwise MOVPRFX copies operand 2 first.
5847 (define_insn "*cond_fma<mode>_2"
5848 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5850 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5853 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5854 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
5855 (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
5860 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5861 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
5862 [(set_attr "movprfx" "*,yes")]
5865 ;; Predicated integer addition of product, merging with the third input.
;; Inactive lanes keep the accumulator (operand 4), so the MLA form applies.
5866 (define_insn "*cond_fma<mode>_4"
5867 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5869 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5872 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
5873 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
5874 (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
5879 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5880 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5881 [(set_attr "movprfx" "*,yes")]
5884 ;; Predicated integer addition of product, merging with an independent value.
;; Alternatives with a "Dz" fallback zero the inactive lanes via a zeroing
;; MOVPRFX; the insn condition requires the register fallback to be distinct
;; from all the arithmetic inputs.  The rewrite below handles the last
;; ("w") alternative after reload: first SEL the accumulator into the
;; destination (gen_vcond_mask), then reuse the destination as both the
;; accumulator and the fallback.
5885 (define_insn_and_rewrite "*cond_fma<mode>_any"
5886 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
5888 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5891 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
5892 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
5893 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
5894 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
5897 && !rtx_equal_p (operands[2], operands[5])
5898 && !rtx_equal_p (operands[3], operands[5])
5899 && !rtx_equal_p (operands[4], operands[5])"
5901 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5902 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5903 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5904 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
5905 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5907 "&& reload_completed
5908 && register_operand (operands[5], <MODE>mode)
5909 && !rtx_equal_p (operands[0], operands[5])"
5911 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
5912 operands[5], operands[1]));
5913 operands[5] = operands[4] = operands[0];
5915 [(set_attr "movprfx" "yes")]
5918 ;; -------------------------------------------------------------------------
5919 ;; ---- [INT] MLS and MSB
5920 ;; -------------------------------------------------------------------------
5924 ;; -------------------------------------------------------------------------
5926 ;; Unpredicated integer subtraction of product.
;; Mirror of fma<mode>4 but with MINUS: aarch64_prepare_sve_int_fma
;; legalizes the operands and operands[4] gets an all-true predicate.
5927 (define_expand "fnma<mode>4"
5928 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5930 (match_operand:SVE_FULL_I 3 "register_operand")
5934 (match_operand:SVE_FULL_I 1 "register_operand")
5935 (match_operand:SVE_FULL_I 2 "general_operand"))]
5939 if (aarch64_prepare_sve_int_fma (operands, MINUS))
5941 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5945 ;; Predicated integer subtraction of product.
;; Same alternative scheme as @aarch64_pred_fma<mode>: MSB (multiplicand
;; tied to destination), MLS (accumulator tied), or MOVPRFX + MLS.
5946 (define_insn "@aarch64_pred_fnma<mode>"
5947 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5949 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
5951 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5953 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5954 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5958 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5959 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5960 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5961 [(set_attr "movprfx" "*,*,yes")]
5964 ;; Predicated integer subtraction of product with merging.
;; Operand 5 is the fallback for inactive lanes; as for cond_fma, the
;; expander swaps the multiplication operands so a matching fallback
;; becomes operand 2.
5965 (define_expand "cond_fnma<mode>"
5966 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5968 [(match_operand:<VPRED> 1 "register_operand")
5970 (match_operand:SVE_FULL_I 4 "register_operand")
5972 (match_operand:SVE_FULL_I 2 "register_operand")
5973 (match_operand:SVE_FULL_I 3 "general_operand")))
5974 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5978 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
5980 /* Swap the multiplication operands if the fallback value is the
5981 second of the two. */
5982 if (rtx_equal_p (operands[3], operands[5]))
5983 std::swap (operands[2], operands[3]);
5987 ;; Predicated integer subtraction of product, merging with the first input.
;; MSB form (destination tied to operand 2), or MOVPRFX when untied.
5988 (define_insn "*cond_fnma<mode>_2"
5989 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5991 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5993 (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
5995 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5996 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6001 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6002 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6003 [(set_attr "movprfx" "*,yes")]
6006 ;; Predicated integer subtraction of product, merging with the third input.
;; MLS form (destination tied to the accumulator, operand 4).
6007 (define_insn "*cond_fnma<mode>_4"
6008 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6010 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6012 (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6014 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6015 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6020 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6021 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6022 [(set_attr "movprfx" "*,yes")]
6025 ;; Predicated integer subtraction of product, merging with an
6026 ;; independent value.
;; Structured exactly like *cond_fma<mode>_any, with MLS/MSB in place of
;; MLA/MAD.  After reload, a register fallback distinct from the
;; destination is handled by first SELecting the accumulator into the
;; destination and then using the in-place form.
6027 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6028 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6030 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6032 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6034 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6035 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6036 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6039 && !rtx_equal_p (operands[2], operands[5])
6040 && !rtx_equal_p (operands[3], operands[5])
6041 && !rtx_equal_p (operands[4], operands[5])"
6043 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6044 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6045 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6046 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6047 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6049 "&& reload_completed
6050 && register_operand (operands[5], <MODE>mode)
6051 && !rtx_equal_p (operands[0], operands[5])"
6053 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6054 operands[5], operands[1]));
6055 operands[5] = operands[4] = operands[0];
6057 [(set_attr "movprfx" "yes")]
6060 ;; -------------------------------------------------------------------------
6061 ;; ---- [INT] Dot product
6062 ;; -------------------------------------------------------------------------
6066 ;; -------------------------------------------------------------------------
6068 ;; Four-element integer dot-product with accumulation.
;; Operands 1 and 2 have a quarter-width element type (<VSI2QI>); the
;; accumulator (operand 3) can be tied to the destination or copied in
;; with MOVPRFX.
6069 (define_insn "<sur>dot_prod<vsi2qi>"
6070 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6072 (unspec:SVE_FULL_SDI
6073 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6074 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6076 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6079 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6080 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6081 [(set_attr "movprfx" "*,yes")]
6084 ;; Four-element integer dot-product by selected lanes with accumulation.
;; Operand 2 is restricted to the lane-addressable registers
;; (<sve_lane_con>) and operand 3 selects the lane.
6085 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6086 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6088 (unspec:SVE_FULL_SDI
6089 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6091 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6092 (match_operand:SI 3 "const_int_operand")]
6093 UNSPEC_SVE_LANE_SELECT)]
6095 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6098 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6099 movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6100 [(set_attr "movprfx" "*,yes")]
6103 ;; -------------------------------------------------------------------------
6104 ;; ---- [INT] Sum of absolute differences
6105 ;; -------------------------------------------------------------------------
6106 ;; The patterns in this section are synthetic.
6107 ;; -------------------------------------------------------------------------
6109 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6110 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
6111 ;; the difference into a vector and accumulate that into operand 3 before
6112 ;; copying that into the result operand 0.
6113 ;; Perform that with a sequence of:
6115 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
6116 ;; MOVPRFX op0, op3 // If necessary
6117 ;; UDOT op0.s, diff.b, ones.b
;; The widening accumulation is done by dotting the absolute differences
;; with a vector of all-ones (UDOT is correct for both signednesses since
;; the absolute difference is non-negative).
6118 (define_expand "<sur>sad<vsi2qi>"
6119 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6120 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6121 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6122 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6125 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6126 rtx diff = gen_reg_rtx (<VSI2QI>mode);
6127 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6128 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6133 ;; -------------------------------------------------------------------------
6134 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6135 ;; -------------------------------------------------------------------------
6136 ;; Includes merging patterns for:
6145 ;; -------------------------------------------------------------------------
6147 ;; Unpredicated floating-point ternary operations.
;; The "unpredicated" expansion still wraps the operation in a predicated
;; unspec: operands[4] gets an all-true predicate and SVE_RELAXED_GP marks
;; the governing predicate as not semantically significant.
6148 (define_expand "<optab><mode>4"
6149 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6152 (const_int SVE_RELAXED_GP)
6153 (match_operand:SVE_FULL_F 1 "register_operand")
6154 (match_operand:SVE_FULL_F 2 "register_operand")
6155 (match_operand:SVE_FULL_F 3 "register_operand")]
6156 SVE_COND_FP_TERNARY))]
6159 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6163 ;; Predicated floating-point ternary operations.
;; Three alternatives: accumulator tied to the destination (FMLA-style),
;; multiplicand tied (FMAD-style), or MOVPRFX + FMLA-style.  "%" marks the
;; multiplication operands as commutative.
6164 (define_insn "@aarch64_pred_<optab><mode>"
6165 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6167 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6168 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6169 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6170 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6171 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6172 SVE_COND_FP_TERNARY))]
6175 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6176 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6177 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6178 [(set_attr "movprfx" "*,*,yes")]
6181 ;; Predicated floating-point ternary operations with merging.
;; Operand 5 is the fallback for inactive lanes; SVE_STRICT_GP records that
;; the governing predicate is semantically significant here.  As with the
;; integer cond_fma, a fallback matching operand 3 is swapped into
;; operand 2 position.
6182 (define_expand "@cond_<optab><mode>"
6183 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6185 [(match_operand:<VPRED> 1 "register_operand")
6188 (const_int SVE_STRICT_GP)
6189 (match_operand:SVE_FULL_F 2 "register_operand")
6190 (match_operand:SVE_FULL_F 3 "register_operand")
6191 (match_operand:SVE_FULL_F 4 "register_operand")]
6192 SVE_COND_FP_TERNARY)
6193 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6197 /* Swap the multiplication operands if the fallback value is the
6198 second of the two. */
6199 if (rtx_equal_p (operands[3], operands[5]))
6200 std::swap (operands[2], operands[3]);
6203 ;; Predicated floating-point ternary operations, merging with the
;; first input (FMAD-style, destination tied to operand 2).  The rewrite
;; canonicalizes the merge predicate (operand 5) to match the governing
;; predicate (operand 1) once they are known to dominate.
6205 (define_insn_and_rewrite "*cond_<optab><mode>_2"
6206 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6208 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6211 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6212 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6213 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6214 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6215 SVE_COND_FP_TERNARY)
6218 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6220 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6221 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6222 "&& !rtx_equal_p (operands[1], operands[5])"
6224 operands[5] = copy_rtx (operands[1]);
6226 [(set_attr "movprfx" "*,yes")]
6229 ;; Predicated floating-point ternary operations, merging with the
;; third input (FMLA-style, destination tied to the accumulator,
;; operand 4).
6231 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6232 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6234 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6237 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6238 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6239 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6240 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6241 SVE_COND_FP_TERNARY)
6244 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6246 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6247 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6248 "&& !rtx_equal_p (operands[1], operands[5])"
6250 operands[5] = copy_rtx (operands[1]);
6252 [(set_attr "movprfx" "*,yes")]
6255 ;; Predicated floating-point ternary operations, merging with an
6256 ;; independent value.
;; FP analogue of *cond_fma<mode>_any.  The rewrite has two cases: after
;; reload a distinct register fallback is merged via gen_vcond_mask and the
;; destination is reused; otherwise only the merge predicate (operand 6)
;; is canonicalized to the governing predicate.
6257 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6258 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6260 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6263 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6264 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
6265 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
6266 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
6267 SVE_COND_FP_TERNARY)
6268 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6271 && !rtx_equal_p (operands[2], operands[5])
6272 && !rtx_equal_p (operands[3], operands[5])
6273 && !rtx_equal_p (operands[4], operands[5])
6274 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6276 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6277 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6278 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6279 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6280 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6284 if (reload_completed
6285 && register_operand (operands[5], <MODE>mode)
6286 && !rtx_equal_p (operands[0], operands[5]))
6288 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6289 operands[5], operands[1]));
6290 operands[5] = operands[4] = operands[0];
6292 else if (!rtx_equal_p (operands[1], operands[6]))
6293 operands[6] = copy_rtx (operands[1]);
6297 [(set_attr "movprfx" "yes")]
6300 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
6301 ;; (fma ...) since target-independent code won't understand the indexing.
;; Operand 2 must be in the lane-addressable register range
;; (<sve_lane_con>); operand 3 is the constant lane index.
6302 (define_insn "@aarch64_<optab>_lane_<mode>"
6303 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6305 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
6307 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6308 (match_operand:SI 3 "const_int_operand")]
6309 UNSPEC_SVE_LANE_SELECT)
6310 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6311 SVE_FP_TERNARY_LANE))]
6314 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
6315 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6316 [(set_attr "movprfx" "*,yes")]
6319 ;; -------------------------------------------------------------------------
6320 ;; ---- [FP] Complex multiply-add
6321 ;; -------------------------------------------------------------------------
6322 ;; Includes merging patterns for:
6324 ;; -------------------------------------------------------------------------
6326 ;; Predicated FCMLA.
;; <rot> is the rotation selector baked into the iterator; the accumulator
;; (operand 4) is either tied to the destination or copied with MOVPRFX.
6327 (define_insn "@aarch64_pred_<optab><mode>"
6328 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6330 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6331 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6332 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6333 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6334 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6338 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6339 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6340 [(set_attr "movprfx" "*,yes")]
6343 ;; Predicated FCMLA with merging.
;; Operand 5 is the fallback for inactive lanes; SVE_STRICT_GP marks the
;; governing predicate as significant.
6344 (define_expand "@cond_<optab><mode>"
6345 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6347 [(match_operand:<VPRED> 1 "register_operand")
6350 (const_int SVE_STRICT_GP)
6351 (match_operand:SVE_FULL_F 2 "register_operand")
6352 (match_operand:SVE_FULL_F 3 "register_operand")
6353 (match_operand:SVE_FULL_F 4 "register_operand")]
6355 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6360 ;; Predicated FCMLA, merging with the third input.
;; Destination tied to the accumulator; the rewrite canonicalizes the merge
;; predicate (operand 5) to the governing predicate (operand 1).
6361 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6362 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6364 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6367 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6368 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6369 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6370 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6374 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6376 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6377 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6378 "&& !rtx_equal_p (operands[1], operands[5])"
6380 operands[5] = copy_rtx (operands[1]);
6382 [(set_attr "movprfx" "*,yes")]
6385 ;; Predicated FCMLA, merging with an independent value.
;; Four alternatives instead of six because FCMLA has no "MAD-style"
;; commuted form.  The rewrite merges a distinct register fallback into the
;; destination via gen_vcond_mask after reload, or else canonicalizes the
;; merge predicate (operand 6) to the governing predicate.
6386 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6387 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
6389 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6392 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6393 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
6394 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
6395 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
6397 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
6400 && !rtx_equal_p (operands[4], operands[5])
6401 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6403 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6404 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6405 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6409 if (reload_completed
6410 && register_operand (operands[5], <MODE>mode)
6411 && !rtx_equal_p (operands[0], operands[5]))
6413 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6414 operands[5], operands[1]));
6415 operands[5] = operands[4] = operands[0];
6417 else if (!rtx_equal_p (operands[1], operands[6]))
6418 operands[6] = copy_rtx (operands[1]);
6422 [(set_attr "movprfx" "yes")]
6425 ;; Unpredicated FCMLA with indexing.
;; Restricted to H and S element sizes (SVE_FULL_HSF); operand 2 must be in
;; the paired-lane register range (<sve_lane_pair_con>).
6426 (define_insn "@aarch64_<optab>_lane_<mode>"
6427 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
6428 (unspec:SVE_FULL_HSF
6429 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
6430 (unspec:SVE_FULL_HSF
6431 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
6432 (match_operand:SI 3 "const_int_operand")]
6433 UNSPEC_SVE_LANE_SELECT)
6434 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
6438 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
6439 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
6440 [(set_attr "movprfx" "*,yes")]
6443 ;; -------------------------------------------------------------------------
6444 ;; ---- [FP] Trigonometric multiply-add
6445 ;; -------------------------------------------------------------------------
6448 ;; -------------------------------------------------------------------------
;; FTMAD: operand 1 is tied to the destination (or copied with MOVPRFX)
;; and operand 3 is the constant trigonometric-coefficient index.
6450 (define_insn "@aarch64_sve_tmad<mode>"
6451 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6453 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
6454 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6455 (match_operand:DI 3 "const_int_operand")]
6459 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
6460 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
6461 [(set_attr "movprfx" "*,yes")]
6464 ;; =========================================================================
6465 ;; == Comparisons and selects
6466 ;; =========================================================================
6468 ;; -------------------------------------------------------------------------
6469 ;; ---- [INT,FP] Select based on predicates
6470 ;; -------------------------------------------------------------------------
6471 ;; Includes merging patterns for:
6475 ;; -------------------------------------------------------------------------
6477 ;; vcond_mask operand order: true, false, mask
6478 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
6479 ;; SEL operand order: mask, true, false
;; The expander forces operand 2 into a register whenever operand 1 is a
;; register, matching the insn condition below (at most one of the two
;; value operands may be a non-register).
6480 (define_expand "@vcond_mask_<mode><vpred>"
6481 [(set (match_operand:SVE_FULL 0 "register_operand")
6483 [(match_operand:<VPRED> 3 "register_operand")
6484 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
6485 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
6489 if (register_operand (operands[1], <MODE>mode))
6490 operands[2] = force_reg (<MODE>mode, operands[2]);
6496 ;; - a duplicated immediate and a register
6497 ;; - a duplicated immediate and zero
;; Alternatives cover: SEL of two registers; predicated MOV/FMOV of an
;; immediate over a register or zero; and MOVPRFX-prefixed forms when the
;; false value cannot be tied to the destination.
6498 (define_insn "*vcond_mask_<mode><vpred>"
6499 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
6501 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
6502 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
6503 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
6506 && (!register_operand (operands[1], <MODE>mode)
6507 || register_operand (operands[2], <MODE>mode))"
6509 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
6510 mov\t%0.<Vetype>, %3/m, #%I1
6511 mov\t%0.<Vetype>, %3/z, #%I1
6512 fmov\t%0.<Vetype>, %3/m, #%1
6513 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
6514 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
6515 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
6516 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
6519 ;; Optimize selects between a duplicated scalar variable and another vector,
6520 ;; the latter of which can be a zero constant or a variable. Treat duplicates
6521 ;; of GPRs as being more expensive than duplicates of FPRs, since they
6522 ;; involve a cross-file move.
;; Alternatives pair GPR ("r") and FPR ("w") scalar sources for each of the
;; three false-operand cases (tied, zero, untied register); the "?"/"??"
;; markers implement the GPR-is-more-expensive preference.
6523 (define_insn "@aarch64_sel_dup<mode>"
6524 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
6526 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upl, Upl, Upl")
6527 (vec_duplicate:SVE_FULL
6528 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
6529 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
6533 mov\t%0.<Vetype>, %3/m, %<vwcore>1
6534 mov\t%0.<Vetype>, %3/m, %<Vetype>1
6535 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6536 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
6537 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6538 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
6539 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
6542 ;; -------------------------------------------------------------------------
6543 ;; ---- [INT,FP] Compare and select
6544 ;; -------------------------------------------------------------------------
6545 ;; The patterns in this section are synthetic.
6546 ;; -------------------------------------------------------------------------
6548 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
6549 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; All three vcond expanders below delegate entirely to
;; aarch64_expand_sve_vcond; the RTL template only describes the operands.
6550 (define_expand "vcond<mode><v_int_equiv>"
6551 [(set (match_operand:SVE_FULL 0 "register_operand")
6552 (if_then_else:SVE_FULL
6553 (match_operator 3 "comparison_operator"
6554 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6555 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6556 (match_operand:SVE_FULL 1 "nonmemory_operand")
6557 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6560 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6565 ;; Integer vcondu. Don't enforce an immediate range here, since it
6566 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
;; Unsigned variant — identical expansion; the comparison code in
;; operand 3 carries the signedness.
6567 (define_expand "vcondu<mode><v_int_equiv>"
6568 [(set (match_operand:SVE_FULL 0 "register_operand")
6569 (if_then_else:SVE_FULL
6570 (match_operator 3 "comparison_operator"
6571 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6572 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6573 (match_operand:SVE_FULL 1 "nonmemory_operand")
6574 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6577 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6582 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
6583 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
6584 (define_expand "vcond<mode><v_fp_equiv>"
6585 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
6586 (if_then_else:SVE_FULL_HSD
6587 (match_operator 3 "comparison_operator"
6588 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
6589 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
6590 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
6591 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
6594 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
6599 ;; -------------------------------------------------------------------------
6600 ;; ---- [INT] Comparisons
6601 ;; -------------------------------------------------------------------------
6613 ;; -------------------------------------------------------------------------
6615 ;; Signed integer comparisons. Don't enforce an immediate range here, since
6616 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
6618 (define_expand "vec_cmp<mode><vpred>"
6620 [(set (match_operand:<VPRED> 0 "register_operand")
6621 (match_operator:<VPRED> 1 "comparison_operator"
6622 [(match_operand:SVE_FULL_I 2 "register_operand")
6623 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6624 (clobber (reg:CC_NZC CC_REGNUM))])]
6627 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6628 operands[2], operands[3]);
6633 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
6634 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; Identical in shape to vec_cmp<mode><vpred> above; the unsigned/signed
;; distinction is carried by GET_CODE (operands[1]) into the expander.
6636 (define_expand "vec_cmpu<mode><vpred>"
6638 [(set (match_operand:<VPRED> 0 "register_operand")
6639 (match_operator:<VPRED> 1 "comparison_operator"
6640 [(match_operand:SVE_FULL_I 2 "register_operand")
6641 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6642 (clobber (reg:CC_NZC CC_REGNUM))])]
6645 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6646 operands[2], operands[3]);
6651 ;; Predicated integer comparisons.
;; Two alternatives: operand 4 as an immediate (#%4, constraint
;; <sve_imm_con>) or as a register.  The compare also sets the flags,
;; hence the CC_NZC clobber.
6652 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
6653 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6655 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6656 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6657 (SVE_INT_CMP:<VPRED>
6658 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
6659 (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6661 (clobber (reg:CC_NZC CC_REGNUM))]
6664 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
6665 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6668 ;; Predicated integer comparisons in which both the flag and predicate
6669 ;; results are interesting.
;; Matches only when the PTEST and comparison use equivalent governing
;; predicates (aarch64_sve_same_pred_for_ptest_p).  The rewrite then
;; forces operands 6/7 to be literal copies of operands 4/5 so the two
;; unspecs stay identical.
6670 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
6671 [(set (reg:CC_NZC CC_REGNUM)
6673 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6675 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6678 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6679 (SVE_INT_CMP:<VPRED>
6680 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6681 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6684 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6688 (SVE_INT_CMP:<VPRED>
6693 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6695 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6696 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6697 "&& !rtx_equal_p (operands[4], operands[6])"
6699 operands[6] = copy_rtx (operands[4]);
6700 operands[7] = operands[5];
6704 ;; Predicated integer comparisons in which only the flags result is
;; Same as *cmp<cmp_op><mode>_cc above, but the predicate result is
;; unused: operand 0 is only a clobbered scratch, so just the flags live on.
6706 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
6707 [(set (reg:CC_NZC CC_REGNUM)
6709 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6711 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6714 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6715 (SVE_INT_CMP:<VPRED>
6716 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6717 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6720 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
6722 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6724 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6725 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6726 "&& !rtx_equal_p (operands[4], operands[6])"
6728 operands[6] = copy_rtx (operands[4]);
6729 operands[7] = operands[5];
6733 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
6734 ;; comparison with an AND. Split the instruction into its preferred form
6735 ;; at the earliest opportunity, in order to get rid of the redundant
;; The split re-predicates the comparison directly on operand 1, using
;; SVE_MAYBE_NOT_PTRUE instead of the original SVE_KNOWN_PTRUE, which
;; makes the separate AND (and its PTRUE input) unnecessary.
6737 (define_insn_and_split "*cmp<cmp_op><mode>_and"
6738 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6742 (const_int SVE_KNOWN_PTRUE)
6743 (SVE_INT_CMP:<VPRED>
6744 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6745 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6747 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
6748 (clobber (reg:CC_NZC CC_REGNUM))]
6756 (const_int SVE_MAYBE_NOT_PTRUE)
6757 (SVE_INT_CMP:<VPRED>
6761 (clobber (reg:CC_NZC CC_REGNUM))])]
6764 ;; Predicated integer wide comparisons.
;; Wide compare: narrow elements (operand 3, B/H/S) against .d elements
;; (operand 4, VNx2DI).  No immediate form, so a single register alternative.
6765 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
6766 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6768 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6769 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6771 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
6772 (match_operand:VNx2DI 4 "register_operand" "w")]
6773 SVE_COND_INT_CMP_WIDE)]
6775 (clobber (reg:CC_NZC CC_REGNUM))]
6777 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
6780 ;; Predicated integer wide comparisons in which both the flag and
6781 ;; predicate results are interesting.
;; Both results live: the flags (from the PTEST wrapper) and the
;; predicate in operand 0.  The PTEST predicate (operand 4, elided here)
;; and the comparison predicate must agree per
;; aarch64_sve_same_pred_for_ptest_p.
6782 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
6783 [(set (reg:CC_NZC CC_REGNUM)
6785 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6787 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6789 [(match_operand:VNx16BI 6 "register_operand" "Upl")
6790 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6792 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
6793 (match_operand:VNx2DI 3 "register_operand" "w")]
6794 SVE_COND_INT_CMP_WIDE)]
6797 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6804 SVE_COND_INT_CMP_WIDE)]
6807 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6808 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
6811 ;; Predicated integer wide comparisons in which only the flags result
;; Flags-only variant of the wide compare: the predicate result is a
;; clobbered scratch (operand 0).
6813 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
6814 [(set (reg:CC_NZC CC_REGNUM)
6816 [(match_operand:VNx16BI 1 "register_operand" "Upl")
6818 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6820 [(match_operand:VNx16BI 6 "register_operand" "Upl")
6821 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6823 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
6824 (match_operand:VNx2DI 3 "register_operand" "w")]
6825 SVE_COND_INT_CMP_WIDE)]
6828 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
6830 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6831 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
6834 ;; -------------------------------------------------------------------------
6835 ;; ---- [INT] While tests
6836 ;; -------------------------------------------------------------------------
6842 ;; -------------------------------------------------------------------------
6844 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
6845 ;; true for all J in [0, I].
;; Operands 1 and 2 are scalar bounds (GPI), each a register or zero.
;; WHILE also sets the flags, hence the CC_NZC clobber.
6846 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
6847 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6848 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6849 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6851 (clobber (reg:CC_NZC CC_REGNUM))]
6853 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6856 ;; The WHILE instructions set the flags in the same way as a PTEST with
6857 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
6858 ;; operands to the PTEST aren't needed, so we allow them to be anything.
;; The flags side is modelled as a PTEST with a known-PTRUE GP
;; (SVE_KNOWN_PTRUE); the rewrite below replaces the don't-care GP
;; operands 3/4 with constant all-ones masks.
6859 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
6860 [(set (reg:CC_NZC CC_REGNUM)
6864 (const_int SVE_KNOWN_PTRUE)
6866 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6867 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6870 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6871 (unspec:PRED_ALL [(match_dup 1)
6875 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6876 ;; Force the compiler to drop the unused predicate operand, so that we
6877 ;; don't have an unnecessary PTRUE.
6878 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
6880 operands[3] = CONSTM1_RTX (VNx16BImode);
6881 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
6885 ;; Same, but handle the case in which only the flags result is useful.
;; Flags-only version of the pattern above: the predicate result is
;; a clobbered scratch (operand 0).
6886 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
6887 [(set (reg:CC_NZC CC_REGNUM)
6891 (const_int SVE_KNOWN_PTRUE)
6893 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
6894 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
6897 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
6899 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
6900 ;; Force the compiler to drop the unused predicate operand, so that we
6901 ;; don't have an unnecessary PTRUE.
6902 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
6904 operands[3] = CONSTM1_RTX (VNx16BImode);
6905 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
6909 ;; -------------------------------------------------------------------------
6910 ;; ---- [FP] Direct comparisons
6911 ;; -------------------------------------------------------------------------
6920 ;; -------------------------------------------------------------------------
6922 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
6923 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; Operand 3 may be zero; expansion (including FCMUO with zero) is done
;; entirely by aarch64_expand_sve_vec_cmp_float.
6925 (define_expand "vec_cmp<mode><vpred>"
6926 [(set (match_operand:<VPRED> 0 "register_operand")
6927 (match_operator:<VPRED> 1 "comparison_operator"
6928 [(match_operand:SVE_FULL_F 2 "register_operand")
6929 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
6932 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
6933 operands[2], operands[3], false);
6938 ;; Predicated floating-point comparisons.
;; Alternative 1 compares against #0.0 (constraint Dz); alternative 2
;; against a register operand.
6939 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
6940 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6942 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6943 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6944 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6945 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
6946 SVE_COND_FP_CMP_I0))]
6949 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
6950 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6953 ;; Same for unordered comparisons.
;; FCMUO takes no #0.0 form (see the comment block above), so both
;; inputs must be registers.
6954 (define_insn "@aarch64_pred_fcmuo<mode>"
6955 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
6957 [(match_operand:<VPRED> 1 "register_operand" "Upl")
6958 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6959 (match_operand:SVE_FULL_F 3 "register_operand" "w")
6960 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
6961 UNSPEC_COND_FCMUO))]
6963 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6966 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
6967 ;; with another predicate P. This does not have the same trapping behavior
6968 ;; as predicating the comparison itself on P, but it's a legitimate fold,
6969 ;; since we can drop any potentially-trapping operations whose results
6972 ;; Split the instruction into its preferred form (below) at the earliest
6973 ;; opportunity, in order to get rid of the redundant operand 1.
;; Operand 1 (the PTRUE) deliberately has no predicate/constraint: the
;; split drops it and re-predicates the comparison with
;; SVE_MAYBE_NOT_PTRUE on the ANDed predicate instead.
6974 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
6975 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6978 [(match_operand:<VPRED> 1)
6979 (const_int SVE_KNOWN_PTRUE)
6980 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6981 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
6983 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
6990 (const_int SVE_MAYBE_NOT_PTRUE)
6993 SVE_COND_FP_CMP_I0))]
6996 ;; Same for unordered comparisons.
;; Unordered-comparison version of the pattern above; both comparison
;; inputs must be registers (no #0.0 form for FCMUO).
6997 (define_insn_and_split "*fcmuo<mode>_and_combine"
6998 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7001 [(match_operand:<VPRED> 1)
7002 (const_int SVE_KNOWN_PTRUE)
7003 (match_operand:SVE_FULL_F 2 "register_operand" "w")
7004 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7006 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
7013 (const_int SVE_MAYBE_NOT_PTRUE)
7016 UNSPEC_COND_FCMUO))]
7019 ;; -------------------------------------------------------------------------
7020 ;; ---- [FP] Absolute comparisons
7021 ;; -------------------------------------------------------------------------
7027 ;; -------------------------------------------------------------------------
7029 ;; Predicated floating-point absolute comparisons.
;; Absolute comparison of operands 3 and 4 under governing predicate
;; operand 1 (FAC<cc> — see the insn pattern that follows).
7030 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
7031 [(set (match_operand:<VPRED> 0 "register_operand")
7033 [(match_operand:<VPRED> 1 "register_operand")
7034 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7038 (match_operand:SVE_FULL_F 3 "register_operand")]
7043 (match_operand:SVE_FULL_F 4 "register_operand")]
7045 SVE_COND_FP_ABS_CMP))]
;; Matches while the inner GPs (operands 5 and 7, elided here) are
;; dominated by the governing predicate operand 1; the rewrite then
;; replaces them with copies of operand 1 so all three agree.
7049 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
7050 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7052 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7053 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
7056 (match_operand:SI 6 "aarch64_sve_gp_strictness")
7057 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7061 (match_operand:SI 8 "aarch64_sve_gp_strictness")
7062 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7064 SVE_COND_FP_ABS_CMP))]
7066 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
7067 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
7068 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7069 "&& (!rtx_equal_p (operands[1], operands[5])
7070 || !rtx_equal_p (operands[1], operands[7]))"
7072 operands[5] = copy_rtx (operands[1]);
7073 operands[7] = copy_rtx (operands[1]);
7077 ;; -------------------------------------------------------------------------
7078 ;; ---- [PRED] Select
7079 ;; -------------------------------------------------------------------------
7082 ;; -------------------------------------------------------------------------
;; Predicate select: SEL takes lanes from operand 1 where mask operand 3
;; is set and from operand 2 elsewhere.
7084 (define_insn "@vcond_mask_<mode><mode>"
7085 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7088 (match_operand:PRED_ALL 3 "register_operand" "Upa")
7089 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
7092 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
7094 "sel\t%0.b, %3, %1.b, %2.b"
7097 ;; -------------------------------------------------------------------------
7098 ;; ---- [PRED] Test bits
7099 ;; -------------------------------------------------------------------------
7102 ;; -------------------------------------------------------------------------
7104 ;; Branch based on predicate equality or inequality.
;; Compare the predicates by XORing them (predicates are equal iff the
;; XOR is all-false) and then PTESTing the result; the branch itself is
;; rewritten to test CC_REGNUM against zero.
7105 (define_expand "cbranch<mode>4"
7108 (match_operator 0 "aarch64_equality_operator"
7109 [(match_operand:PRED_ALL 1 "register_operand")
7110 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
7111 (label_ref (match_operand 3 ""))
7115 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
7116 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
7117 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
;; When comparing against zero we can PTEST operand 1 directly;
;; otherwise compute the XOR into a fresh predicate register.
7119 if (operands[2] == CONST0_RTX (<MODE>mode))
7123 pred = gen_reg_rtx (<MODE>mode);
7124 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
7127 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
7128 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
7129 operands[2] = const0_rtx;
7133 ;; See "Description of UNSPEC_PTEST" above for details.
;; Sets CC_NZC from a PTEST of predicate operand 3 under GP operand 0;
;; operand 2 describes whether the GP is known to be a PTRUE.
7134 (define_insn "aarch64_ptest<mode>"
7135 [(set (reg:CC_NZC CC_REGNUM)
7136 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
7138 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7139 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
7145 ;; =========================================================================
7147 ;; =========================================================================
7149 ;; -------------------------------------------------------------------------
7150 ;; ---- [INT,FP] Conditional reductions
7151 ;; -------------------------------------------------------------------------
7155 ;; -------------------------------------------------------------------------
7157 ;; Set operand 0 to the last active element in operand 3, or to tied
7158 ;; operand 1 if no elements are active.
;; CLASTA/CLASTB with operand 1 tied to the output ("0"): alternative 1
;; targets a general register (%<vwcore>0), alternative 2 an FP/SIMD
;; scalar register (%<Vetype>0).
7159 (define_insn "@fold_extract_<last_op>_<mode>"
7160 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
7162 [(match_operand:<VEL> 1 "register_operand" "0, 0")
7163 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7164 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7168 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
7169 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
;; Vector form of CLASTA/CLASTB; the second alternative uses MOVPRFX to
;; copy operand 1 into the destination first.
7172 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7173 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7175 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7176 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7177 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7181 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7182 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7185 ;; -------------------------------------------------------------------------
7186 ;; ---- [INT] Tree reductions
7187 ;; -------------------------------------------------------------------------
7198 ;; -------------------------------------------------------------------------
7200 ;; Unpredicated integer add reduction.
;; UADDV always produces a DI result, so for narrower element modes the
;; sum is computed into a DI temporary and the low part moved into
;; operand 0.
7201 (define_expand "reduc_plus_scal_<mode>"
7202 [(match_operand:<VEL> 0 "register_operand")
7203 (match_operand:SVE_FULL_I 1 "register_operand")]
7206 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7207 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7208 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7209 if (tmp != operands[0])
7210 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7215 ;; Predicated integer add reduction. The result is always 64-bits.
;; <su>ADDV: the destination is always DI (%d0) regardless of the
;; element width of operand 2.
7216 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7217 [(set (match_operand:DI 0 "register_operand" "=w")
7218 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7219 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7221 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7222 "<su>addv\t%d0, %1, %2.<Vetype>"
7225 ;; Unpredicated integer reductions.
;; Operand 2 (the governing predicate) is an all-true predicate created
;; at expand time.
7226 (define_expand "reduc_<optab>_scal_<mode>"
7227 [(set (match_operand:<VEL> 0 "register_operand")
7228 (unspec:<VEL> [(match_dup 2)
7229 (match_operand:SVE_FULL_I 1 "register_operand")]
7230 SVE_INT_REDUCTION))]
7233 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7237 ;; Predicated integer reductions.
;; Integer reduction of operand 2 under predicate operand 1, producing
;; a scalar of the element mode.
7238 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7239 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7240 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7241 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7242 SVE_INT_REDUCTION))]
7244 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7247 ;; -------------------------------------------------------------------------
7248 ;; ---- [FP] Tree reductions
7249 ;; -------------------------------------------------------------------------
7256 ;; -------------------------------------------------------------------------
7258 ;; Unpredicated floating-point tree reductions.
;; Operand 2 (the governing predicate) is an all-true predicate created
;; at expand time.
7259 (define_expand "reduc_<optab>_scal_<mode>"
7260 [(set (match_operand:<VEL> 0 "register_operand")
7261 (unspec:<VEL> [(match_dup 2)
7262 (match_operand:SVE_FULL_F 1 "register_operand")]
7266 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7270 ;; Predicated floating-point tree reductions.
;; FP tree reduction of operand 2 under predicate operand 1, producing
;; a scalar of the element mode.
7271 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7272 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7273 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7274 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7277 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7280 ;; -------------------------------------------------------------------------
7281 ;; ---- [FP] Left-to-right reductions
7282 ;; -------------------------------------------------------------------------
7285 ;; -------------------------------------------------------------------------
7287 ;; Unpredicated in-order FP reductions.
;; Operand 1 is the scalar accumulator; operand 3 (the governing
;; predicate) is an all-true predicate created at expand time.
7288 (define_expand "fold_left_plus_<mode>"
7289 [(set (match_operand:<VEL> 0 "register_operand")
7290 (unspec:<VEL> [(match_dup 3)
7291 (match_operand:<VEL> 1 "register_operand")
7292 (match_operand:SVE_FULL_F 2 "register_operand")]
7296 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7300 ;; Predicated in-order FP reductions.
;; FADDA: strictly-ordered add reduction.  The accumulator (operand 1)
;; is tied to the output (constraint "0").
7301 (define_insn "mask_fold_left_plus_<mode>"
7302 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7303 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7304 (match_operand:<VEL> 1 "register_operand" "0")
7305 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7308 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7311 ;; =========================================================================
7313 ;; =========================================================================
7315 ;; -------------------------------------------------------------------------
7316 ;; ---- [INT,FP] General permutes
7317 ;; -------------------------------------------------------------------------
7320 ;; -------------------------------------------------------------------------
;; Only available when the number of units is compile-time constant
;; (fixed-length SVE); expansion is done by aarch64_expand_sve_vec_perm.
7322 (define_expand "vec_perm<mode>"
7323 [(match_operand:SVE_FULL 0 "register_operand")
7324 (match_operand:SVE_FULL 1 "register_operand")
7325 (match_operand:SVE_FULL 2 "register_operand")
7326 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7327 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7329 aarch64_expand_sve_vec_perm (operands[0], operands[1],
7330 operands[2], operands[3]);
;; TBL: permute operand 1 using the integer index vector operand 2.
7335 (define_insn "@aarch64_sve_tbl<mode>"
7336 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7338 [(match_operand:SVE_FULL 1 "register_operand" "w")
7339 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7342 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7345 ;; -------------------------------------------------------------------------
7346 ;; ---- [INT,FP] Special-purpose unary permutes
7347 ;; -------------------------------------------------------------------------
7352 ;; -------------------------------------------------------------------------
7354 ;; Compact active elements and pad with zeros.
;; COMPACT is only available for .s and .d elements (SVE_FULL_SD).
7355 (define_insn "@aarch64_sve_compact<mode>"
7356 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7358 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7359 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7360 UNSPEC_SVE_COMPACT))]
7362 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7365 ;; Duplicate one element of a vector.
;; The lane's byte offset (index * element size) must be in [0, 63],
;; the range DUP's immediate can encode.
7366 (define_insn "@aarch64_sve_dup_lane<mode>"
7367 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7368 (vec_duplicate:SVE_FULL
7370 (match_operand:SVE_FULL 1 "register_operand" "w")
7371 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7373 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7374 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7377 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7379 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7380 ;; number op2 + N of op1. (We don't need to distinguish between memory
7381 ;; and architectural register lane numbering for op1 or op0, since the
7382 ;; two numbering schemes are the same for SVE.)
7384 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7385 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7386 ;; of op0. We therefore get the correct result for both endiannesses.
7388 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7389 ;; is in the opposite order to architectural register lane numbering.
7390 ;; Thus if we were to do this operation via a V128 temporary register,
7391 ;; the vec_select and vec_duplicate would both involve a reverse operation
7392 ;; for big-endian targets. In this fused pattern the two reverses cancel
;; The selected lanes must start on a 16-byte (quadword) boundary and
;; lie within the first 64 bytes; the output routine converts the lane
;; parallel in operand 2 into a quadword index for DUP .q.
7394 (define_insn "@aarch64_sve_dupq_lane<mode>"
7395 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7396 (vec_duplicate:SVE_FULL
7398 (match_operand:SVE_FULL 1 "register_operand" "w")
7399 (match_operand 2 "ascending_int_parallel"))))]
7401 && (INTVAL (XVECEXP (operands[2], 0, 0))
7402 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7403 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7404 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7406 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7407 * GET_MODE_SIZE (<VEL>mode));
7408 operands[2] = gen_int_mode (byte / 16, DImode);
7409 return "dup\t%0.q, %1.q[%2]";
7413 ;; Reverse the order of elements within a full vector.
;; REV: reverse the element order of operand 1.
7414 (define_insn "@aarch64_sve_rev<mode>"
7415 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7417 [(match_operand:SVE_FULL 1 "register_operand" "w")]
7420 "rev\t%0.<Vetype>, %1.<Vetype>")
7422 ;; -------------------------------------------------------------------------
7423 ;; ---- [INT,FP] Special-purpose binary permutes
7424 ;; -------------------------------------------------------------------------
7433 ;; -------------------------------------------------------------------------
7435 ;; Like EXT, but start at the first active element.
;; SPLICE ties operand 2 to the output; the second alternative uses
;; MOVPRFX to copy operand 2 into the destination first.
7436 (define_insn "@aarch64_sve_splice<mode>"
7437 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7439 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7440 (match_operand:SVE_FULL 2 "register_operand" "0, w")
7441 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7442 UNSPEC_SVE_SPLICE))]
7445 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7446 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7447 [(set_attr "movprfx" "*, yes")]
7450 ;; Permutes that take half the elements from one vector and half the
7451 ;; elements from the other.
;; ZIP/UZP/TRN-style two-input permute; the mnemonic comes from the
;; <perm_insn> iterator attribute.
7452 (define_insn "@aarch64_sve_<perm_insn><mode>"
7453 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7455 [(match_operand:SVE_FULL 1 "register_operand" "w")
7456 (match_operand:SVE_FULL 2 "register_operand" "w")]
7459 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7462 ;; Concatenate two vectors and extract a subvector. Note that the
7463 ;; immediate (third) operand is the lane index not the byte index.
;; EXT's immediate is a byte offset, so the lane index in operand 3 is
;; scaled by the element size at output time and must give a byte
;; offset in [0, 255].
7464 (define_insn "@aarch64_sve_ext<mode>"
7465 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7467 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7468 (match_operand:SVE_FULL 2 "register_operand" "w, w")
7469 (match_operand:SI 3 "const_int_operand")]
7472 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7474 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7475 return (which_alternative == 0
7476 ? "ext\\t%0.b, %0.b, %2.b, #%3"
7477 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7479 [(set_attr "movprfx" "*,yes")]
7482 ;; -------------------------------------------------------------------------
7483 ;; ---- [PRED] Special-purpose unary permutes
7484 ;; -------------------------------------------------------------------------
7487 ;; -------------------------------------------------------------------------
;; REV on a predicate: reverse the element order of operand 1.
7489 (define_insn "@aarch64_sve_rev<mode>"
7490 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7491 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7494 "rev\t%0.<Vetype>, %1.<Vetype>")
7496 ;; -------------------------------------------------------------------------
7497 ;; ---- [PRED] Special-purpose binary permutes
7498 ;; -------------------------------------------------------------------------
7506 ;; -------------------------------------------------------------------------
7508 ;; Permutes that take half the elements from one vector and half the
7509 ;; elements from the other.
;; Predicate version of the two-input permutes; the mnemonic comes from
;; the <perm_insn> iterator attribute.
7510 (define_insn "@aarch64_sve_<perm_insn><mode>"
7511 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7512 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7513 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7516 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7519 ;; =========================================================================
7521 ;; =========================================================================
7523 ;; -------------------------------------------------------------------------
7524 ;; ---- [INT<-INT] Packs
7525 ;; -------------------------------------------------------------------------
7528 ;; -------------------------------------------------------------------------
7530 ;; Integer pack. Use UZP1 on the narrower type, which discards
7531 ;; the high part of each wide element.
;; UZP1 on the narrow mode keeps the even (low) half of each wide
;; element, which is exactly the truncation of both inputs.
7532 (define_insn "vec_pack_trunc_<Vwide>"
7533 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7534 (unspec:SVE_FULL_BHSI
7535 [(match_operand:<VWIDE> 1 "register_operand" "w")
7536 (match_operand:<VWIDE> 2 "register_operand" "w")]
7539 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7542 ;; -------------------------------------------------------------------------
7543 ;; ---- [INT<-INT] Unpacks
7544 ;; -------------------------------------------------------------------------
7550 ;; -------------------------------------------------------------------------
7552 ;; Unpack the low or high half of a vector, where "high" refers to
7553 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7554 ;; for little-endian.
;; <hi_lanes_optab> chooses between UNPKHI and UNPKLO so that "high"
;; follows memory lane order on both endiannesses (see the comment above).
7555 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7556 [(match_operand:<VWIDE> 0 "register_operand")
7558 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7561 emit_insn ((<hi_lanes_optab>
7562 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7563 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7564 (operands[0], operands[1]));
;; <su>UNPKHI/<su>UNPKLO: widen one half of operand 1 to <VWIDE>.
7569 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7570 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7572 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7575 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7578 ;; -------------------------------------------------------------------------
7579 ;; ---- [INT<-FP] Conversions
7580 ;; -------------------------------------------------------------------------
7584 ;; -------------------------------------------------------------------------
7586 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7587 ;; SF to SI or DF to DI).
;; Same-width FP-to-integer conversion; operand 2 (the governing
;; predicate, created at expand time) is all-true, so a relaxed GP
;; (SVE_RELAXED_GP) is sufficient.
7588 (define_expand "<optab><mode><v_int_equiv>2"
7589 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7590 (unspec:<V_INT_EQUIV>
7592 (const_int SVE_RELAXED_GP)
7593 (match_operand:SVE_FULL_F 1 "register_operand")]
7597 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7601 ;; Predicated float-to-integer conversion, either to the same width or wider.
;; FCVTZS/FCVTZU where the destination elements are at least as wide as
;; the source elements (enforced by the <elem_bits> condition).
7602 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7603 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7604 (unspec:SVE_FULL_HSDI
7605 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7606 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7607 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7609 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7610 "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7613 ;; Predicated narrowing float-to-integer conversion.
;; The only narrowing case: DF -> SI, governed by the .d predicate
;; mode VNx2BI.
7614 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7615 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7617 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7618 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7619 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7622 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7625 ;; Predicated float-to-integer conversion with merging, either to the same
;; Conditional conversion with merging: inactive lanes take their value
;; from operand 3 (a register or zero).  The inner conversion uses a
;; strict GP (SVE_STRICT_GP).
7627 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7628 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7629 (unspec:SVE_FULL_HSDI
7630 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7631 (unspec:SVE_FULL_HSDI
7633 (const_int SVE_STRICT_GP)
7634 (match_operand:SVE_FULL_F 2 "register_operand")]
7636 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7638 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7641 ;; The first alternative doesn't need the earlyclobber, but the only case
7642 ;; it would help is the uninteresting one in which operands 2 and 3 are
7643 ;; the same register (despite having different modes). Making all the
7644 ;; alternatives earlyclobber makes things more consistent for the
7645 ;; register allocator.
;; Three alternatives: merge into operand 3 tied to the output ("0"),
;; zero-merge via MOVPRFX /z ("Dz"), or copy operand 3 with MOVPRFX ("w").
;; The rewrite makes the inner GP (operand 4) a copy of operand 1 once
;; the dominance test has matched.
7646 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7647 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7648 (unspec:SVE_FULL_HSDI
7649 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7650 (unspec:SVE_FULL_HSDI
7652 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7653 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7655 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7658 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7659 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7661 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7662 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7663 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7664 "&& !rtx_equal_p (operands[1], operands[4])"
7666 operands[4] = copy_rtx (operands[1]);
7668 [(set_attr "movprfx" "*,yes,yes")]
7671 ;; Predicated narrowing float-to-integer conversion with merging.
7672 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7673 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7675 [(match_operand:VNx2BI 1 "register_operand")
7678 (const_int SVE_STRICT_GP)
7679 (match_operand:VNx2DF_ONLY 2 "register_operand")]
7681 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
7686 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7687 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7689 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7692 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7693 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7695 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7699 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7700 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7701 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7702 [(set_attr "movprfx" "*,yes,yes")]
;; NOTE(review): embedded line numbers are discontinuous here (7709 -> 7711,
;; 7712 -> 7716, ...); some original lines are missing from this extract, so
;; the expand below is incomplete as shown.  Verify against the full file.
;;
;; Synthetic pack: converts two VNx2DF inputs to SI lanes (via two fcvtz
;; conversions into fresh VNx4SI temporaries, operands 4 and 5, using an
;; all-true VNx2BI predicate, operand 3) and interleaves the even elements
;; with UZP1 to form the final VNx4SI result.
7705 ;; -------------------------------------------------------------------------
7706 ;; ---- [INT<-FP] Packs
7707 ;; -------------------------------------------------------------------------
7708 ;; The patterns in this section are synthetic.
7709 ;; -------------------------------------------------------------------------
7711 ;; Convert two vectors of DF to SI and pack the results into a single vector.
7712 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7716 (const_int SVE_RELAXED_GP)
7717 (match_operand:VNx2DF 1 "register_operand")]
7722 (const_int SVE_RELAXED_GP)
7723 (match_operand:VNx2DF 2 "register_operand")]
7725 (set (match_operand:VNx4SI 0 "register_operand")
7726 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7729 operands[3] = aarch64_ptrue_reg (VNx2BImode);
7730 operands[4] = gen_reg_rtx (VNx4SImode);
7731 operands[5] = gen_reg_rtx (VNx4SImode);
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; [FP<-INT] conversions: an unpredicated expand that supplies an all-true
;; predicate and SVE_RELAXED_GP, plus the predicated SCVTF/UCVTF insns for
;; same-width-or-narrowing and for the SI->DF widening case.
7735 ;; -------------------------------------------------------------------------
7736 ;; ---- [INT<-FP] Unpacks
7737 ;; -------------------------------------------------------------------------
7738 ;; No patterns here yet!
7739 ;; -------------------------------------------------------------------------
7741 ;; -------------------------------------------------------------------------
7742 ;; ---- [FP<-INT] Conversions
7743 ;; -------------------------------------------------------------------------
7747 ;; -------------------------------------------------------------------------
7749 ;; Unpredicated conversion of integers to floats of the same size
7750 ;; (HI to HF, SI to SF or DI to DF).
7751 (define_expand "<optab><v_int_equiv><mode>2"
7752 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7755 (const_int SVE_RELAXED_GP)
7756 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
7760 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7764 ;; Predicated integer-to-float conversion, either to the same width or
7766 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7767 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
7769 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7770 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7771 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
7773 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7774 "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7777 ;; Predicated widening integer-to-float conversion.
7778 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7779 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
7781 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7782 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7783 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
7786 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Merging (conditional) forms of the SCVTF/UCVTF conversions above: the
;; fallback value (operand 3) is tied, zero, or a distinct register, with
;; MOVPRFX emitted for the latter two alternatives.
7789 ;; Predicated integer-to-float conversion with merging, either to the same
7790 ;; width or narrower.
7791 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7792 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7794 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7797 (const_int SVE_STRICT_GP)
7798 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
7800 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
7802 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7805 ;; The first alternative doesn't need the earlyclobber, but the only case
7806 ;; it would help is the uninteresting one in which operands 2 and 3 are
7807 ;; the same register (despite having different modes). Making all the
7808 ;; alternatives earlyclobber makes things more consistent for the
7809 ;; register allocator.
7810 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
7811 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
7813 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7816 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7817 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
7819 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7822 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7823 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7825 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7826 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
7827 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
7828 "&& !rtx_equal_p (operands[1], operands[4])"
7830 operands[4] = copy_rtx (operands[1]);
7832 [(set_attr "movprfx" "*,yes,yes")]
7835 ;; Predicated widening integer-to-float conversion with merging.
7836 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7837 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
7839 [(match_operand:VNx2BI 1 "register_operand")
7842 (const_int SVE_STRICT_GP)
7843 (match_operand:VNx4SI_ONLY 2 "register_operand")]
7845 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
7850 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
7851 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
7853 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7856 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7857 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
7859 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7863 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7864 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
7865 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
7866 [(set_attr "movprfx" "*,yes,yes")]
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the expand below is incomplete as
;; shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Synthetic unpack: ZIPs the requested half of the VNx4SI input into a
;; temporary (zip2 for the high-lanes case, zip1 otherwise), then emits the
;; predicated widening int-to-float conversion with an all-true VNx2BI
;; predicate and SVE_RELAXED_GP strictness.
7869 ;; -------------------------------------------------------------------------
7870 ;; ---- [FP<-INT] Packs
7871 ;; -------------------------------------------------------------------------
7872 ;; No patterns here yet!
7873 ;; -------------------------------------------------------------------------
7875 ;; -------------------------------------------------------------------------
7876 ;; ---- [FP<-INT] Unpacks
7877 ;; -------------------------------------------------------------------------
7878 ;; The patterns in this section are synthetic.
7879 ;; -------------------------------------------------------------------------
7881 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
7882 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
7883 ;; unpacked VNx4SI to VNx2DF.
7884 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
7885 [(match_operand:VNx2DF 0 "register_operand")
7887 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
7891 /* Use ZIP to do the unpack, since we don't care about the upper halves
7892 and since it has the nice property of not needing any subregs.
7893 If using UUNPK* turns out to be preferable, we could model it as
7894 a ZIP whose first operand is zero. */
7895 rtx temp = gen_reg_rtx (VNx4SImode);
7896 emit_insn ((<hi_lanes_optab>
7897 ? gen_aarch64_sve_zip2vnx4si
7898 : gen_aarch64_sve_zip1vnx4si)
7899 (temp, operands[1], operands[1]));
7900 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
7901 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
7902 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
7903 (operands[0], ptrue, temp, strictness));
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; [FP<-FP] packs and predicated FCVT truncation (wider float to narrower
;; float), including the merging "@cond_..." expand/insn pair with MOVPRFX
;; alternatives for untied fallback operands.
7908 ;; -------------------------------------------------------------------------
7909 ;; ---- [FP<-FP] Packs
7910 ;; -------------------------------------------------------------------------
7913 ;; -------------------------------------------------------------------------
7915 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
7916 ;; the results into a single vector.
7917 (define_expand "vec_pack_trunc_<Vwide>"
7919 (unspec:SVE_FULL_HSF
7921 (const_int SVE_RELAXED_GP)
7922 (match_operand:<VWIDE> 1 "register_operand")]
7925 (unspec:SVE_FULL_HSF
7927 (const_int SVE_RELAXED_GP)
7928 (match_operand:<VWIDE> 2 "register_operand")]
7930 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
7931 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7934 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
7935 operands[4] = gen_reg_rtx (<MODE>mode);
7936 operands[5] = gen_reg_rtx (<MODE>mode);
7940 ;; Predicated float-to-float truncation.
7941 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7942 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
7943 (unspec:SVE_FULL_HSF
7944 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
7945 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7946 (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
7948 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7949 "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
7952 ;; Predicated float-to-float truncation with merging.
7953 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7954 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7955 (unspec:SVE_FULL_HSF
7956 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
7957 (unspec:SVE_FULL_HSF
7959 (const_int SVE_STRICT_GP)
7960 (match_operand:SVE_FULL_SDF 2 "register_operand")]
7962 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
7964 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7967 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
7968 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
7969 (unspec:SVE_FULL_HSF
7970 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7971 (unspec:SVE_FULL_HSF
7973 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7974 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
7976 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7978 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
7980 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
7981 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
7982 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
7983 [(set_attr "movprfx" "*,yes,yes")]
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; [FP<-FP] unpacks (ZIP the chosen half, then predicated FCVT extension)
;; and the predicated float-to-float extension insn plus its merging
;; "@cond_..." expand/insn pair.
7986 ;; -------------------------------------------------------------------------
7987 ;; ---- [FP<-FP] Unpacks
7988 ;; -------------------------------------------------------------------------
7991 ;; -------------------------------------------------------------------------
7993 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
7994 ;; First unpack the source without conversion, then float-convert the
7996 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
7997 [(match_operand:<VWIDE> 0 "register_operand")
7998 (unspec:SVE_FULL_HSF
7999 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8003 /* Use ZIP to do the unpack, since we don't care about the upper halves
8004 and since it has the nice property of not needing any subregs.
8005 If using UUNPK* turns out to be preferable, we could model it as
8006 a ZIP whose first operand is zero. */
8007 rtx temp = gen_reg_rtx (<MODE>mode);
8008 emit_insn ((<hi_lanes_optab>
8009 ? gen_aarch64_sve_zip2<mode>
8010 : gen_aarch64_sve_zip1<mode>)
8011 (temp, operands[1], operands[1]));
8012 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8013 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8014 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8015 (operands[0], ptrue, temp, strictness));
8020 ;; Predicated float-to-float extension.
8021 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8022 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8023 (unspec:SVE_FULL_SDF
8024 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8025 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8026 (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8028 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8029 "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8032 ;; Predicated float-to-float extension with merging.
8033 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8034 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8035 (unspec:SVE_FULL_SDF
8036 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8037 (unspec:SVE_FULL_SDF
8039 (const_int SVE_STRICT_GP)
8040 (match_operand:SVE_FULL_HSF 2 "register_operand")]
8042 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8044 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8047 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8048 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8049 (unspec:SVE_FULL_SDF
8050 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8051 (unspec:SVE_FULL_SDF
8053 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8054 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8056 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8058 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8060 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8061 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8062 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8063 [(set_attr "movprfx" "*,yes,yes")]
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Predicate packs (UZP1 on the narrower predicate type) and predicate
;; unpacks (PUNPKLO/PUNPKHI selected by <hi_lanes_optab>).
8066 ;; -------------------------------------------------------------------------
8067 ;; ---- [PRED<-PRED] Packs
8068 ;; -------------------------------------------------------------------------
8071 ;; -------------------------------------------------------------------------
8073 ;; Predicate pack. Use UZP1 on the narrower type, which discards
8074 ;; the high part of each wide element.
8075 (define_insn "vec_pack_trunc_<Vwide>"
8076 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8078 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8079 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8082 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8085 ;; -------------------------------------------------------------------------
8086 ;; ---- [PRED<-PRED] Unpacks
8087 ;; -------------------------------------------------------------------------
8091 ;; -------------------------------------------------------------------------
8093 ;; Unpack the low or high half of a predicate, where "high" refers to
8094 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8095 ;; for little-endian.
8096 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8097 [(match_operand:<VWIDE> 0 "register_operand")
8098 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8102 emit_insn ((<hi_lanes_optab>
8103 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8104 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8105 (operands[0], operands[1]));
8110 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8111 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8112 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8115 "punpk<perm_hilo>\t%0.h, %1.b"
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Vector partitioning via the BRK family.  Unary forms take a zeroing (Dz)
;; or merging (tied "0") fallback in operand 3; each has a "_cc" variant
;; that also sets CC_NZC and a "_ptest" variant where only the flags matter
;; (the predicate result is a clobbered scratch).  The binary forms
;; (BRKN/BRKPA/BRKPB) take a third predicate register instead.
8118 ;; =========================================================================
8119 ;; == Vector partitioning
8120 ;; =========================================================================
8122 ;; -------------------------------------------------------------------------
8123 ;; ---- [PRED] Unary partitioning
8124 ;; -------------------------------------------------------------------------
8130 ;; -------------------------------------------------------------------------
8132 ;; Note that unlike most other instructions that have both merging and
8133 ;; zeroing forms, these instructions don't operate elementwise and so
8134 ;; don't fit the IFN_COND model.
8135 (define_insn "@aarch64_brk<brk_op>"
8136 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8138 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8139 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8140 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8144 brk<brk_op>\t%0.b, %1/z, %2.b
8145 brk<brk_op>\t%0.b, %1/m, %2.b"
8148 ;; Same, but also producing a flags result.
8149 (define_insn "*aarch64_brk<brk_op>_cc"
8150 [(set (reg:CC_NZC CC_REGNUM)
8152 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8154 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8157 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8158 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8161 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8169 brk<brk_op>s\t%0.b, %1/z, %2.b
8170 brk<brk_op>s\t%0.b, %1/m, %2.b"
8173 ;; Same, but with only the flags result being interesting.
8174 (define_insn "*aarch64_brk<brk_op>_ptest"
8175 [(set (reg:CC_NZC CC_REGNUM)
8177 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8179 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8182 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8183 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8186 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8189 brk<brk_op>s\t%0.b, %1/z, %2.b
8190 brk<brk_op>s\t%0.b, %1/m, %2.b"
8193 ;; -------------------------------------------------------------------------
8194 ;; ---- [PRED] Binary partitioning
8195 ;; -------------------------------------------------------------------------
8203 ;; -------------------------------------------------------------------------
8205 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
8206 (define_insn "@aarch64_brk<brk_op>"
8207 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8209 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8210 (match_operand:VNx16BI 2 "register_operand" "Upa")
8211 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8214 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8217 ;; Same, but also producing a flags result.
8218 (define_insn "*aarch64_brk<brk_op>_cc"
8219 [(set (reg:CC_NZC CC_REGNUM)
8221 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8223 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8226 (match_operand:VNx16BI 2 "register_operand" "Upa")
8227 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8230 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8237 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8240 ;; Same, but with only the flags result being interesting.
8241 (define_insn "*aarch64_brk<brk_op>_ptest"
8242 [(set (reg:CC_NZC CC_REGNUM)
8244 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8246 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8249 (match_operand:VNx16BI 2 "register_operand" "Upa")
8250 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8253 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
8255 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; [PRED] scalarization patterns for <sve_pred_op> (tied-destination insns
;; that clobber CC_NZC), plus "_cc" and "_ptest" variants whose rewrite
;; step canonicalizes a duplicated predicate/flag operand pair
;; (operands[4]/[5] := operands[2]/[3]) once the PTEST predicates are known
;; to match.
8258 ;; -------------------------------------------------------------------------
8259 ;; ---- [PRED] Scalarization
8260 ;; -------------------------------------------------------------------------
8264 ;; -------------------------------------------------------------------------
8266 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8267 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8269 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8270 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8271 (match_operand:PRED_ALL 3 "register_operand" "0")]
8273 (clobber (reg:CC_NZC CC_REGNUM))]
8274 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8275 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8278 ;; Same, but also producing a flags result.
8279 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8280 [(set (reg:CC_NZC CC_REGNUM)
8282 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8284 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8287 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8288 (match_operand:PRED_ALL 6 "register_operand" "0")]
8291 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8298 && <max_elem_bits> >= <elem_bits>
8299 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8300 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8301 "&& !rtx_equal_p (operands[2], operands[4])"
8303 operands[4] = operands[2];
8304 operands[5] = operands[3];
8308 ;; Same, but with only the flags result being interesting.
8309 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8310 [(set (reg:CC_NZC CC_REGNUM)
8312 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8314 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8317 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8318 (match_operand:PRED_ALL 6 "register_operand" "0")]
8321 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8323 && <max_elem_bits> >= <elem_bits>
8324 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8325 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8326 "&& !rtx_equal_p (operands[2], operands[4])"
8328 operands[4] = operands[2];
8329 operands[5] = operands[3];
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Element counting: CNT<pattern> into a DI register, then scalar increments
;; by an svpattern count — DImode (plain or saturating via ANY_PLUS),
;; SImode modular, and SImode saturating with the result extended to 64
;; bits.  All defer assembly output to
;; aarch64_output_sve_cnt_pat_immediate.
8333 ;; =========================================================================
8334 ;; == Counting elements
8335 ;; =========================================================================
8337 ;; -------------------------------------------------------------------------
8338 ;; ---- [INT] Count elements in a pattern (scalar)
8339 ;; -------------------------------------------------------------------------
8345 ;; -------------------------------------------------------------------------
8347 ;; Count the number of elements in an svpattern. Operand 1 is the pattern,
8348 ;; operand 2 is the number of elements that fit in a 128-bit block, and
8349 ;; operand 3 is a multiplier in the range [1, 16].
8351 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
8352 (define_insn "aarch64_sve_cnt_pat"
8353 [(set (match_operand:DI 0 "register_operand" "=r")
8355 (unspec:SI [(match_operand:DI 1 "const_int_operand")
8356 (match_operand:DI 2 "const_int_operand")
8357 (match_operand:DI 3 "const_int_operand")]
8358 UNSPEC_SVE_CNT_PAT)))]
8361 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
8365 ;; -------------------------------------------------------------------------
8366 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
8367 ;; -------------------------------------------------------------------------
8372 ;; -------------------------------------------------------------------------
8374 ;; Increment a DImode register by the number of elements in an svpattern.
8375 ;; See aarch64_sve_cnt_pat for the counting behavior.
8376 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8377 [(set (match_operand:DI 0 "register_operand" "=r")
8378 (ANY_PLUS:DI (zero_extend:DI
8379 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8380 (match_operand:DI 3 "const_int_operand")
8381 (match_operand:DI 4 "const_int_operand")]
8382 UNSPEC_SVE_CNT_PAT))
8383 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8386 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8391 ;; Increment an SImode register by the number of elements in an svpattern
8392 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
8394 (define_insn "*aarch64_sve_incsi_pat"
8395 [(set (match_operand:SI 0 "register_operand" "=r")
8396 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
8397 (match_operand:DI 3 "const_int_operand")
8398 (match_operand:DI 4 "const_int_operand")]
8400 (match_operand:SI 1 "register_operand" "0")))]
8403 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
8407 ;; Increment an SImode register by the number of elements in an svpattern
8408 ;; using saturating arithmetic, extending the result to 64 bits.
8410 ;; See aarch64_sve_cnt_pat for the counting behavior.
8411 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8412 [(set (match_operand:DI 0 "register_operand" "=r")
8415 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8416 (match_operand:DI 3 "const_int_operand")
8417 (match_operand:DI 4 "const_int_operand")]
8419 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8422 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
8423 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
;; NOTE(review): embedded line numbers are discontinuous here; some original
;; lines are missing from this extract, so the patterns below are incomplete
;; as shown.  Verify against the complete aarch64-sve.md before editing.
;;
;; Vector increments by an svpattern count: VNx2DI and VNx4SI insns (second
;; alternative emits MOVPRFX when the input is not tied to the output), and
;; the VNx8HI expand/insn pair, where the insn matches the count through a
;; subreg_lowpart operator.
8428 ;; -------------------------------------------------------------------------
8429 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
8430 ;; -------------------------------------------------------------------------
8435 ;; -------------------------------------------------------------------------
8437 ;; Increment a vector of DIs by the number of elements in an svpattern.
8438 ;; See aarch64_sve_cnt_pat for the counting behavior.
8439 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8440 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8442 (vec_duplicate:VNx2DI
8444 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8445 (match_operand:DI 3 "const_int_operand")
8446 (match_operand:DI 4 "const_int_operand")]
8447 UNSPEC_SVE_CNT_PAT)))
8448 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8451 if (which_alternative == 1)
8452 output_asm_insn ("movprfx\t%0, %1", operands);
8453 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8456 [(set_attr "movprfx" "*,yes")]
8459 ;; Increment a vector of SIs by the number of elements in an svpattern.
8460 ;; See aarch64_sve_cnt_pat for the counting behavior.
8461 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8462 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8464 (vec_duplicate:VNx4SI
8465 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8466 (match_operand:DI 3 "const_int_operand")
8467 (match_operand:DI 4 "const_int_operand")]
8468 UNSPEC_SVE_CNT_PAT))
8469 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8472 if (which_alternative == 1)
8473 output_asm_insn ("movprfx\t%0, %1", operands);
8474 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8477 [(set_attr "movprfx" "*,yes")]
8480 ;; Increment a vector of HIs by the number of elements in an svpattern.
8481 ;; See aarch64_sve_cnt_pat for the counting behavior.
8482 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8483 [(set (match_operand:VNx8HI 0 "register_operand")
8485 (vec_duplicate:VNx8HI
8487 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8488 (match_operand:DI 3 "const_int_operand")
8489 (match_operand:DI 4 "const_int_operand")]
8490 UNSPEC_SVE_CNT_PAT)))
8491 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
8495 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8496 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8498 (vec_duplicate:VNx8HI
8499 (match_operator:HI 5 "subreg_lowpart_operator"
8500 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8501 (match_operand:DI 3 "const_int_operand")
8502 (match_operand:DI 4 "const_int_operand")]
8503 UNSPEC_SVE_CNT_PAT)]))
8504 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8507 if (which_alternative == 1)
8508 output_asm_insn ("movprfx\t%0, %1", operands);
8509 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8512 [(set_attr "movprfx" "*,yes")]
8515 ;; -------------------------------------------------------------------------
8516 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
8517 ;; -------------------------------------------------------------------------
8522 ;; -------------------------------------------------------------------------
8524 ;; Decrement a DImode register by the number of elements in an svpattern.
8525 ;; See aarch64_sve_cnt_pat for the counting behavior.
8526 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8527 [(set (match_operand:DI 0 "register_operand" "=r")
8528 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
8530 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8531 (match_operand:DI 3 "const_int_operand")
8532 (match_operand:DI 4 "const_int_operand")]
8533 UNSPEC_SVE_CNT_PAT))))]
8536 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8541 ;; Decrement an SImode register by the number of elements in an svpattern
8542 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting
8544 (define_insn "*aarch64_sve_decsi_pat"
8545 [(set (match_operand:SI 0 "register_operand" "=r")
8546 (minus:SI (match_operand:SI 1 "register_operand" "0")
8547 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8548 (match_operand:DI 3 "const_int_operand")
8549 (match_operand:DI 4 "const_int_operand")]
8550 UNSPEC_SVE_CNT_PAT)))]
8553 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
8557 ;; Decrement an SImode register by the number of elements in an svpattern
8558 ;; using saturating arithmetic, extending the result to 64 bits.
8560 ;; See aarch64_sve_cnt_pat for the counting behavior.
8561 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8562 [(set (match_operand:DI 0 "register_operand" "=r")
8565 (match_operand:SI_ONLY 1 "register_operand" "0")
8566 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8567 (match_operand:DI 3 "const_int_operand")
8568 (match_operand:DI 4 "const_int_operand")]
8569 UNSPEC_SVE_CNT_PAT))))]
8572 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
8573 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8578 ;; -------------------------------------------------------------------------
8579 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
8580 ;; -------------------------------------------------------------------------
8585 ;; -------------------------------------------------------------------------
8587 ;; Decrement a vector of DIs by the number of elements in an svpattern.
8588 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Alternative 0 ties the input vector to the destination ("0" constraint);
;; alternative 1 does not, so a MOVPRFX of operand 1 into operand 0 is
;; emitted first (hence the "movprfx" attribute "*,yes").
8589 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8590 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8592 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
8593 (vec_duplicate:VNx2DI
8595 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8596 (match_operand:DI 3 "const_int_operand")
8597 (match_operand:DI 4 "const_int_operand")]
8598 UNSPEC_SVE_CNT_PAT)))))]
8601 if (which_alternative == 1)
8602 output_asm_insn ("movprfx\t%0, %1", operands);
8603 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8606 [(set_attr "movprfx" "*,yes")]
8609 ;; Decrement a vector of SIs by the number of elements in an svpattern.
8610 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; As for the DI form above: alternative 1 leaves the input untied and
;; therefore prefixes the count instruction with MOVPRFX.
8611 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8612 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8614 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
8615 (vec_duplicate:VNx4SI
8616 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8617 (match_operand:DI 3 "const_int_operand")
8618 (match_operand:DI 4 "const_int_operand")]
8619 UNSPEC_SVE_CNT_PAT))))]
8622 if (which_alternative == 1)
8623 output_asm_insn ("movprfx\t%0, %1", operands);
8624 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8627 [(set_attr "movprfx" "*,yes")]
8630 ;; Decrement a vector of HIs by the number of elements in an svpattern.
8631 ;; See aarch64_sve_cnt_pat for the counting behavior.
8632 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8633 [(set (match_operand:VNx8HI 0 "register_operand")
8635 (match_operand:VNx8HI_ONLY 1 "register_operand")
8636 (vec_duplicate:VNx8HI
8638 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8639 (match_operand:DI 3 "const_int_operand")
8640 (match_operand:DI 4 "const_int_operand")]
8641 UNSPEC_SVE_CNT_PAT)))))]
;; Matching form of the expander above.  The SImode count is narrowed to
;; HImode through a lowpart subreg (match_operator 5,
;; "subreg_lowpart_operator") before being duplicated across the vector
;; of HIs.  Alternative 1 emits a MOVPRFX because the input is untied.
8645 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8646 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8648 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
8649 (vec_duplicate:VNx8HI
8650 (match_operator:HI 5 "subreg_lowpart_operator"
8651 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8652 (match_operand:DI 3 "const_int_operand")
8653 (match_operand:DI 4 "const_int_operand")]
8654 UNSPEC_SVE_CNT_PAT)]))))]
8657 if (which_alternative == 1)
8658 output_asm_insn ("movprfx\t%0, %1", operands);
8659 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8662 [(set_attr "movprfx" "*,yes")]
8665 ;; -------------------------------------------------------------------------
8666 ;; ---- [INT] Count elements in a predicate (scalar)
8667 ;; -------------------------------------------------------------------------
8670 ;; -------------------------------------------------------------------------
;; Count the number of set bits in predicate operand 3, under governing
;; predicate operand 1.  Operand 2 is a ptrue flag (aarch64_sve_ptrue_flag)
;; that is true if operand 1 is known to be all-true.
8672 ;; Count the number of set bits in a predicate. Operand 3 is true if
8673 ;; operand 1 is known to be all-true.
8674 (define_insn "@aarch64_pred_cntp<mode>"
8675 [(set (match_operand:DI 0 "register_operand" "=r")
8677 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
8678 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8679 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8682 "cntp\t%x0, %1, %3.<Vetype>")
8684 ;; -------------------------------------------------------------------------
8685 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
8686 ;; -------------------------------------------------------------------------
8691 ;; -------------------------------------------------------------------------
8693 ;; Increment a DImode register by the number of set bits in a predicate.
8694 ;; See aarch64_sve_cntp for a description of the operands.
;; The expander supplies the governing predicate itself: operand 3 is
;; created below as an all-ones constant and the unspec carries
;; SVE_KNOWN_PTRUE to record that it is known to be all-true.
8695 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8696 [(set (match_operand:DI 0 "register_operand")
8699 (unspec:SI [(match_dup 3)
8700 (const_int SVE_KNOWN_PTRUE)
8701 (match_operand:PRED_ALL 2 "register_operand")]
8703 (match_operand:DI_ONLY 1 "register_operand")))]
8706 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Matching insn.  The "&& !CONSTANT_P (operands[3])" rewrite step
;; canonicalizes the unmatched governing predicate back to an all-ones
;; constant after any substitution, which is safe because the unspec is
;; tagged SVE_KNOWN_PTRUE.
8710 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8711 [(set (match_operand:DI 0 "register_operand" "=r")
8714 (unspec:SI [(match_operand 3)
8715 (const_int SVE_KNOWN_PTRUE)
8716 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8718 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8720 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
8721 "&& !CONSTANT_P (operands[3])"
8723 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8727 ;; Increment an SImode register by the number of set bits in a predicate
8728 ;; using modular arithmetic. See aarch64_sve_cntp for a description of
;; the operands.  Operand 1 is tied to operand 0; the rewrite step
;; canonicalizes the governing predicate (operand 3) to an all-ones
;; constant, as permitted by the SVE_KNOWN_PTRUE tag.
8730 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
8731 [(set (match_operand:SI 0 "register_operand" "=r")
8733 (unspec:SI [(match_operand 3)
8734 (const_int SVE_KNOWN_PTRUE)
8735 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8737 (match_operand:SI 1 "register_operand" "0")))]
8739 "incp\t%x0, %2.<Vetype>"
8740 "&& !CONSTANT_P (operands[3])"
8742 operands[3] = CONSTM1_RTX (<MODE>mode);
8746 ;; Increment an SImode register by the number of set bits in a predicate
8747 ;; using saturating arithmetic, extending the result to 64 bits.
8749 ;; See aarch64_sve_cntp for a description of the operands.
;; As in the DImode expander, operand 3 (the governing predicate) is
;; generated here as an all-ones constant, tagged SVE_KNOWN_PTRUE.
8750 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8751 [(set (match_operand:DI 0 "register_operand")
8754 (unspec:SI [(match_dup 3)
8755 (const_int SVE_KNOWN_PTRUE)
8756 (match_operand:PRED_ALL 2 "register_operand")]
8758 (match_operand:SI_ONLY 1 "register_operand"))))]
8761 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Matching insn.  The signed saturating add (SS_PLUS) form prints both
;; the X and W views of register 0; other codes print only the W view.
;; The rewrite canonicalizes the governing predicate to all-ones.
8765 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8766 [(set (match_operand:DI 0 "register_operand" "=r")
8769 (unspec:SI [(match_operand 3)
8770 (const_int SVE_KNOWN_PTRUE)
8771 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8773 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8776 if (<CODE> == SS_PLUS)
8777 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
8779 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
8781 "&& !CONSTANT_P (operands[3])"
8783 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8787 ;; -------------------------------------------------------------------------
8788 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
8789 ;; -------------------------------------------------------------------------
8794 ;; -------------------------------------------------------------------------
8796 ;; Increment a vector of DIs by the number of set bits in a predicate.
8797 ;; See aarch64_sve_cntp for a description of the operands.
;; The expander supplies operand 3 (governing predicate) as an all-ones
;; constant, tagged SVE_KNOWN_PTRUE.
8798 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8799 [(set (match_operand:VNx2DI 0 "register_operand")
8801 (vec_duplicate:VNx2DI
8805 (const_int SVE_KNOWN_PTRUE)
8806 (match_operand:<VPRED> 2 "register_operand")]
8808 (match_operand:VNx2DI_ONLY 1 "register_operand")))]
8811 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn.  Alternative 1 leaves the input vector untied and so
;; prefixes the count instruction with MOVPRFX; the rewrite step
;; canonicalizes the governing predicate to all-ones.
8815 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8816 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8818 (vec_duplicate:VNx2DI
8822 (const_int SVE_KNOWN_PTRUE)
8823 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8825 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8828 <inc_dec>p\t%0.d, %2
8829 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
8830 "&& !CONSTANT_P (operands[3])"
8832 operands[3] = CONSTM1_RTX (<VPRED>mode);
8834 [(set_attr "movprfx" "*,yes")]
8837 ;; Increment a vector of SIs by the number of set bits in a predicate.
8838 ;; See aarch64_sve_cntp for a description of the operands.
;; Same structure as the VNx2DI pair above, for .s elements.
8839 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8840 [(set (match_operand:VNx4SI 0 "register_operand")
8842 (vec_duplicate:VNx4SI
8845 (const_int SVE_KNOWN_PTRUE)
8846 (match_operand:<VPRED> 2 "register_operand")]
8848 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
8851 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn; alternative 1 emits MOVPRFX for the untied input and
;; the rewrite canonicalizes governing predicate operand 3 to all-ones.
8855 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8856 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8858 (vec_duplicate:VNx4SI
8861 (const_int SVE_KNOWN_PTRUE)
8862 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8864 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8867 <inc_dec>p\t%0.s, %2
8868 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
8869 "&& !CONSTANT_P (operands[3])"
8871 operands[3] = CONSTM1_RTX (<VPRED>mode);
8873 [(set_attr "movprfx" "*,yes")]
8876 ;; Increment a vector of HIs by the number of set bits in a predicate.
8877 ;; See aarch64_sve_cntp for a description of the operands.
8878 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
8879 [(set (match_operand:VNx8HI 0 "register_operand")
8881 (vec_duplicate:VNx8HI
8885 (const_int SVE_KNOWN_PTRUE)
8886 (match_operand:<VPRED> 2 "register_operand")]
8888 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
8891 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn.  Here operand 3 is the HImode lowpart operator that
;; narrows the SImode count, so the governing predicate is operand 4 —
;; hence the rewrite below updates operands[4], not operands[3].
8895 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
8896 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8898 (vec_duplicate:VNx8HI
8899 (match_operator:HI 3 "subreg_lowpart_operator"
8902 (const_int SVE_KNOWN_PTRUE)
8903 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
8905 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8908 <inc_dec>p\t%0.h, %2
8909 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
8910 "&& !CONSTANT_P (operands[4])"
8912 operands[4] = CONSTM1_RTX (<VPRED>mode);
8914 [(set_attr "movprfx" "*,yes")]
8917 ;; -------------------------------------------------------------------------
8918 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
8919 ;; -------------------------------------------------------------------------
8924 ;; -------------------------------------------------------------------------
8926 ;; Decrement a DImode register by the number of set bits in a predicate.
8927 ;; See aarch64_sve_cntp for a description of the operands.
8928 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8929 [(set (match_operand:DI 0 "register_operand")
8931 (match_operand:DI_ONLY 1 "register_operand")
8933 (unspec:SI [(match_dup 3)
8934 (const_int SVE_KNOWN_PTRUE)
8935 (match_operand:PRED_ALL 2 "register_operand")]
8939 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8943 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
8944 [(set (match_operand:DI 0 "register_operand" "=r")
8946 (match_operand:DI_ONLY 1 "register_operand" "0")
8948 (unspec:SI [(match_operand 3)
8949 (const_int SVE_KNOWN_PTRUE)
8950 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8953 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
8954 "&& !CONSTANT_P (operands[3])"
8956 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8960 ;; Decrement an SImode register by the number of set bits in a predicate
8961 ;; using modular arithmetic. See aarch64_sve_cntp for a description of the
;; operands.  Counterpart of *aarch64_incsi<mode>_cntp: operand 1 is tied
;; to operand 0 and the rewrite canonicalizes the governing predicate
;; (operand 3) to an all-ones constant.
8963 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
8964 [(set (match_operand:SI 0 "register_operand" "=r")
8966 (match_operand:SI 1 "register_operand" "0")
8967 (unspec:SI [(match_operand 3)
8968 (const_int SVE_KNOWN_PTRUE)
8969 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
8972 "decp\t%x0, %2.<Vetype>"
8973 "&& !CONSTANT_P (operands[3])"
8975 operands[3] = CONSTM1_RTX (<MODE>mode);
8979 ;; Decrement an SImode register by the number of set bits in a predicate
8980 ;; using saturating arithmetic, extending the result to 64 bits.
8982 ;; See aarch64_sve_cntp for a description of the operands.
;; Operand 3 (governing predicate) is created here as an all-ones
;; constant, tagged SVE_KNOWN_PTRUE.
8983 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8984 [(set (match_operand:DI 0 "register_operand")
8987 (match_operand:SI_ONLY 1 "register_operand")
8988 (unspec:SI [(match_dup 3)
8989 (const_int SVE_KNOWN_PTRUE)
8990 (match_operand:PRED_ALL 2 "register_operand")]
8994 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Matching insn.  The signed saturating subtract (SS_MINUS) form prints
;; both the X and W views of register 0; other codes print only the W
;; view.  The rewrite canonicalizes the governing predicate to all-ones.
8998 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
8999 [(set (match_operand:DI 0 "register_operand" "=r")
9002 (match_operand:SI_ONLY 1 "register_operand" "0")
9003 (unspec:SI [(match_operand 3)
9004 (const_int SVE_KNOWN_PTRUE)
9005 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9009 if (<CODE> == SS_MINUS)
9010 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9012 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9014 "&& !CONSTANT_P (operands[3])"
9016 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9020 ;; -------------------------------------------------------------------------
9021 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
9022 ;; -------------------------------------------------------------------------
9027 ;; -------------------------------------------------------------------------
9029 ;; Decrement a vector of DIs by the number of set bits in a predicate.
9030 ;; See aarch64_sve_cntp for a description of the operands.
;; The expander supplies operand 3 (governing predicate) as an all-ones
;; constant, tagged SVE_KNOWN_PTRUE.
9031 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9032 [(set (match_operand:VNx2DI 0 "register_operand")
9034 (match_operand:VNx2DI_ONLY 1 "register_operand")
9035 (vec_duplicate:VNx2DI
9039 (const_int SVE_KNOWN_PTRUE)
9040 (match_operand:<VPRED> 2 "register_operand")]
9044 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn.  Alternative 1 leaves the input vector untied and so
;; prefixes the count instruction with MOVPRFX; the rewrite step
;; canonicalizes the governing predicate to all-ones.
9048 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9049 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9051 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
9052 (vec_duplicate:VNx2DI
9056 (const_int SVE_KNOWN_PTRUE)
9057 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9061 <inc_dec>p\t%0.d, %2
9062 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9063 "&& !CONSTANT_P (operands[3])"
9065 operands[3] = CONSTM1_RTX (<VPRED>mode);
9067 [(set_attr "movprfx" "*,yes")]
9070 ;; Decrement a vector of SIs by the number of set bits in a predicate.
9071 ;; See aarch64_sve_cntp for a description of the operands.
;; Same structure as the VNx2DI pair above, for .s elements.
9072 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9073 [(set (match_operand:VNx4SI 0 "register_operand")
9075 (match_operand:VNx4SI_ONLY 1 "register_operand")
9076 (vec_duplicate:VNx4SI
9079 (const_int SVE_KNOWN_PTRUE)
9080 (match_operand:<VPRED> 2 "register_operand")]
9084 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn; alternative 1 emits MOVPRFX for the untied input and
;; the rewrite canonicalizes governing predicate operand 3 to all-ones.
9088 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9089 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9091 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
9092 (vec_duplicate:VNx4SI
9095 (const_int SVE_KNOWN_PTRUE)
9096 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9100 <inc_dec>p\t%0.s, %2
9101 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9102 "&& !CONSTANT_P (operands[3])"
9104 operands[3] = CONSTM1_RTX (<VPRED>mode);
9106 [(set_attr "movprfx" "*,yes")]
9109 ;; Decrement a vector of HIs by the number of set bits in a predicate.
9110 ;; See aarch64_sve_cntp for a description of the operands.
9111 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9112 [(set (match_operand:VNx8HI 0 "register_operand")
9114 (match_operand:VNx8HI_ONLY 1 "register_operand")
9115 (vec_duplicate:VNx8HI
9119 (const_int SVE_KNOWN_PTRUE)
9120 (match_operand:<VPRED> 2 "register_operand")]
9124 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Matching insn.  As in the HImode increment form, operand 3 is the
;; lowpart operator that narrows the SImode count, so the governing
;; predicate is operand 4 — the rewrite therefore updates operands[4].
9128 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9129 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9131 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
9132 (vec_duplicate:VNx8HI
9133 (match_operator:HI 3 "subreg_lowpart_operator"
9136 (const_int SVE_KNOWN_PTRUE)
9137 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9141 <inc_dec>p\t%0.h, %2
9142 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9143 "&& !CONSTANT_P (operands[4])"
9145 operands[4] = CONSTM1_RTX (<VPRED>mode);
9147 [(set_attr "movprfx" "*,yes")]