gcc/config/aarch64/aarch64-sve.md

   1 ;; Machine description for AArch64 SVE.
   2 ;; Copyright (C) 2009-2019 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; Note on the handling of big-endian SVE
  22 ;; --------------------------------------
  23 ;;
  24 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
  25 ;; same way as movdi or movti would: the first byte of memory goes
  26 ;; into the most significant byte of the register and the last byte
  27 ;; of memory goes into the least significant byte of the register.
  28 ;; This is the most natural ordering for Advanced SIMD and matches
  29 ;; the ABI layout for 64-bit and 128-bit vector types.
  30 ;;
  31 ;; As a result, the order of bytes within the register is what GCC
  32 ;; expects for a big-endian target, and subreg offsets therefore work
  33 ;; as expected, with the first element in memory having subreg offset 0
  34 ;; and the last element in memory having the subreg offset associated
  35 ;; with a big-endian lowpart.  However, this ordering also means that
  36 ;; GCC's lane numbering does not match the architecture's numbering:
  37 ;; GCC always treats the element at the lowest address in memory
  38 ;; (subreg offset 0) as element 0, while the architecture treats
  39 ;; the least significant end of the register as element 0.
  40 ;;
  41 ;; The situation for SVE is different.  We want the layout of the
  42 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
  43 ;; logically, a mov<mode> load must be indistinguishable from a
  44 ;; maskload<mode> whose mask is all true.  We therefore need the
  45 ;; register layout to match LD1 rather than LDR.  The ABI layout of
  46 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
  47 ;;
  48 ;; As a result, the architecture lane numbering matches GCC's lane
  49 ;; numbering, with element 0 always being the first in memory.
  50 ;; However:
  51 ;;
  52 ;; - Applying a subreg offset to a register does not give the element
  53 ;;   that GCC expects: the first element in memory has the subreg offset
  54 ;;   associated with a big-endian lowpart while the last element in memory
  55 ;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
  56 ;;
  57 ;; - We cannot use LDR and STR for spill slots that might be accessed
  58 ;;   via subregs, since although the elements have the order GCC expects,
  59 ;;   the order of the bytes within the elements is different.  We instead
  60 ;;   access spill slots via LD1 and ST1, using secondary reloads to
  61 ;;   reserve a predicate register.
  62
  63
  64 ;; SVE data moves.
  65 (define_expand "mov<mode>"
  66   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
  67         (match_operand:SVE_ALL 1 "general_operand"))]
  68   "TARGET_SVE"
  69   {
  70     /* Use the predicated load and store patterns where possible.
  71        This is required for big-endian targets (see the comment at the
  72        head of the file) and increases the addressing choices for
  73        little-endian.  */
  74     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
  75         && can_create_pseudo_p ())
  76       {
  77         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
  78         DONE;
  79       }
  80
  81     if (CONSTANT_P (operands[1]))
  82       {
  83         aarch64_expand_mov_immediate (operands[0], operands[1],
  84                                       gen_vec_duplicate<mode>);
  85         DONE;
  86       }
  87
  88     /* Optimize subregs on big-endian targets: we can use REV[BHW]
  89        instead of going through memory.  */
  90     if (BYTES_BIG_ENDIAN
  91         && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
  92       DONE;
  93   }
  94 )
  95
  96 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
  97 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
  98 ;; for details.  We use a special predicate for operand 2 to reduce
  99 ;; the number of patterns.
 100 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
 101   [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
 102         (unspec:SVE_ALL
 103           [(match_operand:VNx16BI 1 "register_operand" "Upl")
 104            (match_operand 2 "aarch64_any_register_operand" "w")]
 105           UNSPEC_REV_SUBREG))]
 106   "TARGET_SVE && BYTES_BIG_ENDIAN"
 107   "#"
 108   "&& reload_completed"
 109   [(const_int 0)]
 110   {
 111     aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
 112     DONE;
 113   }
 114 )
 115
 116 ;; Unpredicated moves (little-endian).  Only allow memory operations
 117 ;; during and after RA; before RA we want the predicated load and
 118 ;; store patterns to be used instead.
 119 (define_insn "*aarch64_sve_mov<mode>_le"
 120   [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 121         (match_operand:SVE_ALL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 122   "TARGET_SVE
 123    && !BYTES_BIG_ENDIAN
 124    && ((lra_in_progress || reload_completed)
 125        || (register_operand (operands[0], <MODE>mode)
 126            && nonmemory_operand (operands[1], <MODE>mode)))"
 127   "@
 128    ldr\t%0, %1
 129    str\t%1, %0
 130    mov\t%0.d, %1.d
 131    * return aarch64_output_sve_mov_immediate (operands[1]);"
 132 )
 133
 134 ;; Unpredicated moves (big-endian).  Memory accesses require secondary
 135 ;; reloads.
 136 (define_insn "*aarch64_sve_mov<mode>_be"
 137   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
 138         (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
 139   "TARGET_SVE && BYTES_BIG_ENDIAN"
 140   "@
 141    mov\t%0.d, %1.d
 142    * return aarch64_output_sve_mov_immediate (operands[1]);"
 143 )
 144
 145 ;; Handle big-endian memory reloads.  We use byte PTRUE for all modes
 146 ;; to try to encourage reuse.
 147 (define_expand "aarch64_sve_reload_be"
 148   [(parallel
 149      [(set (match_operand 0)
 150            (match_operand 1))
 151       (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
 152   "TARGET_SVE && BYTES_BIG_ENDIAN"
 153   {
 154     /* Create a PTRUE.  */
 155     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
 156
 157     /* Refer to the PTRUE in the appropriate mode for this move.  */
 158     machine_mode mode = GET_MODE (operands[0]);
 159     machine_mode pred_mode
 160       = aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)).require ();
 161     rtx pred = gen_lowpart (pred_mode, operands[2]);
 162
 163     /* Emit a predicated load or store.  */
 164     aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
 165     DONE;
 166   }
 167 )
 168
 169 ;; A predicated load or store for which the predicate is known to be
 170 ;; all-true.  Note that this pattern is generated directly by
 171 ;; aarch64_emit_sve_pred_move, so changes to this pattern will
 172 ;; need changes there as well.
 173 (define_insn_and_split "@aarch64_pred_mov<mode>"
 174   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
 175         (unspec:SVE_ALL
 176           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 177            (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
 178           UNSPEC_MERGE_PTRUE))]
 179   "TARGET_SVE
 180    && (register_operand (operands[0], <MODE>mode)
 181        || register_operand (operands[2], <MODE>mode))"
 182   "@
 183    #
 184    ld1<Vesize>\t%0.<Vetype>, %1/z, %2
 185    st1<Vesize>\t%2.<Vetype>, %1, %0"
 186   "&& register_operand (operands[0], <MODE>mode)
 187    && register_operand (operands[2], <MODE>mode)"
 188   [(set (match_dup 0) (match_dup 2))]
 189 )
 190
 191 (define_expand "movmisalign<mode>"
 192   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 193         (match_operand:SVE_ALL 1 "general_operand"))]
 194   "TARGET_SVE"
 195   {
 196     /* Equivalent to a normal move for our purpooses.  */
 197     emit_move_insn (operands[0], operands[1]);
 198     DONE;
 199   }
 200 )
 201
 202 (define_insn "maskload<mode><vpred>"
 203   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 204         (unspec:SVE_ALL
 205           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 206            (match_operand:SVE_ALL 1 "memory_operand" "m")]
 207           UNSPEC_LD1_SVE))]
 208   "TARGET_SVE"
 209   "ld1<Vesize>\t%0.<Vetype>, %2/z, %1"
 210 )
 211
 212 (define_insn "maskstore<mode><vpred>"
 213   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
 214         (unspec:SVE_ALL [(match_operand:<VPRED> 2 "register_operand" "Upl")
 215                          (match_operand:SVE_ALL 1 "register_operand" "w")
 216                          (match_dup 0)]
 217                         UNSPEC_ST1_SVE))]
 218   "TARGET_SVE"
 219   "st1<Vesize>\t%1.<Vetype>, %2, %0"
 220 )
 221
 222 ;; Unpredicated gather loads.
 223 (define_expand "gather_load<mode>"
 224   [(set (match_operand:SVE_SD 0 "register_operand")
 225         (unspec:SVE_SD
 226           [(match_dup 5)
 227            (match_operand:DI 1 "aarch64_reg_or_zero")
 228            (match_operand:<V_INT_EQUIV> 2 "register_operand")
 229            (match_operand:DI 3 "const_int_operand")
 230            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
 231            (mem:BLK (scratch))]
 232           UNSPEC_LD1_GATHER))]
 233   "TARGET_SVE"
 234   {
 235     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
 236   }
 237 )
 238
 239 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
 240 ;; unsigned extension and false for signed extension.
 241 (define_insn "mask_gather_load<mode>"
 242   [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
 243         (unspec:SVE_S
 244           [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
 245            (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
 246            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
 247            (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
 248            (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
 249            (mem:BLK (scratch))]
 250           UNSPEC_LD1_GATHER))]
 251   "TARGET_SVE"
 252   "@
 253    ld1w\t%0.s, %5/z, [%2.s]
 254    ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
 255    ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
 256    ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
 257    ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
 258 )
 259
 260 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
 261 ;; doesn't matter in this case.
 262 (define_insn "mask_gather_load<mode>"
 263   [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
 264         (unspec:SVE_D
 265           [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
 266            (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
 267            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
 268            (match_operand:DI 3 "const_int_operand")
 269            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
 270            (mem:BLK (scratch))]
 271           UNSPEC_LD1_GATHER))]
 272   "TARGET_SVE"
 273   "@
 274    ld1d\t%0.d, %5/z, [%2.d]
 275    ld1d\t%0.d, %5/z, [%1, %2.d]
 276    ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
 277 )
 278
 279 ;; Unpredicated scatter store.
 280 (define_expand "scatter_store<mode>"
 281   [(set (mem:BLK (scratch))
 282         (unspec:BLK
 283           [(match_dup 5)
 284            (match_operand:DI 0 "aarch64_reg_or_zero")
 285            (match_operand:<V_INT_EQUIV> 1 "register_operand")
 286            (match_operand:DI 2 "const_int_operand")
 287            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
 288            (match_operand:SVE_SD 4 "register_operand")]
 289           UNSPEC_ST1_SCATTER))]
 290   "TARGET_SVE"
 291   {
 292     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
 293   }
 294 )
 295
 296 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
 297 ;; unsigned extension and false for signed extension.
 298 (define_insn "mask_scatter_store<mode>"
 299   [(set (mem:BLK (scratch))
 300         (unspec:BLK
 301           [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
 302            (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
 303            (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w, w, w")
 304            (match_operand:DI 2 "const_int_operand" "i, Z, Ui1, Z, Ui1")
 305            (match_operand:DI 3 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
 306            (match_operand:SVE_S 4 "register_operand" "w, w, w, w, w")]
 307           UNSPEC_ST1_SCATTER))]
 308   "TARGET_SVE"
 309   "@
 310    st1w\t%4.s, %5, [%1.s]
 311    st1w\t%4.s, %5, [%0, %1.s, sxtw]
 312    st1w\t%4.s, %5, [%0, %1.s, uxtw]
 313    st1w\t%4.s, %5, [%0, %1.s, sxtw %p3]
 314    st1w\t%4.s, %5, [%0, %1.s, uxtw %p3]"
 315 )
 316
 317 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
 318 ;; doesn't matter in this case.
 319 (define_insn "mask_scatter_store<mode>"
 320   [(set (mem:BLK (scratch))
 321         (unspec:BLK
 322           [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
 323            (match_operand:DI 0 "aarch64_reg_or_zero" "Z, rk, rk")
 324            (match_operand:<V_INT_EQUIV> 1 "register_operand" "w, w, w")
 325            (match_operand:DI 2 "const_int_operand")
 326            (match_operand:DI 3 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
 327            (match_operand:SVE_D 4 "register_operand" "w, w, w")]
 328           UNSPEC_ST1_SCATTER))]
 329   "TARGET_SVE"
 330   "@
 331    st1d\t%4.d, %5, [%1.d]
 332    st1d\t%4.d, %5, [%0, %1.d]
 333    st1d\t%4.d, %5, [%0, %1.d, lsl %p3]"
 334 )
 335
 336 ;; SVE structure moves.
 337 (define_expand "mov<mode>"
 338   [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
 339         (match_operand:SVE_STRUCT 1 "general_operand"))]
 340   "TARGET_SVE"
 341   {
 342     /* Big-endian loads and stores need to be done via LD1 and ST1;
 343        see the comment at the head of the file for details.  */
 344     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 345         && BYTES_BIG_ENDIAN)
 346       {
 347         gcc_assert (can_create_pseudo_p ());
 348         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 349         DONE;
 350       }
 351
 352     if (CONSTANT_P (operands[1]))
 353       {
 354         aarch64_expand_mov_immediate (operands[0], operands[1]);
 355         DONE;
 356       }
 357   }
 358 )
 359
 360 ;; Unpredicated structure moves (little-endian).
 361 (define_insn "*aarch64_sve_mov<mode>_le"
 362   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 363         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 364   "TARGET_SVE && !BYTES_BIG_ENDIAN"
 365   "#"
 366   [(set_attr "length" "<insn_length>")]
 367 )
 368
 369 ;; Unpredicated structure moves (big-endian).  Memory accesses require
 370 ;; secondary reloads.
 371 (define_insn "*aarch64_sve_mov<mode>_le"
 372   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
 373         (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
 374   "TARGET_SVE && BYTES_BIG_ENDIAN"
 375   "#"
 376   [(set_attr "length" "<insn_length>")]
 377 )
 378
 379 ;; Split unpredicated structure moves into pieces.  This is the same
 380 ;; for both big-endian and little-endian code, although it only needs
 381 ;; to handle memory operands for little-endian code.
 382 (define_split
 383   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
 384         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
 385   "TARGET_SVE && reload_completed"
 386   [(const_int 0)]
 387   {
 388     rtx dest = operands[0];
 389     rtx src = operands[1];
 390     if (REG_P (dest) && REG_P (src))
 391       aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
 392     else
 393       for (unsigned int i = 0; i < <vector_count>; ++i)
 394         {
 395           rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
 396                                              i * BYTES_PER_SVE_VECTOR);
 397           rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
 398                                             i * BYTES_PER_SVE_VECTOR);
 399           emit_insn (gen_rtx_SET (subdest, subsrc));
 400         }
 401     DONE;
 402   }
 403 )
 404
 405 ;; Predicated structure moves.  This works for both endiannesses but in
 406 ;; practice is only useful for big-endian.
 407 (define_insn_and_split "@aarch64_pred_mov<mode>"
 408   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
 409         (unspec:SVE_STRUCT
 410           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 411            (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
 412           UNSPEC_MERGE_PTRUE))]
 413   "TARGET_SVE
 414    && (register_operand (operands[0], <MODE>mode)
 415        || register_operand (operands[2], <MODE>mode))"
 416   "#"
 417   "&& reload_completed"
 418   [(const_int 0)]
 419   {
 420     for (unsigned int i = 0; i < <vector_count>; ++i)
 421       {
 422         rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
 423                                            <MODE>mode,
 424                                            i * BYTES_PER_SVE_VECTOR);
 425         rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
 426                                           <MODE>mode,
 427                                           i * BYTES_PER_SVE_VECTOR);
 428         aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
 429       }
 430     DONE;
 431   }
 432   [(set_attr "length" "<insn_length>")]
 433 )
 434
 435 (define_expand "mov<mode>"
 436   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 437         (match_operand:PRED_ALL 1 "general_operand"))]
 438   "TARGET_SVE"
 439   {
 440     if (GET_CODE (operands[0]) == MEM)
 441       operands[1] = force_reg (<MODE>mode, operands[1]);
 442   }
 443 )
 444
 445 (define_insn "*aarch64_sve_mov<mode>"
 446   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa, Upa")
 447         (match_operand:PRED_ALL 1 "general_operand" "Upa, Upa, m, Dz, Dm"))]
 448   "TARGET_SVE
 449    && (register_operand (operands[0], <MODE>mode)
 450        || register_operand (operands[1], <MODE>mode))"
 451   "@
 452    mov\t%0.b, %1.b
 453    str\t%1, %0
 454    ldr\t%0, %1
 455    pfalse\t%0.b
 456    * return aarch64_output_ptrue (<MODE>mode, '<Vetype>');"
 457 )
 458
 459 ;; Handle extractions from a predicate by converting to an integer vector
 460 ;; and extracting from there.
 461 (define_expand "vec_extract<vpred><Vel>"
 462   [(match_operand:<VEL> 0 "register_operand")
 463    (match_operand:<VPRED> 1 "register_operand")
 464    (match_operand:SI 2 "nonmemory_operand")
 465    ;; Dummy operand to which we can attach the iterator.
 466    (reg:SVE_I V0_REGNUM)]
 467   "TARGET_SVE"
 468   {
 469     rtx tmp = gen_reg_rtx (<MODE>mode);
 470     emit_insn (gen_aarch64_sve_dup<mode>_const (tmp, operands[1],
 471                                                 CONST1_RTX (<MODE>mode),
 472                                                 CONST0_RTX (<MODE>mode)));
 473     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
 474     DONE;
 475   }
 476 )
 477
 478 (define_expand "vec_extract<mode><Vel>"
 479   [(set (match_operand:<VEL> 0 "register_operand")
 480         (vec_select:<VEL>
 481           (match_operand:SVE_ALL 1 "register_operand")
 482           (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
 483   "TARGET_SVE"
 484   {
 485     poly_int64 val;
 486     if (poly_int_rtx_p (operands[2], &val)
 487         && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
 488       {
 489         /* The last element can be extracted with a LASTB and a false
 490            predicate.  */
 491         rtx sel = force_reg (<VPRED>mode, CONST0_RTX (<VPRED>mode));
 492         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
 493         DONE;
 494       }
 495     if (!CONST_INT_P (operands[2]))
 496       {
 497         /* Create an index with operand[2] as the base and -1 as the step.
 498            It will then be zero for the element we care about.  */
 499         rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
 500         index = force_reg (<VEL_INT>mode, index);
 501         rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
 502         emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
 503
 504         /* Get a predicate that is true for only that element.  */
 505         rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
 506         rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
 507         rtx sel = gen_reg_rtx (<VPRED>mode);
 508         emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
 509
 510         /* Select the element using LASTB.  */
 511         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
 512         DONE;
 513       }
 514   }
 515 )
 516
 517 ;; Extract element zero.  This is a special case because we want to force
 518 ;; the registers to be the same for the second alternative, and then
 519 ;; split the instruction into nothing after RA.
 520 (define_insn_and_split "*vec_extract<mode><Vel>_0"
 521   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
 522         (vec_select:<VEL>
 523           (match_operand:SVE_ALL 1 "register_operand" "w, 0, w")
 524           (parallel [(const_int 0)])))]
 525   "TARGET_SVE"
 526   {
 527     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
 528     switch (which_alternative)
 529       {
 530         case 0:
 531           return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
 532         case 1:
 533           return "#";
 534         case 2:
 535           return "st1\\t{%1.<Vetype>}[0], %0";
 536         default:
 537           gcc_unreachable ();
 538       }
 539   }
 540   "&& reload_completed
 541    && REG_P (operands[0])
 542    && REGNO (operands[0]) == REGNO (operands[1])"
 543   [(const_int 0)]
 544   {
 545     emit_note (NOTE_INSN_DELETED);
 546     DONE;
 547   }
 548   [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
 549 )
 550
 551 ;; Extract an element from the Advanced SIMD portion of the register.
 552 ;; We don't just reuse the aarch64-simd.md pattern because we don't
 553 ;; want any change in lane number on big-endian targets.
 554 (define_insn "*vec_extract<mode><Vel>_v128"
 555   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
 556         (vec_select:<VEL>
 557           (match_operand:SVE_ALL 1 "register_operand" "w, w, w")
 558           (parallel [(match_operand:SI 2 "const_int_operand")])))]
 559   "TARGET_SVE
 560    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
 561   {
 562     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
 563     switch (which_alternative)
 564       {
 565         case 0:
 566           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
 567         case 1:
 568           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
 569         case 2:
 570           return "st1\\t{%1.<Vetype>}[%2], %0";
 571         default:
 572           gcc_unreachable ();
 573       }
 574   }
 575   [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
 576 )
 577
 578 ;; Extract an element in the range of DUP.  This pattern allows the
 579 ;; source and destination to be different.
 580 (define_insn "*vec_extract<mode><Vel>_dup"
 581   [(set (match_operand:<VEL> 0 "register_operand" "=w")
 582         (vec_select:<VEL>
 583           (match_operand:SVE_ALL 1 "register_operand" "w")
 584           (parallel [(match_operand:SI 2 "const_int_operand")])))]
 585   "TARGET_SVE
 586    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
 587   {
 588     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
 589     return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
 590   }
 591 )
 592
 593 ;; Extract an element outside the range of DUP.  This pattern requires the
 594 ;; source and destination to be the same.
 595 (define_insn "*vec_extract<mode><Vel>_ext"
 596   [(set (match_operand:<VEL> 0 "register_operand" "=w")
 597         (vec_select:<VEL>
 598           (match_operand:SVE_ALL 1 "register_operand" "0")
 599           (parallel [(match_operand:SI 2 "const_int_operand")])))]
 600   "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
 601   {
 602     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
 603     operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
 604     return "ext\t%0.b, %0.b, %0.b, #%2";
 605   }
 606 )
 607
 608 ;; Extract the last active element of operand 1 into operand 0.
 609 ;; If no elements are active, extract the last inactive element instead.
 610 (define_insn "extract_last_<mode>"
 611   [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
 612         (unspec:<VEL>
 613           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 614            (match_operand:SVE_ALL 2 "register_operand" "w, w")]
 615           UNSPEC_LASTB))]
 616   "TARGET_SVE"
 617   "@
 618    lastb\t%<vwcore>0, %1, %2.<Vetype>
 619    lastb\t%<Vetype>0, %1, %2.<Vetype>"
 620 )
 621
 622 (define_expand "vec_duplicate<mode>"
 623   [(parallel
 624     [(set (match_operand:SVE_ALL 0 "register_operand")
 625           (vec_duplicate:SVE_ALL
 626             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
 627      (clobber (scratch:<VPRED>))])]
 628   "TARGET_SVE"
 629   {
 630     if (MEM_P (operands[1]))
 631       {
 632         rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
 633         emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
 634                                        CONST0_RTX (<MODE>mode)));
 635         DONE;
 636       }
 637   }
 638 )
 639
 640 ;; Accept memory operands for the benefit of combine, and also in case
 641 ;; the scalar input gets spilled to memory during RA.  We want to split
 642 ;; the load at the first opportunity in order to allow the PTRUE to be
 643 ;; optimized with surrounding code.
 644 (define_insn_and_split "*vec_duplicate<mode>_reg"
 645   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
 646         (vec_duplicate:SVE_ALL
 647           (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
 648    (clobber (match_scratch:<VPRED> 2 "=X, X, Upl"))]
 649   "TARGET_SVE"
 650   "@
 651    mov\t%0.<Vetype>, %<vwcore>1
 652    mov\t%0.<Vetype>, %<Vetype>1
 653    #"
 654   "&& MEM_P (operands[1])"
 655   [(const_int 0)]
 656   {
 657     if (GET_CODE (operands[2]) == SCRATCH)
 658       operands[2] = gen_reg_rtx (<VPRED>mode);
 659     emit_move_insn (operands[2], CONSTM1_RTX (<VPRED>mode));
 660     emit_insn (gen_sve_ld1r<mode> (operands[0], operands[2], operands[1],
 661                                    CONST0_RTX (<MODE>mode)));
 662     DONE;
 663   }
 664   [(set_attr "length" "4,4,8")]
 665 )
 666
 667 ;; This is used for vec_duplicate<mode>s from memory, but can also
 668 ;; be used by combine to optimize selects of a a vec_duplicate<mode>
 669 ;; with zero.
 670 (define_insn "sve_ld1r<mode>"
 671   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 672         (unspec:SVE_ALL
 673           [(match_operand:<VPRED> 1 "register_operand" "Upl")
 674            (vec_duplicate:SVE_ALL
 675              (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
 676            (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
 677           UNSPEC_SEL))]
 678   "TARGET_SVE"
 679   "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
 680 )
 681
 682 ;; Load 128 bits from memory and duplicate to fill a vector.  Since there
 683 ;; are so few operations on 128-bit "elements", we don't define a VNx1TI
 684 ;; and simply use vectors of bytes instead.
 685 (define_insn "*sve_ld1rq<Vesize>"
 686   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 687         (unspec:SVE_ALL
 688           [(match_operand:<VPRED> 1 "register_operand" "Upl")
 689            (match_operand:TI 2 "aarch64_sve_ld1r_operand" "Uty")]
 690           UNSPEC_LD1RQ))]
 691   "TARGET_SVE"
 692   "ld1rq<Vesize>\t%0.<Vetype>, %1/z, %2"
 693 )
 694
 695 ;; Implement a predicate broadcast by shifting the low bit of the scalar
 696 ;; input into the top bit and using a WHILELO.  An alternative would be to
 697 ;; duplicate the input and do a compare with zero.
 698 (define_expand "vec_duplicate<mode>"
 699   [(set (match_operand:PRED_ALL 0 "register_operand")
 700         (vec_duplicate:PRED_ALL (match_operand 1 "register_operand")))]
 701   "TARGET_SVE"
 702   {
 703     rtx tmp = gen_reg_rtx (DImode);
 704     rtx op1 = gen_lowpart (DImode, operands[1]);
 705     emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
 706     emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
 707     DONE;
 708   }
 709 )
 710
 711 (define_insn "vec_series<mode>"
 712   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
 713         (vec_series:SVE_I
 714           (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
 715           (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
 716   "TARGET_SVE"
 717   "@
 718    index\t%0.<Vetype>, #%1, %<vw>2
 719    index\t%0.<Vetype>, %<vw>1, #%2
 720    index\t%0.<Vetype>, %<vw>1, %<vw>2"
 721 )
 722
 723 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
 724 ;; of an INDEX instruction.
 725 (define_insn "*vec_series<mode>_plus"
 726   [(set (match_operand:SVE_I 0 "register_operand" "=w")
 727         (plus:SVE_I
 728           (vec_duplicate:SVE_I
 729             (match_operand:<VEL> 1 "register_operand" "r"))
 730           (match_operand:SVE_I 2 "immediate_operand")))]
 731   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
 732   {
 733     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
 734     return "index\t%0.<Vetype>, %<vw>1, #%2";
 735   }
 736 )
 737
 738 ;; Unpredicated LD[234].
 739 (define_expand "vec_load_lanes<mode><vsingle>"
 740   [(set (match_operand:SVE_STRUCT 0 "register_operand")
 741         (unspec:SVE_STRUCT
 742           [(match_dup 2)
 743            (match_operand:SVE_STRUCT 1 "memory_operand")]
 744           UNSPEC_LDN))]
 745   "TARGET_SVE"
 746   {
 747     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
 748   }
 749 )
 750
 751 ;; Predicated LD[234].
 752 (define_insn "vec_mask_load_lanes<mode><vsingle>"
 753   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
 754         (unspec:SVE_STRUCT
 755           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 756            (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
 757           UNSPEC_LDN))]
 758   "TARGET_SVE"
 759   "ld<vector_count><Vesize>\t%0, %2/z, %1"
 760 )
 761
 762 ;; Unpredicated ST[234].  This is always a full update, so the dependence
 763 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
 764 ;; There doesn't seem to be any obvious benefit to treating the all-true
 765 ;; case differently though.  In particular, it's very unlikely that we'll
 766 ;; only find out during RTL that a store_lanes is dead.
 767 (define_expand "vec_store_lanes<mode><vsingle>"
 768   [(set (match_operand:SVE_STRUCT 0 "memory_operand")
 769         (unspec:SVE_STRUCT
 770           [(match_dup 2)
 771            (match_operand:SVE_STRUCT 1 "register_operand")
 772            (match_dup 0)]
 773           UNSPEC_STN))]
 774   "TARGET_SVE"
 775   {
 776     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
 777   }
 778 )
 779
 780 ;; Predicated ST[234].
 781 (define_insn "vec_mask_store_lanes<mode><vsingle>"
 782   [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
 783         (unspec:SVE_STRUCT
 784           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 785            (match_operand:SVE_STRUCT 1 "register_operand" "w")
 786            (match_dup 0)]
 787           UNSPEC_STN))]
 788   "TARGET_SVE"
 789   "st<vector_count><Vesize>\t%1, %2, %0"
 790 )
 791
 792 (define_expand "vec_perm<mode>"
 793   [(match_operand:SVE_ALL 0 "register_operand")
 794    (match_operand:SVE_ALL 1 "register_operand")
 795    (match_operand:SVE_ALL 2 "register_operand")
 796    (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
 797   "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
 798   {
 799     aarch64_expand_sve_vec_perm (operands[0], operands[1],
 800                                  operands[2], operands[3]);
 801     DONE;
 802   }
 803 )
 804
 805 (define_insn "*aarch64_sve_tbl<mode>"
 806   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 807         (unspec:SVE_ALL
 808           [(match_operand:SVE_ALL 1 "register_operand" "w")
 809            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
 810           UNSPEC_TBL))]
 811   "TARGET_SVE"
 812   "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 813 )
 814
 815 (define_insn "*aarch64_sve_<perm_insn><perm_hilo><mode>"
 816   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
 817         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
 818                           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
 819                          PERMUTE))]
 820   "TARGET_SVE"
 821   "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 822 )
 823
 824 (define_insn "aarch64_sve_<perm_insn><perm_hilo><mode>"
 825   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 826         (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")
 827                          (match_operand:SVE_ALL 2 "register_operand" "w")]
 828                         PERMUTE))]
 829   "TARGET_SVE"
 830   "<perm_insn><perm_hilo>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 831 )
 832
 833 (define_insn "*aarch64_sve_rev64<mode>"
 834   [(set (match_operand:SVE_BHS 0 "register_operand" "=w")
 835         (unspec:SVE_BHS
 836           [(match_operand:VNx2BI 1 "register_operand" "Upl")
 837            (unspec:SVE_BHS [(match_operand:SVE_BHS 2 "register_operand" "w")]
 838                            UNSPEC_REV64)]
 839           UNSPEC_MERGE_PTRUE))]
 840   "TARGET_SVE"
 841   "rev<Vesize>\t%0.d, %1/m, %2.d"
 842 )
 843
 844 (define_insn "*aarch64_sve_rev32<mode>"
 845   [(set (match_operand:SVE_BH 0 "register_operand" "=w")
 846         (unspec:SVE_BH
 847           [(match_operand:VNx4BI 1 "register_operand" "Upl")
 848            (unspec:SVE_BH [(match_operand:SVE_BH 2 "register_operand" "w")]
 849                           UNSPEC_REV32)]
 850           UNSPEC_MERGE_PTRUE))]
 851   "TARGET_SVE"
 852   "rev<Vesize>\t%0.s, %1/m, %2.s"
 853 )
 854
 855 (define_insn "*aarch64_sve_rev16vnx16qi"
 856   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
 857         (unspec:VNx16QI
 858           [(match_operand:VNx8BI 1 "register_operand" "Upl")
 859            (unspec:VNx16QI [(match_operand:VNx16QI 2 "register_operand" "w")]
 860                            UNSPEC_REV16)]
 861           UNSPEC_MERGE_PTRUE))]
 862   "TARGET_SVE"
 863   "revb\t%0.h, %1/m, %2.h"
 864 )
 865
 866 (define_insn "@aarch64_sve_rev<mode>"
 867   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 868         (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "w")]
 869                         UNSPEC_REV))]
 870   "TARGET_SVE"
 871   "rev\t%0.<Vetype>, %1.<Vetype>")
 872
 873 (define_insn "*aarch64_sve_dup_lane<mode>"
 874   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 875         (vec_duplicate:SVE_ALL
 876           (vec_select:<VEL>
 877             (match_operand:SVE_ALL 1 "register_operand" "w")
 878             (parallel [(match_operand:SI 2 "const_int_operand")]))))]
 879   "TARGET_SVE
 880    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
 881   "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
 882 )
 883
 884 ;; Note that the immediate (third) operand is the lane index not
 885 ;; the byte index.
 886 (define_insn "*aarch64_sve_ext<mode>"
 887   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 888         (unspec:SVE_ALL [(match_operand:SVE_ALL 1 "register_operand" "0")
 889                          (match_operand:SVE_ALL 2 "register_operand" "w")
 890                          (match_operand:SI 3 "const_int_operand")]
 891                         UNSPEC_EXT))]
 892   "TARGET_SVE
 893    && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
 894   {
 895     operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
 896     return "ext\\t%0.b, %0.b, %2.b, #%3";
 897   }
 898 )
 899
 900 (define_insn "add<mode>3"
 901   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, w")
 902         (plus:SVE_I
 903           (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w")
 904           (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, w")))]
 905   "TARGET_SVE"
 906   "@
 907    add\t%0.<Vetype>, %0.<Vetype>, #%D2
 908    sub\t%0.<Vetype>, %0.<Vetype>, #%N2
 909    * return aarch64_output_sve_inc_dec_immediate (\"%0.<Vetype>\", operands[2]);
 910    add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
 911 )
 912
 913 (define_insn "sub<mode>3"
 914   [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
 915         (minus:SVE_I
 916           (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa")
 917           (match_operand:SVE_I 2 "register_operand" "w, 0")))]
 918   "TARGET_SVE"
 919   "@
 920    sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
 921    subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
 922 )
 923
 924 ;; Unpredicated multiplication.
 925 (define_expand "mul<mode>3"
 926   [(set (match_operand:SVE_I 0 "register_operand")
 927         (unspec:SVE_I
 928           [(match_dup 3)
 929            (mult:SVE_I
 930              (match_operand:SVE_I 1 "register_operand")
 931              (match_operand:SVE_I 2 "aarch64_sve_mul_operand"))]
 932           UNSPEC_MERGE_PTRUE))]
 933   "TARGET_SVE"
 934   {
 935     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
 936   }
 937 )
 938
 939 ;; Multiplication predicated with a PTRUE.  We don't actually need the
 940 ;; predicate for the first alternative, but using Upa or X isn't likely
 941 ;; to gain much and would make the instruction seem less uniform to the
 942 ;; register allocator.
 943 (define_insn_and_split "*mul<mode>3"
 944   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 945         (unspec:SVE_I
 946           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 947            (mult:SVE_I
 948              (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
 949              (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
 950           UNSPEC_MERGE_PTRUE))]
 951   "TARGET_SVE"
 952   "@
 953    #
 954    mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
 955    movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
 956   ; Split the unpredicated form after reload, so that we don't have
 957   ; the unnecessary PTRUE.
 958   "&& reload_completed
 959    && !register_operand (operands[3], <MODE>mode)"
 960   [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
 961   ""
 962   [(set_attr "movprfx" "*,*,yes")]
 963 )
 964
 965 ;; Unpredicated multiplications by a constant (post-RA only).
 966 ;; These are generated by splitting a predicated instruction whose
 967 ;; predicate is unused.
 968 (define_insn "*post_ra_mul<mode>3"
 969   [(set (match_operand:SVE_I 0 "register_operand" "=w")
 970         (mult:SVE_I
 971           (match_operand:SVE_I 1 "register_operand" "0")
 972           (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
 973   "TARGET_SVE && reload_completed"
 974   "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
 975 )
 976
 977 (define_insn "*madd<mode>"
 978   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 979         (plus:SVE_I
 980           (unspec:SVE_I
 981             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 982              (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
 983                          (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
 984             UNSPEC_MERGE_PTRUE)
 985           (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
 986   "TARGET_SVE"
 987   "@
 988    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
 989    mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
 990    movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
 991   [(set_attr "movprfx" "*,*,yes")]
 992 )
 993
 994 (define_insn "*msub<mode>3"
 995   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 996         (minus:SVE_I
 997           (match_operand:SVE_I 4 "register_operand" "w, 0, w")
 998           (unspec:SVE_I
 999             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1000              (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
1001                          (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
1002             UNSPEC_MERGE_PTRUE)))]
1003   "TARGET_SVE"
1004   "@
1005    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
1006    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
1007    movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
1008   [(set_attr "movprfx" "*,*,yes")]
1009 )
1010
1011 ;; Unpredicated highpart multiplication.
1012 (define_expand "<su>mul<mode>3_highpart"
1013   [(set (match_operand:SVE_I 0 "register_operand")
1014         (unspec:SVE_I
1015           [(match_dup 3)
1016            (unspec:SVE_I [(match_operand:SVE_I 1 "register_operand")
1017                           (match_operand:SVE_I 2 "register_operand")]
1018                          MUL_HIGHPART)]
1019           UNSPEC_MERGE_PTRUE))]
1020   "TARGET_SVE"
1021   {
1022     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1023   }
1024 )
1025
1026 ;; Predicated highpart multiplication.
1027 (define_insn "*<su>mul<mode>3_highpart"
1028   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1029         (unspec:SVE_I
1030           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1031            (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
1032                           (match_operand:SVE_I 3 "register_operand" "w, w")]
1033                          MUL_HIGHPART)]
1034           UNSPEC_MERGE_PTRUE))]
1035   "TARGET_SVE"
1036   "@
1037    <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1038    movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1039   [(set_attr "movprfx" "*,yes")]
1040 )
1041
1042 ;; Unpredicated division.
1043 (define_expand "<optab><mode>3"
1044   [(set (match_operand:SVE_SDI 0 "register_operand")
1045         (unspec:SVE_SDI
1046           [(match_dup 3)
1047            (SVE_INT_BINARY_SD:SVE_SDI
1048              (match_operand:SVE_SDI 1 "register_operand")
1049              (match_operand:SVE_SDI 2 "register_operand"))]
1050           UNSPEC_MERGE_PTRUE))]
1051   "TARGET_SVE"
1052   {
1053     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1054   }
1055 )
1056
1057 ;; Division predicated with a PTRUE.
1058 (define_insn "*<optab><mode>3"
1059   [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
1060         (unspec:SVE_SDI
1061           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1062            (SVE_INT_BINARY_SD:SVE_SDI
1063              (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
1064              (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
1065           UNSPEC_MERGE_PTRUE))]
1066   "TARGET_SVE"
1067   "@
1068    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1069    <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1070    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1071   [(set_attr "movprfx" "*,*,yes")]
1072 )
1073
1074 ;; Unpredicated NEG, NOT and POPCOUNT.
1075 (define_expand "<optab><mode>2"
1076   [(set (match_operand:SVE_I 0 "register_operand")
1077         (unspec:SVE_I
1078           [(match_dup 2)
1079            (SVE_INT_UNARY:SVE_I (match_operand:SVE_I 1 "register_operand"))]
1080           UNSPEC_MERGE_PTRUE))]
1081   "TARGET_SVE"
1082   {
1083     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1084   }
1085 )
1086
1087 ;; NEG, NOT and POPCOUNT predicated with a PTRUE.
1088 (define_insn "*<optab><mode>2"
1089   [(set (match_operand:SVE_I 0 "register_operand" "=w")
1090         (unspec:SVE_I
1091           [(match_operand:<VPRED> 1 "register_operand" "Upl")
1092            (SVE_INT_UNARY:SVE_I
1093              (match_operand:SVE_I 2 "register_operand" "w"))]
1094           UNSPEC_MERGE_PTRUE))]
1095   "TARGET_SVE"
1096   "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
1097 )
1098
1099 ;; Vector AND, ORR and XOR.
1100 (define_insn "<optab><mode>3"
1101   [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
1102         (LOGICAL:SVE_I
1103           (match_operand:SVE_I 1 "register_operand" "%0, w")
1104           (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, w")))]
1105   "TARGET_SVE"
1106   "@
1107    <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
1108    <logical>\t%0.d, %1.d, %2.d"
1109 )
1110
1111 ;; Vector AND, ORR and XOR on floating-point modes.  We avoid subregs
1112 ;; by providing this, but we need to use UNSPECs since rtx logical ops
1113 ;; aren't defined for floating-point modes.
1114 (define_insn "*<optab><mode>3"
1115   [(set (match_operand:SVE_F 0 "register_operand" "=w")
1116         (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand" "w")
1117                        (match_operand:SVE_F 2 "register_operand" "w")]
1118                       LOGICALF))]
1119   "TARGET_SVE"
1120   "<logicalf_op>\t%0.d, %1.d, %2.d"
1121 )
1122
1123 ;; REG_EQUAL notes on "not<mode>3" should ensure that we can generate
1124 ;; this pattern even though the NOT instruction itself is predicated.
1125 (define_insn "bic<mode>3"
1126   [(set (match_operand:SVE_I 0 "register_operand" "=w")
1127         (and:SVE_I
1128           (not:SVE_I (match_operand:SVE_I 1 "register_operand" "w"))
1129           (match_operand:SVE_I 2 "register_operand" "w")))]
1130   "TARGET_SVE"
1131   "bic\t%0.d, %2.d, %1.d"
1132 )
1133
1134 ;; Predicate AND.  We can reuse one of the inputs as the GP.
1135 (define_insn "and<mode>3"
1136   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1137         (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
1138                       (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
1139   "TARGET_SVE"
1140   "and\t%0.b, %1/z, %1.b, %2.b"
1141 )
1142
1143 ;; Unpredicated predicate ORR and XOR.
1144 (define_expand "<optab><mode>3"
1145   [(set (match_operand:PRED_ALL 0 "register_operand")
1146         (and:PRED_ALL
1147           (LOGICAL_OR:PRED_ALL
1148             (match_operand:PRED_ALL 1 "register_operand")
1149             (match_operand:PRED_ALL 2 "register_operand"))
1150           (match_dup 3)))]
1151   "TARGET_SVE"
1152   {
1153     operands[3] = aarch64_ptrue_reg (<MODE>mode);
1154   }
1155 )
1156
1157 ;; Predicated predicate ORR and XOR.
1158 (define_insn "pred_<optab><mode>3"
1159   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1160         (and:PRED_ALL
1161           (LOGICAL:PRED_ALL
1162             (match_operand:PRED_ALL 2 "register_operand" "Upa")
1163             (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1164           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1165   "TARGET_SVE"
1166   "<logical>\t%0.b, %1/z, %2.b, %3.b"
1167 )
1168
1169 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
1170 ;; the GP (which is known to be a PTRUE).  Store the result in operand 0
1171 ;; and set the flags in the same way as for PTEST.  The (and ...) in the
1172 ;; UNSPEC_PTEST_PTRUE is logically redundant, but means that the tested
1173 ;; value is structurally equivalent to rhs of the second set.
1174 (define_insn "*<optab><mode>3_cc"
1175   [(set (reg:CC CC_REGNUM)
1176         (compare:CC
1177           (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upa")
1178                       (and:PRED_ALL
1179                         (LOGICAL:PRED_ALL
1180                           (match_operand:PRED_ALL 2 "register_operand" "Upa")
1181                           (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1182                         (match_dup 1))]
1183                      UNSPEC_PTEST_PTRUE)
1184           (const_int 0)))
1185    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1186         (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
1187                       (match_dup 1)))]
1188   "TARGET_SVE"
1189   "<logical>s\t%0.b, %1/z, %2.b, %3.b"
1190 )
1191
1192 ;; Unpredicated predicate inverse.
1193 (define_expand "one_cmpl<mode>2"
1194   [(set (match_operand:PRED_ALL 0 "register_operand")
1195         (and:PRED_ALL
1196           (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
1197           (match_dup 2)))]
1198   "TARGET_SVE"
1199   {
1200     operands[2] = aarch64_ptrue_reg (<MODE>mode);
1201   }
1202 )
1203
1204 ;; Predicated predicate inverse.
1205 (define_insn "*one_cmpl<mode>3"
1206   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1207         (and:PRED_ALL
1208           (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1209           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1210   "TARGET_SVE"
1211   "not\t%0.b, %1/z, %2.b"
1212 )
1213
1214 ;; Predicated predicate BIC and ORN.
1215 (define_insn "*<nlogical><mode>3"
1216   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1217         (and:PRED_ALL
1218           (NLOGICAL:PRED_ALL
1219             (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1220             (match_operand:PRED_ALL 3 "register_operand" "Upa"))
1221           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1222   "TARGET_SVE"
1223   "<nlogical>\t%0.b, %1/z, %3.b, %2.b"
1224 )
1225
1226 ;; Predicated predicate NAND and NOR.
1227 (define_insn "*<logical_nn><mode>3"
1228   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1229         (and:PRED_ALL
1230           (NLOGICAL:PRED_ALL
1231             (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
1232             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
1233           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
1234   "TARGET_SVE"
1235   "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
1236 )
1237
1238 ;; Unpredicated LSL, LSR and ASR by a vector.
1239 (define_expand "v<optab><mode>3"
1240   [(set (match_operand:SVE_I 0 "register_operand")
1241         (unspec:SVE_I
1242           [(match_dup 3)
1243            (ASHIFT:SVE_I
1244              (match_operand:SVE_I 1 "register_operand")
1245              (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
1246           UNSPEC_MERGE_PTRUE))]
1247   "TARGET_SVE"
1248   {
1249     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1250   }
1251 )
1252
1253 ;; LSL, LSR and ASR by a vector, predicated with a PTRUE.  We don't
1254 ;; actually need the predicate for the first alternative, but using Upa
1255 ;; or X isn't likely to gain much and would make the instruction seem
1256 ;; less uniform to the register allocator.
1257 (define_insn_and_split "*v<optab><mode>3"
1258   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
1259         (unspec:SVE_I
1260           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
1261            (ASHIFT:SVE_I
1262              (match_operand:SVE_I 2 "register_operand" "w, 0, w")
1263              (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
1264           UNSPEC_MERGE_PTRUE))]
1265   "TARGET_SVE"
1266   "@
1267    #
1268    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1269    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1270   "&& reload_completed
1271    && !register_operand (operands[3], <MODE>mode)"
1272   [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
1273   ""
1274   [(set_attr "movprfx" "*,*,yes")]
1275 )
1276
1277 ;; Unpredicated shift operations by a constant (post-RA only).
1278 ;; These are generated by splitting a predicated instruction whose
1279 ;; predicate is unused.
1280 (define_insn "*post_ra_v<optab><mode>3"
1281   [(set (match_operand:SVE_I 0 "register_operand" "=w")
1282         (ASHIFT:SVE_I
1283           (match_operand:SVE_I 1 "register_operand" "w")
1284           (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
1285   "TARGET_SVE && reload_completed"
1286   "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
1287 )
1288
1289 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
1290 ;; shifts above.
1291 (define_expand "<ASHIFT:optab><mode>3"
1292   [(set (match_operand:SVE_I 0 "register_operand")
1293         (ASHIFT:SVE_I (match_operand:SVE_I 1 "register_operand")
1294                       (match_operand:<VEL> 2 "general_operand")))]
1295   "TARGET_SVE"
1296   {
1297     rtx amount;
1298     if (CONST_INT_P (operands[2]))
1299       {
1300         amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
1301         if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
1302           amount = force_reg (<MODE>mode, amount);
1303       }
1304     else
1305       {
1306         amount = gen_reg_rtx (<MODE>mode);
1307         emit_insn (gen_vec_duplicate<mode> (amount,
1308                                             convert_to_mode (<VEL>mode,
1309                                                              operands[2], 0)));
1310       }
1311     emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
1312     DONE;
1313   }
1314 )
1315
1316 ;; Test all bits of operand 1.  Operand 0 is a GP that is known to hold PTRUE.
1317 ;;
1318 ;; Using UNSPEC_PTEST_PTRUE allows combine patterns to assume that the GP
1319 ;; is a PTRUE even if the optimizers haven't yet been able to propagate
1320 ;; the constant.  We would use a separate unspec code for PTESTs involving
1321 ;; GPs that might not be PTRUEs.
1322 (define_insn "ptest_ptrue<mode>"
1323   [(set (reg:CC CC_REGNUM)
1324         (compare:CC
1325           (unspec:SI [(match_operand:PRED_ALL 0 "register_operand" "Upa")
1326                       (match_operand:PRED_ALL 1 "register_operand" "Upa")]
1327                      UNSPEC_PTEST_PTRUE)
1328           (const_int 0)))]
1329   "TARGET_SVE"
1330   "ptest\t%0, %1.b"
1331 )
1332
1333 ;; Set element I of the result if operand1 + J < operand2 for all J in [0, I].
1334 ;; with the comparison being unsigned.
1335 (define_insn "while_ult<GPI:mode><PRED_ALL:mode>"
1336   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1337         (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
1338                           (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
1339                          UNSPEC_WHILE_LO))
1340    (clobber (reg:CC CC_REGNUM))]
1341   "TARGET_SVE"
1342   "whilelo\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
1343 )
1344
1345 ;; WHILELO sets the flags in the same way as a PTEST with a PTRUE GP.
1346 ;; Handle the case in which both results are useful.  The GP operand
1347 ;; to the PTEST isn't needed, so we allow it to be anything.
1348 (define_insn_and_rewrite "*while_ult<GPI:mode><PRED_ALL:mode>_cc"
1349   [(set (reg:CC CC_REGNUM)
1350         (compare:CC
1351           (unspec:SI [(match_operand:PRED_ALL 1)
1352                       (unspec:PRED_ALL
1353                         [(match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")
1354                          (match_operand:GPI 3 "aarch64_reg_or_zero" "rZ")]
1355                         UNSPEC_WHILE_LO)]
1356                      UNSPEC_PTEST_PTRUE)
1357           (const_int 0)))
1358    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
1359         (unspec:PRED_ALL [(match_dup 2)
1360                           (match_dup 3)]
1361                          UNSPEC_WHILE_LO))]
1362   "TARGET_SVE"
1363   "whilelo\t%0.<PRED_ALL:Vetype>, %<w>2, %<w>3"
1364   ;; Force the compiler to drop the unused predicate operand, so that we
1365   ;; don't have an unnecessary PTRUE.
1366   "&& !CONSTANT_P (operands[1])"
1367   {
1368     operands[1] = CONSTM1_RTX (<MODE>mode);
1369   }
1370 )
1371
1372 ;; Integer comparisons predicated with a PTRUE.
1373 (define_insn "*cmp<cmp_op><mode>"
1374   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1375         (unspec:<VPRED>
1376           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1377            (SVE_INT_CMP:<VPRED>
1378              (match_operand:SVE_I 2 "register_operand" "w, w")
1379              (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1380           UNSPEC_MERGE_PTRUE))
1381    (clobber (reg:CC CC_REGNUM))]
1382   "TARGET_SVE"
1383   "@
1384    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1385    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1386 )
1387
1388 ;; Integer comparisons predicated with a PTRUE in which only the flags result
1389 ;; is interesting.
1390 (define_insn "*cmp<cmp_op><mode>_ptest"
1391   [(set (reg:CC CC_REGNUM)
1392         (compare:CC
1393           (unspec:SI
1394             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1395              (unspec:<VPRED>
1396                [(match_dup 1)
1397                 (SVE_INT_CMP:<VPRED>
1398                   (match_operand:SVE_I 2 "register_operand" "w, w")
1399                   (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1400                UNSPEC_MERGE_PTRUE)]
1401             UNSPEC_PTEST_PTRUE)
1402           (const_int 0)))
1403    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
1404   "TARGET_SVE"
1405   "@
1406    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1407    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1408 )
1409
1410 ;; Integer comparisons predicated with a PTRUE in which both the flag and
1411 ;; predicate results are interesting.
1412 (define_insn "*cmp<cmp_op><mode>_cc"
1413   [(set (reg:CC CC_REGNUM)
1414         (compare:CC
1415           (unspec:SI
1416             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1417              (unspec:<VPRED>
1418                [(match_dup 1)
1419                 (SVE_INT_CMP:<VPRED>
1420                   (match_operand:SVE_I 2 "register_operand" "w, w")
1421                   (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1422                UNSPEC_MERGE_PTRUE)]
1423             UNSPEC_PTEST_PTRUE)
1424           (const_int 0)))
1425    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1426         (unspec:<VPRED>
1427           [(match_dup 1)
1428            (SVE_INT_CMP:<VPRED>
1429              (match_dup 2)
1430              (match_dup 3))]
1431           UNSPEC_MERGE_PTRUE))]
1432   "TARGET_SVE"
1433   "@
1434    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1435    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1436 )
1437
1438 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
1439 ;; comparison with an AND.  Split the instruction into its preferred form
1440 ;; (below) at the earliest opportunity, in order to get rid of the
1441 ;; redundant operand 1.
1442 (define_insn_and_split "*pred_cmp<cmp_op><mode>_combine"
1443   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1444        (and:<VPRED>
1445          (unspec:<VPRED>
1446            [(match_operand:<VPRED> 1)
1447             (SVE_INT_CMP:<VPRED>
1448               (match_operand:SVE_I 2 "register_operand" "w, w")
1449               (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
1450            UNSPEC_MERGE_PTRUE)
1451          (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))
1452    (clobber (reg:CC CC_REGNUM))]
1453   "TARGET_SVE"
1454   "#"
1455   "&& 1"
1456   [(parallel
1457      [(set (match_dup 0)
1458           (and:<VPRED>
1459             (SVE_INT_CMP:<VPRED>
1460               (match_dup 2)
1461               (match_dup 3))
1462             (match_dup 4)))
1463       (clobber (reg:CC CC_REGNUM))])]
1464 )
1465
1466 ;; Predicated integer comparisons.
1467 (define_insn "*pred_cmp<cmp_op><mode>"
1468   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1469         (and:<VPRED>
1470           (SVE_INT_CMP:<VPRED>
1471             (match_operand:SVE_I 2 "register_operand" "w, w")
1472             (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))
1473           (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
1474    (clobber (reg:CC CC_REGNUM))]
1475   "TARGET_SVE"
1476   "@
1477    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
1478    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1479 )
1480
1481 ;; Floating-point comparisons predicated with a PTRUE.
1482 (define_insn "*fcm<cmp_op><mode>"
1483   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1484         (unspec:<VPRED>
1485           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1486            (SVE_FP_CMP:<VPRED>
1487              (match_operand:SVE_F 2 "register_operand" "w, w")
1488              (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1489           UNSPEC_MERGE_PTRUE))]
1490   "TARGET_SVE"
1491   "@
1492    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1493    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1494 )
1495
1496 (define_insn "*fcmuo<mode>"
1497   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1498         (unspec:<VPRED>
1499           [(match_operand:<VPRED> 1 "register_operand" "Upl")
1500            (unordered:<VPRED>
1501              (match_operand:SVE_F 2 "register_operand" "w")
1502              (match_operand:SVE_F 3 "register_operand" "w"))]
1503           UNSPEC_MERGE_PTRUE))]
1504   "TARGET_SVE"
1505   "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1506 )
1507
1508 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
1509 ;; with another predicate P.  This does not have the same trapping behavior
1510 ;; as predicating the comparison itself on P, but it's a legitimate fold,
1511 ;; since we can drop any potentially-trapping operations whose results
1512 ;; are not needed.
1513 ;;
1514 ;; Split the instruction into its preferred form (below) at the earliest
1515 ;; opportunity, in order to get rid of the redundant operand 1.
1516 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
1517   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1518         (and:<VPRED>
1519           (unspec:<VPRED>
1520             [(match_operand:<VPRED> 1)
1521              (SVE_FP_CMP
1522                (match_operand:SVE_F 2 "register_operand" "w, w")
1523                (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))]
1524             UNSPEC_MERGE_PTRUE)
1525           (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
1526   "TARGET_SVE"
1527   "#"
1528   "&& 1"
1529   [(set (match_dup 0)
1530         (and:<VPRED>
1531           (SVE_FP_CMP:<VPRED>
1532             (match_dup 2)
1533             (match_dup 3))
1534           (match_dup 4)))]
1535 )
1536
1537 (define_insn_and_split "*fcmuo<mode>_and_combine"
1538   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1539         (and:<VPRED>
1540           (unspec:<VPRED>
1541             [(match_operand:<VPRED> 1)
1542              (unordered
1543                (match_operand:SVE_F 2 "register_operand" "w")
1544                (match_operand:SVE_F 3 "register_operand" "w"))]
1545             UNSPEC_MERGE_PTRUE)
1546           (match_operand:<VPRED> 4 "register_operand" "Upl")))]
1547   "TARGET_SVE"
1548   "#"
1549   "&& 1"
1550   [(set (match_dup 0)
1551         (and:<VPRED>
1552           (unordered:<VPRED>
1553             (match_dup 2)
1554             (match_dup 3))
1555           (match_dup 4)))]
1556 )
1557
1558 ;; Unpredicated floating-point comparisons, with the results ANDed
1559 ;; with another predicate.  This is a valid fold for the same reasons
1560 ;; as above.
1561 (define_insn "*fcm<cmp_op><mode>_and"
1562   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1563         (and:<VPRED>
1564           (SVE_FP_CMP:<VPRED>
1565             (match_operand:SVE_F 2 "register_operand" "w, w")
1566             (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w"))
1567           (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))]
1568   "TARGET_SVE"
1569   "@
1570    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1571    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1572 )
1573
1574 (define_insn "*fcmuo<mode>_and"
1575   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
1576         (and:<VPRED>
1577           (unordered:<VPRED>
1578             (match_operand:SVE_F 2 "register_operand" "w")
1579             (match_operand:SVE_F 3 "register_operand" "w"))
1580           (match_operand:<VPRED> 1 "register_operand" "Upl")))]
1581   "TARGET_SVE"
1582   "fcmuo\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1583 )
1584
1585 ;; Predicated floating-point comparisons.  We don't need a version
1586 ;; of this for unordered comparisons.
1587 (define_insn "*pred_fcm<cmp_op><mode>"
1588   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
1589         (unspec:<VPRED>
1590           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1591            (match_operand:SVE_F 2 "register_operand" "w, w")
1592            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
1593           SVE_COND_FP_CMP))]
1594   "TARGET_SVE"
1595   "@
1596    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #0.0
1597    fcm<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
1598 )
1599
1600 ;; vcond_mask operand order: true, false, mask
1601 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
1602 ;; SEL operand order:        mask, true, false
1603 (define_insn "vcond_mask_<mode><vpred>"
1604   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1605         (unspec:SVE_ALL
1606           [(match_operand:<VPRED> 3 "register_operand" "Upa")
1607            (match_operand:SVE_ALL 1 "register_operand" "w")
1608            (match_operand:SVE_ALL 2 "register_operand" "w")]
1609           UNSPEC_SEL))]
1610   "TARGET_SVE"
1611   "sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>"
1612 )
1613
1614 ;; Selects between a duplicated immediate and zero.
1615 (define_insn "aarch64_sve_dup<mode>_const"
1616   [(set (match_operand:SVE_I 0 "register_operand" "=w")
1617         (unspec:SVE_I
1618           [(match_operand:<VPRED> 1 "register_operand" "Upl")
1619            (match_operand:SVE_I 2 "aarch64_sve_dup_immediate")
1620            (match_operand:SVE_I 3 "aarch64_simd_imm_zero")]
1621           UNSPEC_SEL))]
1622   "TARGET_SVE"
1623   "mov\t%0.<Vetype>, %1/z, #%2"
1624 )
1625
1626 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
1627 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1628 (define_expand "vcond<mode><v_int_equiv>"
1629   [(set (match_operand:SVE_ALL 0 "register_operand")
1630         (if_then_else:SVE_ALL
1631           (match_operator 3 "comparison_operator"
1632             [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1633              (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1634           (match_operand:SVE_ALL 1 "register_operand")
1635           (match_operand:SVE_ALL 2 "register_operand")))]
1636   "TARGET_SVE"
1637   {
1638     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1639     DONE;
1640   }
1641 )
1642
1643 ;; Integer vcondu.  Don't enforce an immediate range here, since it
1644 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
1645 (define_expand "vcondu<mode><v_int_equiv>"
1646   [(set (match_operand:SVE_ALL 0 "register_operand")
1647         (if_then_else:SVE_ALL
1648           (match_operator 3 "comparison_operator"
1649             [(match_operand:<V_INT_EQUIV> 4 "register_operand")
1650              (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
1651           (match_operand:SVE_ALL 1 "register_operand")
1652           (match_operand:SVE_ALL 2 "register_operand")))]
1653   "TARGET_SVE"
1654   {
1655     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
1656     DONE;
1657   }
1658 )
1659
1660 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero
1661 ;; operand; aarch64_expand_sve_vcond handles the case of an FCMUO
1662 ;; with zero.
1663 (define_expand "vcond<mode><v_fp_equiv>"
1664   [(set (match_operand:SVE_SD 0 "register_operand")
1665         (if_then_else:SVE_SD
1666           (match_operator 3 "comparison_operator"
1667             [(match_operand:<V_FP_EQUIV> 4 "register_operand")
1668              (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
1669           (match_operand:SVE_SD 1 "register_operand")
1670           (match_operand:SVE_SD 2 "register_operand")))]
1671   "TARGET_SVE"
1672   {
1673     aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
1674     DONE;
1675   }
1676 )
1677
1678 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
1679 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1680 ;; instead.
1681 (define_expand "vec_cmp<mode><vpred>"
1682   [(parallel
1683     [(set (match_operand:<VPRED> 0 "register_operand")
1684           (match_operator:<VPRED> 1 "comparison_operator"
1685             [(match_operand:SVE_I 2 "register_operand")
1686              (match_operand:SVE_I 3 "nonmemory_operand")]))
1687      (clobber (reg:CC CC_REGNUM))])]
1688   "TARGET_SVE"
1689   {
1690     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1691                                     operands[2], operands[3]);
1692     DONE;
1693   }
1694 )
1695
1696 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
1697 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
1698 ;; instead.
1699 (define_expand "vec_cmpu<mode><vpred>"
1700   [(parallel
1701     [(set (match_operand:<VPRED> 0 "register_operand")
1702           (match_operator:<VPRED> 1 "comparison_operator"
1703             [(match_operand:SVE_I 2 "register_operand")
1704              (match_operand:SVE_I 3 "nonmemory_operand")]))
1705      (clobber (reg:CC CC_REGNUM))])]
1706   "TARGET_SVE"
1707   {
1708     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
1709                                     operands[2], operands[3]);
1710     DONE;
1711   }
1712 )
1713
1714 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
1715 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
1716 ;; with zero.
1717 (define_expand "vec_cmp<mode><vpred>"
1718   [(set (match_operand:<VPRED> 0 "register_operand")
1719         (match_operator:<VPRED> 1 "comparison_operator"
1720           [(match_operand:SVE_F 2 "register_operand")
1721            (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero")]))]
1722   "TARGET_SVE"
1723   {
1724     aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
1725                                       operands[2], operands[3], false);
1726     DONE;
1727   }
1728 )
1729
1730 ;; Branch based on predicate equality or inequality.
1731 (define_expand "cbranch<mode>4"
1732   [(set (pc)
1733         (if_then_else
1734           (match_operator 0 "aarch64_equality_operator"
1735             [(match_operand:PRED_ALL 1 "register_operand")
1736              (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
1737           (label_ref (match_operand 3 ""))
1738           (pc)))]
1739   ""
1740   {
1741     rtx ptrue = aarch64_ptrue_reg (<MODE>mode);
1742     rtx pred;
1743     if (operands[2] == CONST0_RTX (<MODE>mode))
1744       pred = operands[1];
1745     else
1746       {
1747         pred = gen_reg_rtx (<MODE>mode);
1748         emit_insn (gen_pred_xor<mode>3 (pred, ptrue, operands[1],
1749                                         operands[2]));
1750       }
1751     emit_insn (gen_ptest_ptrue<mode> (ptrue, pred));
1752     operands[1] = gen_rtx_REG (CCmode, CC_REGNUM);
1753     operands[2] = const0_rtx;
1754   }
1755 )
1756
1757 ;; Unpredicated integer MIN/MAX.
1758 (define_expand "<su><maxmin><mode>3"
1759   [(set (match_operand:SVE_I 0 "register_operand")
1760         (unspec:SVE_I
1761           [(match_dup 3)
1762            (MAXMIN:SVE_I (match_operand:SVE_I 1 "register_operand")
1763                          (match_operand:SVE_I 2 "register_operand"))]
1764           UNSPEC_MERGE_PTRUE))]
1765   "TARGET_SVE"
1766   {
1767     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1768   }
1769 )
1770
1771 ;; Integer MIN/MAX predicated with a PTRUE.
1772 (define_insn "*<su><maxmin><mode>3"
1773   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1774         (unspec:SVE_I
1775           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1776            (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
1777                          (match_operand:SVE_I 3 "register_operand" "w, w"))]
1778           UNSPEC_MERGE_PTRUE))]
1779   "TARGET_SVE"
1780   "@
1781    <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1782    movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1783   [(set_attr "movprfx" "*,yes")]
1784 )
1785
1786 ;; Unpredicated floating-point MIN/MAX.
1787 (define_expand "<su><maxmin><mode>3"
1788   [(set (match_operand:SVE_F 0 "register_operand")
1789         (unspec:SVE_F
1790           [(match_dup 3)
1791            (FMAXMIN:SVE_F (match_operand:SVE_F 1 "register_operand")
1792                           (match_operand:SVE_F 2 "register_operand"))]
1793           UNSPEC_MERGE_PTRUE))]
1794   "TARGET_SVE"
1795   {
1796     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1797   }
1798 )
1799
1800 ;; Floating-point MIN/MAX predicated with a PTRUE.
1801 (define_insn "*<su><maxmin><mode>3"
1802   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1803         (unspec:SVE_F
1804           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1805            (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
1806                           (match_operand:SVE_F 3 "register_operand" "w, w"))]
1807           UNSPEC_MERGE_PTRUE))]
1808   "TARGET_SVE"
1809   "@
1810    f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1811    movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1812   [(set_attr "movprfx" "*,yes")]
1813 )
1814
1815 ;; Unpredicated fmin/fmax.
1816 (define_expand "<maxmin_uns><mode>3"
1817   [(set (match_operand:SVE_F 0 "register_operand")
1818         (unspec:SVE_F
1819           [(match_dup 3)
1820            (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")
1821                           (match_operand:SVE_F 2 "register_operand")]
1822                          FMAXMIN_UNS)]
1823           UNSPEC_MERGE_PTRUE))]
1824   "TARGET_SVE"
1825   {
1826     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
1827   }
1828 )
1829
1830 ;; fmin/fmax predicated with a PTRUE.
1831 (define_insn "*<maxmin_uns><mode>3"
1832   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
1833         (unspec:SVE_F
1834           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1835            (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
1836                           (match_operand:SVE_F 3 "register_operand" "w, w")]
1837                          FMAXMIN_UNS)]
1838           UNSPEC_MERGE_PTRUE))]
1839   "TARGET_SVE"
1840   "@
1841    <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1842    movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1843   [(set_attr "movprfx" "*,yes")]
1844 )
1845
1846 ;; Predicated integer operations with select.
1847 (define_expand "cond_<optab><mode>"
1848   [(set (match_operand:SVE_I 0 "register_operand")
1849         (unspec:SVE_I
1850           [(match_operand:<VPRED> 1 "register_operand")
1851            (SVE_INT_BINARY:SVE_I
1852              (match_operand:SVE_I 2 "register_operand")
1853              (match_operand:SVE_I 3 "register_operand"))
1854            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
1855           UNSPEC_SEL))]
1856   "TARGET_SVE"
1857 )
1858
1859 (define_expand "cond_<optab><mode>"
1860   [(set (match_operand:SVE_SDI 0 "register_operand")
1861         (unspec:SVE_SDI
1862           [(match_operand:<VPRED> 1 "register_operand")
1863            (SVE_INT_BINARY_SD:SVE_SDI
1864              (match_operand:SVE_SDI 2 "register_operand")
1865              (match_operand:SVE_SDI 3 "register_operand"))
1866            (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
1867           UNSPEC_SEL))]
1868   "TARGET_SVE"
1869 )
1870
1871 ;; Predicated integer operations with select matching the first operand.
1872 (define_insn "*cond_<optab><mode>_2"
1873   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1874         (unspec:SVE_I
1875           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1876            (SVE_INT_BINARY:SVE_I
1877              (match_operand:SVE_I 2 "register_operand" "0, w")
1878              (match_operand:SVE_I 3 "register_operand" "w, w"))
1879            (match_dup 2)]
1880           UNSPEC_SEL))]
1881   "TARGET_SVE"
1882   "@
1883    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1884    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1885   [(set_attr "movprfx" "*,yes")]
1886 )
1887
1888 (define_insn "*cond_<optab><mode>_2"
1889   [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1890         (unspec:SVE_SDI
1891           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1892            (SVE_INT_BINARY_SD:SVE_SDI
1893              (match_operand:SVE_SDI 2 "register_operand" "0, w")
1894              (match_operand:SVE_SDI 3 "register_operand" "w, w"))
1895            (match_dup 2)]
1896           UNSPEC_SEL))]
1897   "TARGET_SVE"
1898   "@
1899    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1900    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
1901   [(set_attr "movprfx" "*,yes")]
1902 )
1903
1904 ;; Predicated integer operations with select matching the second operand.
1905 (define_insn "*cond_<optab><mode>_3"
1906   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
1907         (unspec:SVE_I
1908           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1909            (SVE_INT_BINARY:SVE_I
1910              (match_operand:SVE_I 2 "register_operand" "w, w")
1911              (match_operand:SVE_I 3 "register_operand" "0, w"))
1912            (match_dup 3)]
1913           UNSPEC_SEL))]
1914   "TARGET_SVE"
1915   "@
1916    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1917    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1918   [(set_attr "movprfx" "*,yes")]
1919 )
1920
1921 (define_insn "*cond_<optab><mode>_3"
1922   [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
1923         (unspec:SVE_SDI
1924           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
1925            (SVE_INT_BINARY_SD:SVE_SDI
1926              (match_operand:SVE_SDI 2 "register_operand" "w, w")
1927              (match_operand:SVE_SDI 3 "register_operand" "0, w"))
1928            (match_dup 3)]
1929           UNSPEC_SEL))]
1930   "TARGET_SVE"
1931   "@
1932    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1933    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
1934   [(set_attr "movprfx" "*,yes")]
1935 )
1936
1937 ;; Predicated integer binary operations in which the values of inactive
1938 ;; lanes are distinct from the other inputs.
1939 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1940   [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1941         (unspec:SVE_I
1942           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1943            (SVE_INT_BINARY:SVE_I
1944              (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w")
1945              (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))
1946            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1947           UNSPEC_SEL))]
1948   "TARGET_SVE
1949    && !rtx_equal_p (operands[2], operands[4])
1950    && !rtx_equal_p (operands[3], operands[4])"
1951   "@
1952    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1953    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1954    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1955    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1956    #"
1957   "&& reload_completed
1958    && register_operand (operands[4], <MODE>mode)
1959    && !rtx_equal_p (operands[0], operands[4])"
1960   {
1961     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1962                                              operands[4], operands[1]));
1963     operands[4] = operands[2] = operands[0];
1964   }
1965   [(set_attr "movprfx" "yes")]
1966 )
1967
1968 (define_insn_and_rewrite "*cond_<optab><mode>_any"
1969   [(set (match_operand:SVE_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
1970         (unspec:SVE_SDI
1971           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
1972            (SVE_INT_BINARY_SD:SVE_SDI
1973              (match_operand:SVE_SDI 2 "register_operand" "0, w, w, w, w")
1974              (match_operand:SVE_SDI 3 "register_operand" "w, 0, w, w, w"))
1975            (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
1976           UNSPEC_SEL))]
1977   "TARGET_SVE
1978    && !rtx_equal_p (operands[2], operands[4])
1979    && !rtx_equal_p (operands[3], operands[4])"
1980   "@
1981    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1982    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1983    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1984    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1985    #"
1986   "&& reload_completed
1987    && register_operand (operands[4], <MODE>mode)
1988    && !rtx_equal_p (operands[0], operands[4])"
1989   {
1990     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1991                                              operands[4], operands[1]));
1992     operands[4] = operands[2] = operands[0];
1993   }
1994   [(set_attr "movprfx" "yes")]
1995 )
1996
1997 ;; Set operand 0 to the last active element in operand 3, or to tied
1998 ;; operand 1 if no elements are active.
1999 (define_insn "fold_extract_last_<mode>"
2000   [(set (match_operand:<VEL> 0 "register_operand" "=r, w")
2001         (unspec:<VEL>
2002           [(match_operand:<VEL> 1 "register_operand" "0, 0")
2003            (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
2004            (match_operand:SVE_ALL 3 "register_operand" "w, w")]
2005           UNSPEC_CLASTB))]
2006   "TARGET_SVE"
2007   "@
2008    clastb\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
2009    clastb\t%<vw>0, %2, %<vw>0, %3.<Vetype>"
2010 )
2011
2012 ;; Unpredicated integer add reduction.
2013 (define_expand "reduc_plus_scal_<mode>"
2014   [(set (match_operand:<VEL> 0 "register_operand")
2015         (unspec:<VEL> [(match_dup 2)
2016                        (match_operand:SVE_I 1 "register_operand")]
2017                       UNSPEC_ADDV))]
2018   "TARGET_SVE"
2019   {
2020     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2021   }
2022 )
2023
2024 ;; Predicated integer add reduction.  The result is always 64-bits.
2025 (define_insn "*reduc_plus_scal_<mode>"
2026   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2027         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2028                        (match_operand:SVE_I 2 "register_operand" "w")]
2029                       UNSPEC_ADDV))]
2030   "TARGET_SVE"
2031   "uaddv\t%d0, %1, %2.<Vetype>"
2032 )
2033
2034 ;; Unpredicated floating-point add reduction.
2035 (define_expand "reduc_plus_scal_<mode>"
2036   [(set (match_operand:<VEL> 0 "register_operand")
2037         (unspec:<VEL> [(match_dup 2)
2038                        (match_operand:SVE_F 1 "register_operand")]
2039                       UNSPEC_FADDV))]
2040   "TARGET_SVE"
2041   {
2042     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2043   }
2044 )
2045
2046 ;; Predicated floating-point add reduction.
2047 (define_insn "*reduc_plus_scal_<mode>"
2048   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2049         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2050                        (match_operand:SVE_F 2 "register_operand" "w")]
2051                       UNSPEC_FADDV))]
2052   "TARGET_SVE"
2053   "faddv\t%<Vetype>0, %1, %2.<Vetype>"
2054 )
2055
2056 ;; Unpredicated integer MIN/MAX reduction.
2057 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2058   [(set (match_operand:<VEL> 0 "register_operand")
2059         (unspec:<VEL> [(match_dup 2)
2060                        (match_operand:SVE_I 1 "register_operand")]
2061                       MAXMINV))]
2062   "TARGET_SVE"
2063   {
2064     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2065   }
2066 )
2067
2068 ;; Predicated integer MIN/MAX reduction.
2069 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2070   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2071         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2072                        (match_operand:SVE_I 2 "register_operand" "w")]
2073                       MAXMINV))]
2074   "TARGET_SVE"
2075   "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2076 )
2077
2078 ;; Unpredicated floating-point MIN/MAX reduction.
2079 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2080   [(set (match_operand:<VEL> 0 "register_operand")
2081         (unspec:<VEL> [(match_dup 2)
2082                        (match_operand:SVE_F 1 "register_operand")]
2083                       FMAXMINV))]
2084   "TARGET_SVE"
2085   {
2086     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2087   }
2088 )
2089
2090 ;; Predicated floating-point MIN/MAX reduction.
2091 (define_insn "*reduc_<maxmin_uns>_scal_<mode>"
2092   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2093         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2094                        (match_operand:SVE_F 2 "register_operand" "w")]
2095                       FMAXMINV))]
2096   "TARGET_SVE"
2097   "<maxmin_uns_op>v\t%<Vetype>0, %1, %2.<Vetype>"
2098 )
2099
2100 (define_expand "reduc_<optab>_scal_<mode>"
2101   [(set (match_operand:<VEL> 0 "register_operand")
2102         (unspec:<VEL> [(match_dup 2)
2103                        (match_operand:SVE_I 1 "register_operand")]
2104                       BITWISEV))]
2105   "TARGET_SVE"
2106   {
2107     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2108   }
2109 )
2110
2111 (define_insn "*reduc_<optab>_scal_<mode>"
2112   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2113         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
2114                        (match_operand:SVE_I 2 "register_operand" "w")]
2115                       BITWISEV))]
2116   "TARGET_SVE"
2117   "<bit_reduc_op>\t%<Vetype>0, %1, %2.<Vetype>"
2118 )
2119
2120 ;; Unpredicated in-order FP reductions.
2121 (define_expand "fold_left_plus_<mode>"
2122   [(set (match_operand:<VEL> 0 "register_operand")
2123         (unspec:<VEL> [(match_dup 3)
2124                        (match_operand:<VEL> 1 "register_operand")
2125                        (match_operand:SVE_F 2 "register_operand")]
2126                       UNSPEC_FADDA))]
2127   "TARGET_SVE"
2128   {
2129     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2130   }
2131 )
2132
2133 ;; In-order FP reductions predicated with PTRUE.
2134 (define_insn "mask_fold_left_plus_<mode>"
2135   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2136         (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
2137                        (match_operand:<VEL> 1 "register_operand" "0")
2138                        (match_operand:SVE_F 2 "register_operand" "w")]
2139                       UNSPEC_FADDA))]
2140   "TARGET_SVE"
2141   "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
2142 )
2143
2144 ;; Predicated form of the above in-order reduction.
2145 (define_insn "*pred_fold_left_plus_<mode>"
2146   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2147         (unspec:<VEL>
2148           [(match_operand:<VEL> 1 "register_operand" "0")
2149            (unspec:SVE_F
2150              [(match_operand:<VPRED> 2 "register_operand" "Upl")
2151               (match_operand:SVE_F 3 "register_operand" "w")
2152               (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
2153              UNSPEC_SEL)]
2154           UNSPEC_FADDA))]
2155   "TARGET_SVE"
2156   "fadda\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
2157 )
2158
2159 ;; Unpredicated floating-point addition.
2160 (define_expand "add<mode>3"
2161   [(set (match_operand:SVE_F 0 "register_operand")
2162         (unspec:SVE_F
2163           [(match_dup 3)
2164            (plus:SVE_F
2165              (match_operand:SVE_F 1 "register_operand")
2166              (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand"))]
2167           UNSPEC_MERGE_PTRUE))]
2168   "TARGET_SVE"
2169   {
2170     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2171   }
2172 )
2173
2174 ;; Floating-point addition predicated with a PTRUE.
2175 (define_insn_and_split "*add<mode>3"
2176   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
2177         (unspec:SVE_F
2178           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2179            (plus:SVE_F
2180               (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
2181               (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w"))]
2182           UNSPEC_MERGE_PTRUE))]
2183   "TARGET_SVE"
2184   "@
2185    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2186    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2187    #"
2188   ; Split the unpredicated form after reload, so that we don't have
2189   ; the unnecessary PTRUE.
2190   "&& reload_completed
2191    && register_operand (operands[3], <MODE>mode)"
2192   [(set (match_dup 0) (plus:SVE_F (match_dup 2) (match_dup 3)))]
2193 )
2194
2195 ;; Unpredicated floating-point subtraction.
2196 (define_expand "sub<mode>3"
2197   [(set (match_operand:SVE_F 0 "register_operand")
2198         (unspec:SVE_F
2199           [(match_dup 3)
2200            (minus:SVE_F
2201              (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
2202              (match_operand:SVE_F 2 "register_operand"))]
2203           UNSPEC_MERGE_PTRUE))]
2204   "TARGET_SVE"
2205   {
2206     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2207   }
2208 )
2209
2210 ;; Floating-point subtraction predicated with a PTRUE.
2211 (define_insn_and_split "*sub<mode>3"
2212   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
2213         (unspec:SVE_F
2214           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
2215            (minus:SVE_F
2216              (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
2217              (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w"))]
2218           UNSPEC_MERGE_PTRUE))]
2219   "TARGET_SVE
2220    && (register_operand (operands[2], <MODE>mode)
2221        || register_operand (operands[3], <MODE>mode))"
2222   "@
2223    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2224    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
2225    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
2226    #"
2227   ; Split the unpredicated form after reload, so that we don't have
2228   ; the unnecessary PTRUE.
2229   "&& reload_completed
2230    && register_operand (operands[2], <MODE>mode)
2231    && register_operand (operands[3], <MODE>mode)"
2232   [(set (match_dup 0) (minus:SVE_F (match_dup 2) (match_dup 3)))]
2233 )
2234
2235 ;; Unpredicated floating-point multiplication.
2236 (define_expand "mul<mode>3"
2237   [(set (match_operand:SVE_F 0 "register_operand")
2238         (unspec:SVE_F
2239           [(match_dup 3)
2240            (mult:SVE_F
2241              (match_operand:SVE_F 1 "register_operand")
2242              (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand"))]
2243           UNSPEC_MERGE_PTRUE))]
2244   "TARGET_SVE"
2245   {
2246     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2247   }
2248 )
2249
2250 ;; Floating-point multiplication predicated with a PTRUE.
2251 (define_insn_and_split "*mul<mode>3"
2252   [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
2253         (unspec:SVE_F
2254           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2255            (mult:SVE_F
2256              (match_operand:SVE_F 2 "register_operand" "%0, w")
2257              (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w"))]
2258           UNSPEC_MERGE_PTRUE))]
2259   "TARGET_SVE"
2260   "@
2261    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
2262    #"
2263   ; Split the unpredicated form after reload, so that we don't have
2264   ; the unnecessary PTRUE.
2265   "&& reload_completed
2266    && register_operand (operands[3], <MODE>mode)"
2267   [(set (match_dup 0) (mult:SVE_F (match_dup 2) (match_dup 3)))]
2268 )
2269
2270 ;; Unpredicated floating-point binary operations (post-RA only).
2271 ;; These are generated by splitting a predicated instruction whose
2272 ;; predicate is unused.
2273 (define_insn "*post_ra_<sve_fp_op><mode>3"
2274   [(set (match_operand:SVE_F 0 "register_operand" "=w")
2275         (SVE_UNPRED_FP_BINARY:SVE_F
2276           (match_operand:SVE_F 1 "register_operand" "w")
2277           (match_operand:SVE_F 2 "register_operand" "w")))]
2278   "TARGET_SVE && reload_completed"
2279   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
2280
2281 ;; Unpredicated fma (%0 = (%1 * %2) + %3).
2282 (define_expand "fma<mode>4"
2283   [(set (match_operand:SVE_F 0 "register_operand")
2284         (unspec:SVE_F
2285           [(match_dup 4)
2286            (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2287                       (match_operand:SVE_F 2 "register_operand")
2288                       (match_operand:SVE_F 3 "register_operand"))]
2289           UNSPEC_MERGE_PTRUE))]
2290   "TARGET_SVE"
2291   {
2292     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2293   }
2294 )
2295
2296 ;; fma predicated with a PTRUE.
2297 (define_insn "*fma<mode>4"
2298   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2299         (unspec:SVE_F
2300           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2301            (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2302                       (match_operand:SVE_F 4 "register_operand" "w, w, w")
2303                       (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2304           UNSPEC_MERGE_PTRUE))]
2305   "TARGET_SVE"
2306   "@
2307    fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2308    fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2309    movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2310   [(set_attr "movprfx" "*,*,yes")]
2311 )
2312
2313 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
2314 (define_expand "fnma<mode>4"
2315   [(set (match_operand:SVE_F 0 "register_operand")
2316         (unspec:SVE_F
2317           [(match_dup 4)
2318            (fma:SVE_F (neg:SVE_F
2319                         (match_operand:SVE_F 1 "register_operand"))
2320                       (match_operand:SVE_F 2 "register_operand")
2321                       (match_operand:SVE_F 3 "register_operand"))]
2322           UNSPEC_MERGE_PTRUE))]
2323   "TARGET_SVE"
2324   {
2325     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2326   }
2327 )
2328
2329 ;; fnma predicated with a PTRUE.
2330 (define_insn "*fnma<mode>4"
2331   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2332         (unspec:SVE_F
2333           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2334            (fma:SVE_F (neg:SVE_F
2335                         (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2336                       (match_operand:SVE_F 4 "register_operand" "w, w, w")
2337                       (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
2338           UNSPEC_MERGE_PTRUE))]
2339   "TARGET_SVE"
2340   "@
2341    fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2342    fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2343    movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2344   [(set_attr "movprfx" "*,*,yes")]
2345 )
2346
2347 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
2348 (define_expand "fms<mode>4"
2349   [(set (match_operand:SVE_F 0 "register_operand")
2350         (unspec:SVE_F
2351           [(match_dup 4)
2352            (fma:SVE_F (match_operand:SVE_F 1 "register_operand")
2353                       (match_operand:SVE_F 2 "register_operand")
2354                       (neg:SVE_F
2355                         (match_operand:SVE_F 3 "register_operand")))]
2356           UNSPEC_MERGE_PTRUE))]
2357   "TARGET_SVE"
2358   {
2359     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2360   }
2361 )
2362
2363 ;; fms predicated with a PTRUE.
2364 (define_insn "*fms<mode>4"
2365   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2366         (unspec:SVE_F
2367           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2368            (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
2369                       (match_operand:SVE_F 4 "register_operand" "w, w, w")
2370                       (neg:SVE_F
2371                         (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2372           UNSPEC_MERGE_PTRUE))]
2373   "TARGET_SVE"
2374   "@
2375    fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2376    fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2377    movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2378   [(set_attr "movprfx" "*,*,yes")]
2379 )
2380
2381 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
2382 (define_expand "fnms<mode>4"
2383   [(set (match_operand:SVE_F 0 "register_operand")
2384         (unspec:SVE_F
2385           [(match_dup 4)
2386            (fma:SVE_F (neg:SVE_F
2387                         (match_operand:SVE_F 1 "register_operand"))
2388                       (match_operand:SVE_F 2 "register_operand")
2389                       (neg:SVE_F
2390                         (match_operand:SVE_F 3 "register_operand")))]
2391           UNSPEC_MERGE_PTRUE))]
2392   "TARGET_SVE"
2393   {
2394     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
2395   }
2396 )
2397
2398 ;; fnms predicated with a PTRUE.
2399 (define_insn "*fnms<mode>4"
2400   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2401         (unspec:SVE_F
2402           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2403            (fma:SVE_F (neg:SVE_F
2404                         (match_operand:SVE_F 3 "register_operand" "%0, w, w"))
2405                       (match_operand:SVE_F 4 "register_operand" "w, w, w")
2406                       (neg:SVE_F
2407                         (match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
2408           UNSPEC_MERGE_PTRUE))]
2409   "TARGET_SVE"
2410   "@
2411    fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
2412    fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2413    movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2414   [(set_attr "movprfx" "*,*,yes")]
2415 )
2416
2417 ;; Unpredicated floating-point division.
2418 (define_expand "div<mode>3"
2419   [(set (match_operand:SVE_F 0 "register_operand")
2420         (unspec:SVE_F
2421           [(match_dup 3)
2422            (div:SVE_F (match_operand:SVE_F 1 "register_operand")
2423                       (match_operand:SVE_F 2 "register_operand"))]
2424           UNSPEC_MERGE_PTRUE))]
2425   "TARGET_SVE"
2426   {
2427     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
2428   }
2429 )
2430
2431 ;; Floating-point division predicated with a PTRUE.
2432 (define_insn "*div<mode>3"
2433   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
2434         (unspec:SVE_F
2435           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2436            (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
2437                       (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
2438           UNSPEC_MERGE_PTRUE))]
2439   "TARGET_SVE"
2440   "@
2441    fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2442    fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2443    movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2444   [(set_attr "movprfx" "*,*,yes")]
2445 )
2446
2447 ;; Unpredicated FNEG, FABS and FSQRT.
2448 (define_expand "<optab><mode>2"
2449   [(set (match_operand:SVE_F 0 "register_operand")
2450         (unspec:SVE_F
2451           [(match_dup 2)
2452            (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 1 "register_operand"))]
2453           UNSPEC_MERGE_PTRUE))]
2454   "TARGET_SVE"
2455   {
2456     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2457   }
2458 )
2459
2460 ;; FNEG, FABS and FSQRT predicated with a PTRUE.
2461 (define_insn "*<optab><mode>2"
2462   [(set (match_operand:SVE_F 0 "register_operand" "=w")
2463         (unspec:SVE_F
2464           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2465            (SVE_FP_UNARY:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
2466           UNSPEC_MERGE_PTRUE))]
2467   "TARGET_SVE"
2468   "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2469 )
2470
2471 (define_insn "*fabd<mode>3"
2472   [(set (match_operand:SVE_F 0 "register_operand" "=w")
2473         (unspec:SVE_F
2474           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2475            (abs:SVE_F
2476             (minus:SVE_F
2477                 (match_operand:SVE_F 2 "register_operand" "0")
2478                 (match_operand:SVE_F 3 "register_operand" "w")))]
2479         UNSPEC_MERGE_PTRUE))]
2480   "TARGET_SVE"
2481   "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2482 )
2483
2484 ;; Unpredicated FRINTy.
2485 (define_expand "<frint_pattern><mode>2"
2486   [(set (match_operand:SVE_F 0 "register_operand")
2487         (unspec:SVE_F
2488           [(match_dup 2)
2489            (unspec:SVE_F [(match_operand:SVE_F 1 "register_operand")]
2490                          FRINT)]
2491           UNSPEC_MERGE_PTRUE))]
2492   "TARGET_SVE"
2493   {
2494     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2495   }
2496 )
2497
2498 ;; FRINTy predicated with a PTRUE.
2499 (define_insn "*<frint_pattern><mode>2"
2500   [(set (match_operand:SVE_F 0 "register_operand" "=w")
2501         (unspec:SVE_F
2502           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2503            (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "w")]
2504                          FRINT)]
2505           UNSPEC_MERGE_PTRUE))]
2506   "TARGET_SVE"
2507   "frint<frint_suffix>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2508 )
2509
2510 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
2511 ;; SF to SI or DF to DI).
2512 (define_expand "<fix_trunc_optab><mode><v_int_equiv>2"
2513   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2514         (unspec:<V_INT_EQUIV>
2515           [(match_dup 2)
2516            (FIXUORS:<V_INT_EQUIV>
2517              (match_operand:SVE_F 1 "register_operand"))]
2518           UNSPEC_MERGE_PTRUE))]
2519   "TARGET_SVE"
2520   {
2521     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2522   }
2523 )
2524
2525 ;; Conversion of SF to DI, SI or HI, predicated with a PTRUE.
2526 (define_insn "*<fix_trunc_optab>v16hsf<mode>2"
2527   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2528         (unspec:SVE_HSDI
2529           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2530            (FIXUORS:SVE_HSDI
2531              (match_operand:VNx8HF 2 "register_operand" "w"))]
2532           UNSPEC_MERGE_PTRUE))]
2533   "TARGET_SVE"
2534   "fcvtz<su>\t%0.<Vetype>, %1/m, %2.h"
2535 )
2536
2537 ;; Conversion of SF to DI or SI, predicated with a PTRUE.
2538 (define_insn "*<fix_trunc_optab>vnx4sf<mode>2"
2539   [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2540         (unspec:SVE_SDI
2541           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2542            (FIXUORS:SVE_SDI
2543              (match_operand:VNx4SF 2 "register_operand" "w"))]
2544           UNSPEC_MERGE_PTRUE))]
2545   "TARGET_SVE"
2546   "fcvtz<su>\t%0.<Vetype>, %1/m, %2.s"
2547 )
2548
2549 ;; Conversion of DF to DI or SI, predicated with a PTRUE.
2550 (define_insn "*<fix_trunc_optab>vnx2df<mode>2"
2551   [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
2552         (unspec:SVE_SDI
2553           [(match_operand:VNx2BI 1 "register_operand" "Upl")
2554            (FIXUORS:SVE_SDI
2555              (match_operand:VNx2DF 2 "register_operand" "w"))]
2556           UNSPEC_MERGE_PTRUE))]
2557   "TARGET_SVE"
2558   "fcvtz<su>\t%0.<Vetype>, %1/m, %2.d"
2559 )
2560
2561 ;; Unpredicated conversion of integers to floats of the same size
2562 ;; (HI to HF, SI to SF or DI to DF).
2563 (define_expand "<optab><v_int_equiv><mode>2"
2564   [(set (match_operand:SVE_F 0 "register_operand")
2565         (unspec:SVE_F
2566           [(match_dup 2)
2567            (FLOATUORS:SVE_F
2568              (match_operand:<V_INT_EQUIV> 1 "register_operand"))]
2569           UNSPEC_MERGE_PTRUE))]
2570   "TARGET_SVE"
2571   {
2572     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2573   }
2574 )
2575
2576 ;; Conversion of DI, SI or HI to the same number of HFs, predicated
2577 ;; with a PTRUE.
2578 (define_insn "*<optab><mode>vnx8hf2"
2579   [(set (match_operand:VNx8HF 0 "register_operand" "=w")
2580         (unspec:VNx8HF
2581           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2582            (FLOATUORS:VNx8HF
2583              (match_operand:SVE_HSDI 2 "register_operand" "w"))]
2584           UNSPEC_MERGE_PTRUE))]
2585   "TARGET_SVE"
2586   "<su_optab>cvtf\t%0.h, %1/m, %2.<Vetype>"
2587 )
2588
2589 ;; Conversion of DI or SI to the same number of SFs, predicated with a PTRUE.
2590 (define_insn "*<optab><mode>vnx4sf2"
2591   [(set (match_operand:VNx4SF 0 "register_operand" "=w")
2592         (unspec:VNx4SF
2593           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2594            (FLOATUORS:VNx4SF
2595              (match_operand:SVE_SDI 2 "register_operand" "w"))]
2596           UNSPEC_MERGE_PTRUE))]
2597   "TARGET_SVE"
2598   "<su_optab>cvtf\t%0.s, %1/m, %2.<Vetype>"
2599 )
2600
2601 ;; Conversion of DI or SI to DF, predicated with a PTRUE.
2602 (define_insn "aarch64_sve_<optab><mode>vnx2df2"
2603   [(set (match_operand:VNx2DF 0 "register_operand" "=w")
2604         (unspec:VNx2DF
2605           [(match_operand:VNx2BI 1 "register_operand" "Upl")
2606            (FLOATUORS:VNx2DF
2607              (match_operand:SVE_SDI 2 "register_operand" "w"))]
2608           UNSPEC_MERGE_PTRUE))]
2609   "TARGET_SVE"
2610   "<su_optab>cvtf\t%0.d, %1/m, %2.<Vetype>"
2611 )
2612
2613 ;; Conversion of DFs to the same number of SFs, or SFs to the same number
2614 ;; of HFs.
2615 (define_insn "*trunc<Vwide><mode>2"
2616   [(set (match_operand:SVE_HSF 0 "register_operand" "=w")
2617         (unspec:SVE_HSF
2618           [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2619            (unspec:SVE_HSF
2620              [(match_operand:<VWIDE> 2 "register_operand" "w")]
2621              UNSPEC_FLOAT_CONVERT)]
2622           UNSPEC_MERGE_PTRUE))]
2623   "TARGET_SVE"
2624   "fcvt\t%0.<Vetype>, %1/m, %2.<Vewtype>"
2625 )
2626
2627 ;; Conversion of SFs to the same number of DFs, or HFs to the same number
2628 ;; of SFs.
2629 (define_insn "aarch64_sve_extend<mode><Vwide>2"
2630   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2631         (unspec:<VWIDE>
2632           [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl")
2633            (unspec:<VWIDE>
2634              [(match_operand:SVE_HSF 2 "register_operand" "w")]
2635              UNSPEC_FLOAT_CONVERT)]
2636           UNSPEC_MERGE_PTRUE))]
2637   "TARGET_SVE"
2638   "fcvt\t%0.<Vewtype>, %1/m, %2.<Vetype>"
2639 )
2640
2641 ;; Unpack the low or high half of a predicate, where "high" refers to
2642 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2643 ;; for little-endian.
2644 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
2645   [(match_operand:<VWIDE> 0 "register_operand")
2646    (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
2647                    UNPACK)]
2648   "TARGET_SVE"
2649   {
2650     emit_insn ((<hi_lanes_optab>
2651                 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
2652                 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
2653                (operands[0], operands[1]));
2654     DONE;
2655   }
2656 )
2657
2658 ;; PUNPKHI and PUNPKLO.
2659 (define_insn "aarch64_sve_punpk<perm_hilo>_<mode>"
2660   [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
2661         (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
2662                         UNPACK_UNSIGNED))]
2663   "TARGET_SVE"
2664   "punpk<perm_hilo>\t%0.h, %1.b"
2665 )
2666
2667 ;; Unpack the low or high half of a vector, where "high" refers to
2668 ;; the low-numbered lanes for big-endian and the high-numbered lanes
2669 ;; for little-endian.
2670 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_BHSI:mode>"
2671   [(match_operand:<VWIDE> 0 "register_operand")
2672    (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand")] UNPACK)]
2673   "TARGET_SVE"
2674   {
2675     emit_insn ((<hi_lanes_optab>
2676                 ? gen_aarch64_sve_<su>unpkhi_<SVE_BHSI:mode>
2677                 : gen_aarch64_sve_<su>unpklo_<SVE_BHSI:mode>)
2678                (operands[0], operands[1]));
2679     DONE;
2680   }
2681 )
2682
2683 ;; SUNPKHI, UUNPKHI, SUNPKLO and UUNPKLO.
2684 (define_insn "aarch64_sve_<su>unpk<perm_hilo>_<SVE_BHSI:mode>"
2685   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2686         (unspec:<VWIDE> [(match_operand:SVE_BHSI 1 "register_operand" "w")]
2687                         UNPACK))]
2688   "TARGET_SVE"
2689   "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
2690 )
2691
2692 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
2693 ;; First unpack the source without conversion, then float-convert the
2694 ;; unpacked source.
2695 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
2696   [(match_operand:<VWIDE> 0 "register_operand")
2697    (unspec:SVE_HSF [(match_operand:SVE_HSF 1 "register_operand")]
2698                    UNPACK_UNSIGNED)]
2699   "TARGET_SVE"
2700   {
2701     /* Use ZIP to do the unpack, since we don't care about the upper halves
2702        and since it has the nice property of not needing any subregs.
2703        If using UUNPK* turns out to be preferable, we could model it as
2704        a ZIP whose first operand is zero.  */
2705     rtx temp = gen_reg_rtx (<MODE>mode);
2706     emit_insn ((<hi_lanes_optab>
2707                 ? gen_aarch64_sve_zip2<mode>
2708                 : gen_aarch64_sve_zip1<mode>)
2709                 (temp, operands[1], operands[1]));
2710     rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2711     emit_insn (gen_aarch64_sve_extend<mode><Vwide>2 (operands[0],
2712                                                      ptrue, temp));
2713     DONE;
2714   }
2715 )
2716
2717 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
2718 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
2719 ;; unpacked VNx4SI to VNx2DF.
2720 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
2721   [(match_operand:VNx2DF 0 "register_operand")
2722    (FLOATUORS:VNx2DF
2723      (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
2724                     UNPACK_UNSIGNED))]
2725   "TARGET_SVE"
2726   {
2727     /* Use ZIP to do the unpack, since we don't care about the upper halves
2728        and since it has the nice property of not needing any subregs.
2729        If using UUNPK* turns out to be preferable, we could model it as
2730        a ZIP whose first operand is zero.  */
2731     rtx temp = gen_reg_rtx (VNx4SImode);
2732     emit_insn ((<hi_lanes_optab>
2733                 ? gen_aarch64_sve_zip2vnx4si
2734                 : gen_aarch64_sve_zip1vnx4si)
2735                (temp, operands[1], operands[1]));
2736     rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
2737     emit_insn (gen_aarch64_sve_<FLOATUORS:optab>vnx4sivnx2df2 (operands[0],
2738                                                                ptrue, temp));
2739     DONE;
2740   }
2741 )
2742
2743 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
2744 ;; the high part of each wide element.
2745 (define_insn "vec_pack_trunc_<Vwide>"
2746   [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
2747         (unspec:PRED_BHS
2748           [(match_operand:<VWIDE> 1 "register_operand" "Upa")
2749            (match_operand:<VWIDE> 2 "register_operand" "Upa")]
2750           UNSPEC_PACK))]
2751   "TARGET_SVE"
2752   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2753 )
2754
2755 ;; Integer pack.  Use UZP1 on the narrower type, which discards
2756 ;; the high part of each wide element.
2757 (define_insn "vec_pack_trunc_<Vwide>"
2758   [(set (match_operand:SVE_BHSI 0 "register_operand" "=w")
2759         (unspec:SVE_BHSI
2760           [(match_operand:<VWIDE> 1 "register_operand" "w")
2761            (match_operand:<VWIDE> 2 "register_operand" "w")]
2762           UNSPEC_PACK))]
2763   "TARGET_SVE"
2764   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
2765 )
2766
2767 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
2768 ;; the results into a single vector.
2769 (define_expand "vec_pack_trunc_<Vwide>"
2770   [(set (match_dup 4)
2771         (unspec:SVE_HSF
2772           [(match_dup 3)
2773            (unspec:SVE_HSF [(match_operand:<VWIDE> 1 "register_operand")]
2774                            UNSPEC_FLOAT_CONVERT)]
2775           UNSPEC_MERGE_PTRUE))
2776    (set (match_dup 5)
2777         (unspec:SVE_HSF
2778           [(match_dup 3)
2779            (unspec:SVE_HSF [(match_operand:<VWIDE> 2 "register_operand")]
2780                            UNSPEC_FLOAT_CONVERT)]
2781           UNSPEC_MERGE_PTRUE))
2782    (set (match_operand:SVE_HSF 0 "register_operand")
2783         (unspec:SVE_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2784   "TARGET_SVE"
2785   {
2786     operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
2787     operands[4] = gen_reg_rtx (<MODE>mode);
2788     operands[5] = gen_reg_rtx (<MODE>mode);
2789   }
2790 )
2791
2792 ;; Convert two vectors of DF to SI and pack the results into a single vector.
2793 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
2794   [(set (match_dup 4)
2795         (unspec:VNx4SI
2796           [(match_dup 3)
2797            (FIXUORS:VNx4SI (match_operand:VNx2DF 1 "register_operand"))]
2798           UNSPEC_MERGE_PTRUE))
2799    (set (match_dup 5)
2800         (unspec:VNx4SI
2801           [(match_dup 3)
2802            (FIXUORS:VNx4SI (match_operand:VNx2DF 2 "register_operand"))]
2803           UNSPEC_MERGE_PTRUE))
2804    (set (match_operand:VNx4SI 0 "register_operand")
2805         (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
2806   "TARGET_SVE"
2807   {
2808     operands[3] = aarch64_ptrue_reg (VNx2BImode);
2809     operands[4] = gen_reg_rtx (VNx4SImode);
2810     operands[5] = gen_reg_rtx (VNx4SImode);
2811   }
2812 )
2813
2814 ;; Predicated floating-point operations with select.
2815 (define_expand "cond_<optab><mode>"
2816   [(set (match_operand:SVE_F 0 "register_operand")
2817         (unspec:SVE_F
2818           [(match_operand:<VPRED> 1 "register_operand")
2819            (unspec:SVE_F
2820              [(match_operand:SVE_F 2 "register_operand")
2821               (match_operand:SVE_F 3 "register_operand")]
2822              SVE_COND_FP_BINARY)
2823            (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
2824           UNSPEC_SEL))]
2825   "TARGET_SVE"
2826 )
2827
2828 ;; Predicated floating-point operations with select matching first operand.
2829 (define_insn "*cond_<optab><mode>_2"
2830   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2831         (unspec:SVE_F
2832           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2833            (unspec:SVE_F
2834              [(match_operand:SVE_F 2 "register_operand" "0, w")
2835               (match_operand:SVE_F 3 "register_operand" "w, w")]
2836              SVE_COND_FP_BINARY)
2837            (match_dup 2)]
2838           UNSPEC_SEL))]
2839   "TARGET_SVE"
2840   "@
2841    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2842    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
2843   [(set_attr "movprfx" "*,yes")]
2844 )
2845
2846 ;; Predicated floating-point operations with select matching second operand.
2847 (define_insn "*cond_<optab><mode>_3"
2848   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2849         (unspec:SVE_F
2850           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2851            (unspec:SVE_F
2852              [(match_operand:SVE_F 2 "register_operand" "w, w")
2853               (match_operand:SVE_F 3 "register_operand" "0, w")]
2854              SVE_COND_FP_BINARY)
2855            (match_dup 3)]
2856           UNSPEC_SEL))]
2857   "TARGET_SVE"
2858   "@
2859    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2860    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
2861   [(set_attr "movprfx" "*,yes")]
2862 )
2863
2864 ;; Predicated floating-point binary operations in which the values of
2865 ;; inactive lanes are distinct from the other inputs.
2866 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2867   [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
2868         (unspec:SVE_F
2869           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
2870            (unspec:SVE_F
2871              [(match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
2872               (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
2873              SVE_COND_FP_BINARY)
2874            (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
2875           UNSPEC_SEL))]
2876   "TARGET_SVE
2877    && !rtx_equal_p (operands[2], operands[4])
2878    && !rtx_equal_p (operands[3], operands[4])"
2879   "@
2880    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2881    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
2882    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2883    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2884    #"
2885   "&& reload_completed
2886    && register_operand (operands[4], <MODE>mode)
2887    && !rtx_equal_p (operands[0], operands[4])"
2888   {
2889     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
2890                                              operands[4], operands[1]));
2891     operands[4] = operands[2] = operands[0];
2892   }
2893   [(set_attr "movprfx" "yes")]
2894 )
2895
2896 ;; Predicated floating-point ternary operations with select.
2897 (define_expand "cond_<optab><mode>"
2898   [(set (match_operand:SVE_F 0 "register_operand")
2899         (unspec:SVE_F
2900           [(match_operand:<VPRED> 1 "register_operand")
2901            (unspec:SVE_F
2902              [(match_operand:SVE_F 2 "register_operand")
2903               (match_operand:SVE_F 3 "register_operand")
2904               (match_operand:SVE_F 4 "register_operand")]
2905              SVE_COND_FP_TERNARY)
2906            (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero")]
2907           UNSPEC_SEL))]
2908   "TARGET_SVE"
2909 {
2910   /* Swap the multiplication operands if the fallback value is the
2911      second of the two.  */
2912   if (rtx_equal_p (operands[3], operands[5]))
2913     std::swap (operands[2], operands[3]);
2914 })
2915
2916 ;; Predicated floating-point ternary operations using the FMAD-like form.
2917 (define_insn "*cond_<optab><mode>_2"
2918   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2919         (unspec:SVE_F
2920           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2921            (unspec:SVE_F
2922              [(match_operand:SVE_F 2 "register_operand" "0, w")
2923               (match_operand:SVE_F 3 "register_operand" "w, w")
2924               (match_operand:SVE_F 4 "register_operand" "w, w")]
2925              SVE_COND_FP_TERNARY)
2926            (match_dup 2)]
2927           UNSPEC_SEL))]
2928   "TARGET_SVE"
2929   "@
2930    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
2931    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
2932   [(set_attr "movprfx" "*,yes")]
2933 )
2934
2935 ;; Predicated floating-point ternary operations using the FMLA-like form.
2936 (define_insn "*cond_<optab><mode>_4"
2937   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
2938         (unspec:SVE_F
2939           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2940            (unspec:SVE_F
2941              [(match_operand:SVE_F 2 "register_operand" "w, w")
2942               (match_operand:SVE_F 3 "register_operand" "w, w")
2943               (match_operand:SVE_F 4 "register_operand" "0, w")]
2944              SVE_COND_FP_TERNARY)
2945            (match_dup 4)]
2946           UNSPEC_SEL))]
2947   "TARGET_SVE"
2948   "@
2949    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2950    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
2951   [(set_attr "movprfx" "*,yes")]
2952 )
2953
2954 ;; Predicated floating-point ternary operations in which the value for
2955 ;; inactive lanes is distinct from the other inputs.
2956 (define_insn_and_rewrite "*cond_<optab><mode>_any"
2957   [(set (match_operand:SVE_F 0 "register_operand" "=&w, &w, ?&w")
2958         (unspec:SVE_F
2959           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2960            (unspec:SVE_F
2961              [(match_operand:SVE_F 2 "register_operand" "w, w, w")
2962               (match_operand:SVE_F 3 "register_operand" "w, w, w")
2963               (match_operand:SVE_F 4 "register_operand" "w, w, w")]
2964              SVE_COND_FP_TERNARY)
2965            (match_operand:SVE_F 5 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
2966           UNSPEC_SEL))]
2967   "TARGET_SVE
2968    && !rtx_equal_p (operands[2], operands[5])
2969    && !rtx_equal_p (operands[3], operands[5])
2970    && !rtx_equal_p (operands[4], operands[5])"
2971   "@
2972    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2973    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
2974    #"
2975   "&& reload_completed
2976    && !CONSTANT_P (operands[5])
2977    && !rtx_equal_p (operands[0], operands[5])"
2978   {
2979     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
2980                                              operands[5], operands[1]));
2981     operands[5] = operands[4] = operands[0];
2982   }
2983   [(set_attr "movprfx" "yes")]
2984 )
2985
2986 ;; Shift an SVE vector left and insert a scalar into element 0.
2987 (define_insn "vec_shl_insert_<mode>"
2988   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
2989         (unspec:SVE_ALL
2990           [(match_operand:SVE_ALL 1 "register_operand" "0, 0")
2991            (match_operand:<VEL> 2 "register_operand" "rZ, w")]
2992           UNSPEC_INSR))]
2993   "TARGET_SVE"
2994   "@
2995    insr\t%0.<Vetype>, %<vwcore>2
2996    insr\t%0.<Vetype>, %<Vetype>2"
2997 )
2998
2999 (define_expand "copysign<mode>3"
3000   [(match_operand:SVE_F 0 "register_operand")
3001    (match_operand:SVE_F 1 "register_operand")
3002    (match_operand:SVE_F 2 "register_operand")]
3003   "TARGET_SVE"
3004   {
3005     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3006     rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
3007     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3008     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3009
3010     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3011     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3012
3013     emit_insn (gen_and<v_int_equiv>3
3014                (sign, arg2,
3015                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3016                                                    HOST_WIDE_INT_M1U
3017                                                    << bits)));
3018     emit_insn (gen_and<v_int_equiv>3
3019                (mant, arg1,
3020                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3021                                                    ~(HOST_WIDE_INT_M1U
3022                                                      << bits))));
3023     emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
3024     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3025     DONE;
3026   }
3027 )
3028
3029 (define_expand "xorsign<mode>3"
3030   [(match_operand:SVE_F 0 "register_operand")
3031    (match_operand:SVE_F 1 "register_operand")
3032    (match_operand:SVE_F 2 "register_operand")]
3033   "TARGET_SVE"
3034   {
3035     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
3036     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
3037     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
3038
3039     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
3040     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
3041
3042     emit_insn (gen_and<v_int_equiv>3
3043                (sign, arg2,
3044                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
3045                                                    HOST_WIDE_INT_M1U
3046                                                    << bits)));
3047     emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
3048     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
3049     DONE;
3050   }
3051 )
3052
3053 ;; Unpredicated DOT product.
3054 (define_insn "<sur>dot_prod<vsi2qi>"
3055   [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
3056         (plus:SVE_SDI
3057           (unspec:SVE_SDI
3058             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
3059              (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
3060             DOTPROD)
3061           (match_operand:SVE_SDI 3 "register_operand" "0, w")))]
3062   "TARGET_SVE"
3063   "@
3064    <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
3065    movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
3066   [(set_attr "movprfx" "*,yes")]
3067 )
3068
3069 ;; Unpredicated integer absolute difference.
3070 (define_expand "<su>abd<mode>_3"
3071   [(use (match_operand:SVE_I 0 "register_operand"))
3072    (USMAX:SVE_I (match_operand:SVE_I 1 "register_operand")
3073                 (match_operand:SVE_I 2 "register_operand"))]
3074   "TARGET_SVE"
3075   {
3076     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3077     emit_insn (gen_aarch64_<su>abd<mode>_3 (operands[0], pred, operands[1],
3078                                             operands[2]));
3079     DONE;
3080   }
3081 )
3082
3083 ;; Predicated integer absolute difference.
3084 (define_insn "aarch64_<su>abd<mode>_3"
3085   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
3086         (unspec:SVE_I
3087           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3088            (minus:SVE_I
3089              (USMAX:SVE_I
3090                (match_operand:SVE_I 2 "register_operand" "0, w")
3091                (match_operand:SVE_I 3 "register_operand" "w, w"))
3092              (<max_opp>:SVE_I
3093                (match_dup 2)
3094                (match_dup 3)))]
3095           UNSPEC_MERGE_PTRUE))]
3096   "TARGET_SVE"
3097   "@
3098    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3099    movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3100   [(set_attr "movprfx" "*,yes")]
3101 )
3102
3103 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
3104 ;; operands 1 and 2.  The sequence also has to perform a widening reduction of
3105 ;; the difference into a vector and accumulate that into operand 3 before
3106 ;; copying that into the result operand 0.
3107 ;; Perform that with a sequence of:
3108 ;; MOV          ones.b, #1
3109 ;; [SU]ABD      diff.b, p0/m, op1.b, op2.b
3110 ;; MOVPRFX      op0, op3        // If necessary
3111 ;; UDOT         op0.s, diff.b, ones.b
3112
3113 (define_expand "<sur>sad<vsi2qi>"
3114   [(use (match_operand:SVE_SDI 0 "register_operand"))
3115    (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
3116                     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
3117    (use (match_operand:SVE_SDI 3 "register_operand"))]
3118   "TARGET_SVE"
3119   {
3120     rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
3121     rtx diff = gen_reg_rtx (<VSI2QI>mode);
3122     emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
3123     emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
3124     DONE;
3125   }
3126 )
3127
3128 ;; Standard pattern name vec_init<mode><Vel>.
3129 (define_expand "vec_init<mode><Vel>"
3130   [(match_operand:SVE_ALL 0 "register_operand" "")
3131     (match_operand 1 "" "")]
3132   "TARGET_SVE"
3133   {
3134     aarch64_sve_expand_vector_init (operands[0], operands[1]);
3135     DONE;
3136   }
3137 )