gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
  23         (match_operand:VALL_F16MOV 1 "general_operand"))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38
  39   /* If a constant is too complex to force to memory (e.g. because it
  40      contains CONST_POLY_INTs), build it up from individual elements instead.
  41      We should only need to do this before RA; aarch64_legitimate_constant_p
  42      should ensure that we don't try to rematerialize the constant later.  */
  43   if (GET_CODE (operands[1]) == CONST_VECTOR
  44       && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
  45     {
  46       aarch64_expand_vector_init (operands[0], operands[1]);
  47       DONE;
  48     }
  49   "
  50 )
  51
  52 (define_expand "movmisalign<mode>"
  53   [(set (match_operand:VALL 0 "nonimmediate_operand")
  54         (match_operand:VALL 1 "general_operand"))]
  55   "TARGET_SIMD && !STRICT_ALIGNMENT"
  56 {
  57   /* This pattern is not permitted to fail during expansion: if both arguments
  58      are non-registers (e.g. memory := constant, which can be created by the
  59      auto-vectorizer), force operand 1 into a register.  */
  60   if (!register_operand (operands[0], <MODE>mode)
  61       && !register_operand (operands[1], <MODE>mode))
  62     operands[1] = force_reg (<MODE>mode, operands[1]);
  63 })
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  67         (vec_duplicate:VDQ_I
  68           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  69   "TARGET_SIMD"
  70   "@
  71    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  72    dup\\t%0.<Vtype>, %<vwcore>1"
  73   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  74 )
  75
  76 (define_insn "aarch64_simd_dup<mode>"
  77   [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
  78         (vec_duplicate:VDQF_F16
  79           (match_operand:<VEL> 1 "register_operand" "w,r")))]
  80   "TARGET_SIMD"
  81   "@
  82    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  83    dup\\t%0.<Vtype>, %<vwcore>1"
  84   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  85 )
  86
  87 (define_insn "aarch64_dup_lane<mode>"
  88   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  89         (vec_duplicate:VALL_F16
  90           (vec_select:<VEL>
  91             (match_operand:VALL_F16 1 "register_operand" "w")
  92             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  93           )))]
  94   "TARGET_SIMD"
  95   {
  96     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  97     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  98   }
  99   [(set_attr "type" "neon_dup<q>")]
 100 )
 101
 102 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
 103   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 104         (vec_duplicate:VALL_F16_NO_V2Q
 105           (vec_select:<VEL>
 106             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
 107             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
 108           )))]
 109   "TARGET_SIMD"
 110   {
 111     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 112     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 113   }
 114   [(set_attr "type" "neon_dup<q>")]
 115 )
 116
 117 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
 118   [(set (match_operand:VDMOV 0 "nonimmediate_operand"
 119                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 120         (match_operand:VDMOV 1 "general_operand"
 121                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 122   "TARGET_SIMD
 123    && (register_operand (operands[0], <MODE>mode)
 124        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 125 {
 126    switch (which_alternative)
 127      {
 128      case 0: return "ldr\t%d0, %1";
 129      case 1: return "str\txzr, %0";
 130      case 2: return "str\t%d1, %0";
 131      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 132      case 4: return "umov\t%0, %1.d[0]";
 133      case 5: return "fmov\t%d0, %1";
 134      case 6: return "mov\t%0, %1";
 135      case 7:
 136         return aarch64_output_simd_mov_immediate (operands[1], 64);
 137      default: gcc_unreachable ();
 138      }
 139 }
 140   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 141                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 142                      mov_reg, neon_move<q>")]
 143 )
 144
 145 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
 146   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
 147                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 148         (match_operand:VQMOV 1 "general_operand"
 149                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 150   "TARGET_SIMD
 151    && (register_operand (operands[0], <MODE>mode)
 152        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 153 {
 154   switch (which_alternative)
 155     {
 156     case 0:
 157         return "ldr\t%q0, %1";
 158     case 1:
 159         return "stp\txzr, xzr, %0";
 160     case 2:
 161         return "str\t%q1, %0";
 162     case 3:
 163         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 164     case 4:
 165     case 5:
 166     case 6:
 167         return "#";
 168     case 7:
 169         return aarch64_output_simd_mov_immediate (operands[1], 128);
 170     default:
 171         gcc_unreachable ();
 172     }
 173 }
 174   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 175                      neon_logic<q>, multiple, multiple,\
 176                      multiple, neon_move<q>")
 177    (set_attr "length" "4,4,4,4,8,8,8,4")]
 178 )
 179
 180 ;; When storing lane zero we can use the normal STR and its more permissive
 181 ;; addressing modes.
 182
 183 (define_insn "aarch64_store_lane0<mode>"
 184   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 185         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 186                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 187   "TARGET_SIMD
 188    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 189   "str\\t%<Vetype>1, %0"
 190   [(set_attr "type" "neon_store1_1reg<q>")]
 191 )
 192
 193 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 194   [(set (match_operand:DREG 0 "register_operand" "=w")
 195         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 196    (set (match_operand:DREG2 2 "register_operand" "=w")
 197         (match_operand:DREG2 3 "memory_operand" "m"))]
 198   "TARGET_SIMD
 199    && rtx_equal_p (XEXP (operands[3], 0),
 200                    plus_constant (Pmode,
 201                                   XEXP (operands[1], 0),
 202                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 203   "ldp\\t%d0, %d2, %z1"
 204   [(set_attr "type" "neon_ldp")]
 205 )
 206
 207 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 208   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 209         (match_operand:DREG 1 "register_operand" "w"))
 210    (set (match_operand:DREG2 2 "memory_operand" "=m")
 211         (match_operand:DREG2 3 "register_operand" "w"))]
 212   "TARGET_SIMD
 213    && rtx_equal_p (XEXP (operands[2], 0),
 214                    plus_constant (Pmode,
 215                                   XEXP (operands[0], 0),
 216                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 217   "stp\\t%d1, %d3, %z0"
 218   [(set_attr "type" "neon_stp")]
 219 )
 220
 221 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 222   [(set (match_operand:VQ 0 "register_operand" "=w")
 223         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 224    (set (match_operand:VQ2 2 "register_operand" "=w")
 225         (match_operand:VQ2 3 "memory_operand" "m"))]
 226   "TARGET_SIMD
 227     && rtx_equal_p (XEXP (operands[3], 0),
 228                     plus_constant (Pmode,
 229                                XEXP (operands[1], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "ldp\\t%q0, %q2, %z1"
 232   [(set_attr "type" "neon_ldp_q")]
 233 )
 234
 235 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 236   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 237         (match_operand:VQ 1 "register_operand" "w"))
 238    (set (match_operand:VQ2 2 "memory_operand" "=m")
 239         (match_operand:VQ2 3 "register_operand" "w"))]
 240   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 241                 plus_constant (Pmode,
 242                                XEXP (operands[0], 0),
 243                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 244   "stp\\t%q1, %q3, %z0"
 245   [(set_attr "type" "neon_stp_q")]
 246 )
 247
 248
 249 (define_split
 250   [(set (match_operand:VQMOV 0 "register_operand" "")
 251       (match_operand:VQMOV 1 "register_operand" ""))]
 252   "TARGET_SIMD && reload_completed
 253    && GP_REGNUM_P (REGNO (operands[0]))
 254    && GP_REGNUM_P (REGNO (operands[1]))"
 255   [(const_int 0)]
 256 {
 257   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 258   DONE;
 259 })
 260
 261 (define_split
 262   [(set (match_operand:VQMOV 0 "register_operand" "")
 263         (match_operand:VQMOV 1 "register_operand" ""))]
 264   "TARGET_SIMD && reload_completed
 265    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 266        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 267   [(const_int 0)]
 268 {
 269   aarch64_split_simd_move (operands[0], operands[1]);
 270   DONE;
 271 })
 272
 273 (define_expand "@aarch64_split_simd_mov<mode>"
 274   [(set (match_operand:VQMOV 0)
 275         (match_operand:VQMOV 1))]
 276   "TARGET_SIMD"
 277   {
 278     rtx dst = operands[0];
 279     rtx src = operands[1];
 280
 281     if (GP_REGNUM_P (REGNO (src)))
 282       {
 283         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 284         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 285
 286         emit_insn
 287           (gen_move_lo_quad_<mode> (dst, src_low_part));
 288         emit_insn
 289           (gen_move_hi_quad_<mode> (dst, src_high_part));
 290       }
 291
 292     else
 293       {
 294         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 295         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 296         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 297         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 298         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
 299         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
 300       }
 301     DONE;
 302   }
 303 )
 304
 305 (define_expand "aarch64_get_half<mode>"
 306   [(set (match_operand:<VHALF> 0 "register_operand")
 307         (vec_select:<VHALF>
 308           (match_operand:VQMOV 1 "register_operand")
 309           (match_operand 2 "ascending_int_parallel")))]
 310   "TARGET_SIMD"
 311 )
 312
 313 (define_expand "aarch64_get_low<mode>"
 314   [(match_operand:<VHALF> 0 "register_operand")
 315    (match_operand:VQMOV 1 "register_operand")]
 316   "TARGET_SIMD"
 317   {
 318     rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 319     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
 320     DONE;
 321   }
 322 )
 323
 324 (define_expand "aarch64_get_high<mode>"
 325   [(match_operand:<VHALF> 0 "register_operand")
 326    (match_operand:VQMOV 1 "register_operand")]
 327   "TARGET_SIMD"
 328   {
 329     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 330     emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
 331     DONE;
 332   }
 333 )
 334
 335 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
 336   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 337         (vec_select:<VHALF>
 338           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 339           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
 340   "TARGET_SIMD"
 341   "@
 342    #
 343    umov\t%0, %1.d[0]"
 344   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
 345   [(set (match_dup 0) (match_dup 1))]
 346   {
 347     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
 348   }
 349   [(set_attr "type" "mov_reg,neon_to_gp<q>")
 350    (set_attr "length" "4")]
 351 )
 352
 353 (define_insn "aarch64_simd_mov_from_<mode>high"
 354   [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
 355         (vec_select:<VHALF>
 356           (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
 357           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
 358   "TARGET_SIMD"
 359   "@
 360    dup\\t%d0, %1.d[1]
 361    umov\t%0, %1.d[1]"
 362   [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
 363    (set_attr "length" "4")]
 364 )
 365
 366 (define_insn "orn<mode>3"
 367  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 368        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 369                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 370  "TARGET_SIMD"
 371  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 372   [(set_attr "type" "neon_logic<q>")]
 373 )
 374
 375 (define_insn "bic<mode>3"
 376  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 377        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 378                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 379  "TARGET_SIMD"
 380  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 381   [(set_attr "type" "neon_logic<q>")]
 382 )
 383
 384 (define_insn "add<mode>3"
 385   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 386         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 387                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 388   "TARGET_SIMD"
 389   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 390   [(set_attr "type" "neon_add<q>")]
 391 )
 392
 393 (define_insn "sub<mode>3"
 394   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 395         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 396                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 397   "TARGET_SIMD"
 398   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 399   [(set_attr "type" "neon_sub<q>")]
 400 )
 401
 402 (define_insn "mul<mode>3"
 403   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 404         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 405                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 406   "TARGET_SIMD"
 407   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 408   [(set_attr "type" "neon_mul_<Vetype><q>")]
 409 )
 410
 411 (define_insn "bswap<mode>2"
 412   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 413         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 414   "TARGET_SIMD"
 415   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 416   [(set_attr "type" "neon_rev<q>")]
 417 )
 418
 419 (define_insn "aarch64_rbit<mode>"
 420   [(set (match_operand:VB 0 "register_operand" "=w")
 421         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 422                    UNSPEC_RBIT))]
 423   "TARGET_SIMD"
 424   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 425   [(set_attr "type" "neon_rbit")]
 426 )
 427
 428 (define_expand "ctz<mode>2"
 429   [(set (match_operand:VS 0 "register_operand")
 430         (ctz:VS (match_operand:VS 1 "register_operand")))]
 431   "TARGET_SIMD"
 432   {
 433      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 434      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 435                                              <MODE>mode, 0);
 436      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 437      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 438      DONE;
 439   }
 440 )
 441
 442 (define_expand "xorsign<mode>3"
 443   [(match_operand:VHSDF 0 "register_operand")
 444    (match_operand:VHSDF 1 "register_operand")
 445    (match_operand:VHSDF 2 "register_operand")]
 446   "TARGET_SIMD"
 447 {
 448
 449   machine_mode imode = <V_INT_EQUIV>mode;
 450   rtx v_bitmask = gen_reg_rtx (imode);
 451   rtx op1x = gen_reg_rtx (imode);
 452   rtx op2x = gen_reg_rtx (imode);
 453
 454   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 455   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 456
 457   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 458
 459   emit_move_insn (v_bitmask,
 460                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 461                                                      HOST_WIDE_INT_M1U << bits));
 462
 463   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 464   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 465   emit_move_insn (operands[0],
 466                   lowpart_subreg (<MODE>mode, op1x, imode));
 467   DONE;
 468 }
 469 )
 470
 471 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 472 ;; fact that their usage need to guarantee that the source vectors are
 473 ;; contiguous.  It would be wrong to describe the operation without being able
 474 ;; to describe the permute that is also required, but even if that is done
 475 ;; the permute would have been created as a LOAD_LANES which means the values
 476 ;; in the registers are in the wrong order.
 477 (define_insn "aarch64_fcadd<rot><mode>"
 478   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 479         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 480                        (match_operand:VHSDF 2 "register_operand" "w")]
 481                        FCADD))]
 482   "TARGET_COMPLEX"
 483   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 484   [(set_attr "type" "neon_fcadd")]
 485 )
 486
 487 (define_expand "cadd<rot><mode>3"
 488   [(set (match_operand:VHSDF 0 "register_operand")
 489         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 490                        (match_operand:VHSDF 2 "register_operand")]
 491                        FCADD))]
 492   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 493 )
 494
 495 (define_insn "aarch64_fcmla<rot><mode>"
 496   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 497         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 498                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 499                                    (match_operand:VHSDF 3 "register_operand" "w")]
 500                                    FCMLA)))]
 501   "TARGET_COMPLEX"
 502   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 503   [(set_attr "type" "neon_fcmla")]
 504 )
 505
 506
 507 (define_insn "aarch64_fcmla_lane<rot><mode>"
 508   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 509         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 510                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 511                                    (match_operand:VHSDF 3 "register_operand" "w")
 512                                    (match_operand:SI 4 "const_int_operand" "n")]
 513                                    FCMLA)))]
 514   "TARGET_COMPLEX"
 515 {
 516   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 517   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 518 }
 519   [(set_attr "type" "neon_fcmla")]
 520 )
 521
 522 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 523   [(set (match_operand:V4HF 0 "register_operand" "=w")
 524         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 525                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 526                                  (match_operand:V8HF 3 "register_operand" "w")
 527                                  (match_operand:SI 4 "const_int_operand" "n")]
 528                                  FCMLA)))]
 529   "TARGET_COMPLEX"
 530 {
 531   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 532   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 533 }
 534   [(set_attr "type" "neon_fcmla")]
 535 )
 536
 537 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 538   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 539         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 540                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 541                                      (match_operand:<VHALF> 3 "register_operand" "w")
 542                                      (match_operand:SI 4 "const_int_operand" "n")]
 543                                      FCMLA)))]
 544   "TARGET_COMPLEX"
 545 {
 546   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 547   operands[4]
 548     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 549   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 550 }
 551   [(set_attr "type" "neon_fcmla")]
 552 )
 553
 554 ;; The complex mla/mls operations always need to expand to two instructions.
 555 ;; The first operation does half the computation and the second does the
 556 ;; remainder.  Because of this, expand early.
 557 (define_expand "cml<fcmac1><conj_op><mode>4"
 558   [(set (match_operand:VHSDF 0 "register_operand")
 559         (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
 560                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
 561                                    (match_operand:VHSDF 3 "register_operand")]
 562                                    FCMLA_OP)))]
 563   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 564 {
 565   rtx tmp = gen_reg_rtx (<MODE>mode);
 566   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
 567                                                  operands[3], operands[2]));
 568   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
 569                                                  operands[3], operands[2]));
 570   DONE;
 571 })
 572
 573 ;; The complex mul operations always need to expand to two instructions.
 574 ;; The first operation does half the computation and the second does the
 575 ;; remainder.  Because of this, expand early.
 576 (define_expand "cmul<conj_op><mode>3"
 577   [(set (match_operand:VHSDF 0 "register_operand")
 578         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 579                        (match_operand:VHSDF 2 "register_operand")]
 580                        FCMUL_OP))]
 581   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 582 {
 583   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
 584   rtx res1 = gen_reg_rtx (<MODE>mode);
 585   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
 586                                                  operands[2], operands[1]));
 587   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
 588                                                  operands[2], operands[1]));
 589   DONE;
 590 })
 591
 592 ;; These expands map to the Dot Product optab the vectorizer checks for
 593 ;; and to the intrinsics patttern.
 594 ;; The auto-vectorizer expects a dot product builtin that also does an
 595 ;; accumulation into the provided register.
 596 ;; Given the following pattern
 597 ;;
 598 ;; for (i=0; i<len; i++) {
 599 ;;     c = a[i] * b[i];
 600 ;;     r += c;
 601 ;; }
 602 ;; return result;
 603 ;;
 604 ;; This can be auto-vectorized to
 605 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 606 ;;
 607 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 608 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 609 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 610 ;; ...
 611 ;;
 612 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 613 (define_insn "<sur>dot_prod<vsi2qi>"
 614   [(set (match_operand:VS 0 "register_operand" "=w")
 615         (plus:VS
 616           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
 617                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
 618                       DOTPROD)
 619           (match_operand:VS 3 "register_operand" "0")))]
 620   "TARGET_DOTPROD"
 621   "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
 622   [(set_attr "type" "neon_dot<q>")]
 623 )
 624
 625 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
 626 ;; (vector) Dot Product operation and the vectorized optab.
 627 (define_insn "usdot_prod<vsi2qi>"
 628   [(set (match_operand:VS 0 "register_operand" "=w")
 629         (plus:VS
 630           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
 631                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
 632           UNSPEC_USDOT)
 633           (match_operand:VS 3 "register_operand" "0")))]
 634   "TARGET_I8MM"
 635   "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
 636   [(set_attr "type" "neon_dot<q>")]
 637 )
 638
 639 ;; These instructions map to the __builtins for the Dot Product
 640 ;; indexed operations.
 641 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 642   [(set (match_operand:VS 0 "register_operand" "=w")
 643         (plus:VS
 644           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 645                       (match_operand:V8QI 3 "register_operand" "<h_con>")
 646                       (match_operand:SI 4 "immediate_operand" "i")]
 647                       DOTPROD)
 648           (match_operand:VS 1 "register_operand" "0")))]
 649   "TARGET_DOTPROD"
 650   {
 651     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 652     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 653   }
 654   [(set_attr "type" "neon_dot<q>")]
 655 )
 656
 657 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 658   [(set (match_operand:VS 0 "register_operand" "=w")
 659         (plus:VS
 660           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 661                       (match_operand:V16QI 3 "register_operand" "<h_con>")
 662                       (match_operand:SI 4 "immediate_operand" "i")]
 663                       DOTPROD)
 664           (match_operand:VS 1 "register_operand" "0")))]
 665   "TARGET_DOTPROD"
 666   {
 667     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 668     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 669   }
 670   [(set_attr "type" "neon_dot<q>")]
 671 )
 672
 673 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
 674 ;; (by element) Dot Product operations.
 675 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
 676   [(set (match_operand:VS 0 "register_operand" "=w")
 677         (plus:VS
 678           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
 679                       (match_operand:VB 3 "register_operand" "w")
 680                       (match_operand:SI 4 "immediate_operand" "i")]
 681           DOTPROD_I8MM)
 682           (match_operand:VS 1 "register_operand" "0")))]
 683   "TARGET_I8MM"
 684   {
 685     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
 686     int lane = INTVAL (operands[4]);
 687     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
 688     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
 689   }
 690   [(set_attr "type" "neon_dot<VS:q>")]
 691 )
 692
 693 (define_expand "copysign<mode>3"
 694   [(match_operand:VHSDF 0 "register_operand")
 695    (match_operand:VHSDF 1 "register_operand")
 696    (match_operand:VHSDF 2 "register_operand")]
 697   "TARGET_FLOAT && TARGET_SIMD"
 698 {
 699   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 700   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 701
 702   emit_move_insn (v_bitmask,
 703                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 704                                                      HOST_WIDE_INT_M1U << bits));
 705   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 706                                          operands[2], operands[1]));
 707   DONE;
 708 }
 709 )
 710
 711 (define_insn "mul_lane<mode>3"
 712  [(set (match_operand:VMULD 0 "register_operand" "=w")
 713        (mult:VMULD
 714          (vec_duplicate:VMULD
 715            (vec_select:<VEL>
 716              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
 717              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 718          (match_operand:VMULD 1 "register_operand" "w")))]
 719   "TARGET_SIMD"
 720   {
 721     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
 722     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
 723   }
 724   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 725 )
 726
 727 (define_insn "mul_laneq<mode>3"
 728   [(set (match_operand:VMUL 0 "register_operand" "=w")
 729      (mult:VMUL
 730        (vec_duplicate:VMUL
 731           (vec_select:<VEL>
 732             (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
 733             (parallel [(match_operand:SI 3 "immediate_operand")])))
 734       (match_operand:VMUL 1 "register_operand" "w")))]
 735   "TARGET_SIMD"
 736   {
 737     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
 738     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
 739   }
 740   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 741 )
 742
 743 (define_insn "mul_n<mode>3"
 744  [(set (match_operand:VMUL 0 "register_operand" "=w")
 745        (mult:VMUL
 746          (vec_duplicate:VMUL
 747            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
 748          (match_operand:VMUL 1 "register_operand" "w")))]
 749   "TARGET_SIMD"
 750   "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
 751   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 752 )
 753
 754 (define_insn "@aarch64_rsqrte<mode>"
 755   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 756         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 757                      UNSPEC_RSQRTE))]
 758   "TARGET_SIMD"
 759   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 760   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 761
 762 (define_insn "@aarch64_rsqrts<mode>"
 763   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 764         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 765                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 766          UNSPEC_RSQRTS))]
 767   "TARGET_SIMD"
 768   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 769   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 770
 771 (define_expand "rsqrt<mode>2"
 772   [(set (match_operand:VALLF 0 "register_operand")
 773         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 774                      UNSPEC_RSQRT))]
 775   "TARGET_SIMD"
 776 {
 777   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 778   DONE;
 779 })
 780
 781 (define_insn "aarch64_ursqrte<mode>"
 782 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
 783       (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
 784                    UNSPEC_RSQRTE))]
 785 "TARGET_SIMD"
 786 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 787 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 788
 789 (define_insn "*aarch64_mul3_elt_to_64v2df"
 790   [(set (match_operand:DF 0 "register_operand" "=w")
 791      (mult:DF
 792        (vec_select:DF
 793          (match_operand:V2DF 1 "register_operand" "w")
 794          (parallel [(match_operand:SI 2 "immediate_operand")]))
 795        (match_operand:DF 3 "register_operand" "w")))]
 796   "TARGET_SIMD"
 797   {
 798     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 799     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 800   }
 801   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 802 )
 803
 804 (define_insn "neg<mode>2"
 805   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 806         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 807   "TARGET_SIMD"
 808   "neg\t%0.<Vtype>, %1.<Vtype>"
 809   [(set_attr "type" "neon_neg<q>")]
 810 )
 811
 812 (define_insn "abs<mode>2"
 813   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 814         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 815   "TARGET_SIMD"
 816   "abs\t%0.<Vtype>, %1.<Vtype>"
 817   [(set_attr "type" "neon_abs<q>")]
 818 )
 819
 820 ;; The intrinsic version of integer ABS must not be allowed to
 821 ;; combine with any operation with an integerated ABS step, such
 822 ;; as SABD.
 823 (define_insn "aarch64_abs<mode>"
 824   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 825           (unspec:VSDQ_I_DI
 826             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 827            UNSPEC_ABS))]
 828   "TARGET_SIMD"
 829   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 830   [(set_attr "type" "neon_abs<q>")]
 831 )
 832
 833 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 834 ;; This isn't accurate as ABS treats always its input as a signed value.
 835 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 836 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 837 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 838 (define_insn "aarch64_<su>abd<mode>"
 839   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 840         (minus:VDQ_BHSI
 841           (USMAX:VDQ_BHSI
 842             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 843             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 844           (<max_opp>:VDQ_BHSI
 845             (match_dup 1)
 846             (match_dup 2))))]
 847   "TARGET_SIMD"
 848   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 849   [(set_attr "type" "neon_abd<q>")]
 850 )
 851
 852
 853 (define_insn "aarch64_<sur>abdl<mode>"
 854   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 855         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
 856                          (match_operand:VD_BHSI 2 "register_operand" "w")]
 857         ABDL))]
 858   "TARGET_SIMD"
 859   "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 860   [(set_attr "type" "neon_abd<q>")]
 861 )
 862
 863 (define_insn "aarch64_<sur>abdl2<mode>"
 864   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 865         (unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
 866                          (match_operand:VQW 2 "register_operand" "w")]
 867         ABDL2))]
 868   "TARGET_SIMD"
 869   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 870   [(set_attr "type" "neon_abd<q>")]
 871 )
 872
 873 (define_insn "aarch64_<sur>abal<mode>"
 874   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 875         (unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
 876                           (match_operand:VD_BHSI 3 "register_operand" "w")
 877                          (match_operand:<VWIDE> 1 "register_operand" "0")]
 878         ABAL))]
 879   "TARGET_SIMD"
 880   "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
 881   [(set_attr "type" "neon_arith_acc<q>")]
 882 )
 883
 884 (define_insn "aarch64_<sur>abal2<mode>"
 885   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 886         (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
 887                           (match_operand:VQW 3 "register_operand" "w")
 888                          (match_operand:<VWIDE> 1 "register_operand" "0")]
 889         ABAL2))]
 890   "TARGET_SIMD"
 891   "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
 892   [(set_attr "type" "neon_arith_acc<q>")]
 893 )
 894
 895 (define_insn "aarch64_<sur>adalp<mode>"
 896   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 897         (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
 898                          (match_operand:<VDBLW> 1 "register_operand" "0")]
 899         ADALP))]
 900   "TARGET_SIMD"
 901   "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
 902   [(set_attr "type" "neon_reduc_add<q>")]
 903 )
 904
 905 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 906 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 907 ;; reduction of the difference into a V4SI vector and accumulate that into
 908 ;; operand 3 before copying that into the result operand 0.
 909 ;; Perform that with a sequence of:
 910 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 911 ;; UABAL        tmp.8h, op1.8b, op2.8b
 912 ;; UADALP       op3.4s, tmp.8h
 913 ;; MOV          op0, op3 // should be eliminated in later passes.
 914 ;;
 915 ;; For TARGET_DOTPROD we do:
 916 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 917 ;; UABD tmp2.16b, op1.16b, op2.16b
 918 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 919 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 920 ;;
 921 ;; The signed version just uses the signed variants of the above instructions
 922 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 923 ;; unsigned.
 924
 925 (define_expand "<sur>sadv16qi"
 926   [(use (match_operand:V4SI 0 "register_operand"))
 927    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 928                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 929    (use (match_operand:V4SI 3 "register_operand"))]
 930   "TARGET_SIMD"
 931   {
 932     if (TARGET_DOTPROD)
 933       {
 934         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 935         rtx abd = gen_reg_rtx (V16QImode);
 936         emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
 937         emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
 938         DONE;
 939       }
 940     rtx reduc = gen_reg_rtx (V8HImode);
 941     emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
 942                                             operands[2]));
 943     emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
 944                                           gen_lowpart (V8QImode, operands[1]),
 945                                           gen_lowpart (V8QImode,
 946                                                        operands[2])));
 947     emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
 948     emit_move_insn (operands[0], operands[3]);
 949     DONE;
 950   }
 951 )
 952
 953 (define_insn "aarch64_<su>aba<mode>"
 954   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 955         (plus:VDQ_BHSI (minus:VDQ_BHSI
 956                          (USMAX:VDQ_BHSI
 957                            (match_operand:VDQ_BHSI 2 "register_operand" "w")
 958                            (match_operand:VDQ_BHSI 3 "register_operand" "w"))
 959                          (<max_opp>:VDQ_BHSI
 960                            (match_dup 2)
 961                            (match_dup 3)))
 962                        (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 963   "TARGET_SIMD"
 964   "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
 965   [(set_attr "type" "neon_arith_acc<q>")]
 966 )
 967
 968 (define_insn "fabd<mode>3"
 969   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 970         (abs:VHSDF_HSDF
 971           (minus:VHSDF_HSDF
 972             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 973             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 974   "TARGET_SIMD"
 975   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 976   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 977 )
 978
 979 ;; For AND (vector, register) and BIC (vector, immediate)
 980 (define_insn "and<mode>3"
 981   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 982         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 983                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 984   "TARGET_SIMD"
 985   {
 986     switch (which_alternative)
 987       {
 988       case 0:
 989         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 990       case 1:
 991         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 992                                                   AARCH64_CHECK_BIC);
 993       default:
 994         gcc_unreachable ();
 995       }
 996   }
 997   [(set_attr "type" "neon_logic<q>")]
 998 )
 999
1000 ;; For ORR (vector, register) and ORR (vector, immediate)
1001 (define_insn "ior<mode>3"
1002   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1003         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
1004                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
1005   "TARGET_SIMD"
1006   {
1007     switch (which_alternative)
1008       {
1009       case 0:
1010         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
1011       case 1:
1012         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
1013                                                   AARCH64_CHECK_ORR);
1014       default:
1015         gcc_unreachable ();
1016       }
1017   }
1018   [(set_attr "type" "neon_logic<q>")]
1019 )
1020
1021 (define_insn "xor<mode>3"
1022   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1023         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1024                  (match_operand:VDQ_I 2 "register_operand" "w")))]
1025   "TARGET_SIMD"
1026   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1027   [(set_attr "type" "neon_logic<q>")]
1028 )
1029
1030 (define_insn "one_cmpl<mode>2"
1031   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1032         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1033   "TARGET_SIMD"
1034   "not\t%0.<Vbtype>, %1.<Vbtype>"
1035   [(set_attr "type" "neon_logic<q>")]
1036 )
1037
1038 (define_insn "aarch64_simd_vec_set<mode>"
1039   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1040         (vec_merge:VALL_F16
1041             (vec_duplicate:VALL_F16
1042                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
1043             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1044             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1045   "TARGET_SIMD"
1046   {
1047    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1048    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1049    switch (which_alternative)
1050      {
1051      case 0:
1052         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1053      case 1:
1054         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1055      case 2:
1056         return "ld1\\t{%0.<Vetype>}[%p2], %1";
1057      default:
1058         gcc_unreachable ();
1059      }
1060   }
1061   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1062 )
1063
1064 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1065   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1066         (vec_merge:VALL_F16
1067             (vec_duplicate:VALL_F16
1068               (vec_select:<VEL>
1069                 (match_operand:VALL_F16 3 "register_operand" "w")
1070                 (parallel
1071                   [(match_operand:SI 4 "immediate_operand" "i")])))
1072             (match_operand:VALL_F16 1 "register_operand" "0")
1073             (match_operand:SI 2 "immediate_operand" "i")))]
1074   "TARGET_SIMD"
1075   {
1076     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1077     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1078     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1079
1080     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1081   }
1082   [(set_attr "type" "neon_ins<q>")]
1083 )
1084
1085 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1086   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1087         (vec_merge:VALL_F16_NO_V2Q
1088             (vec_duplicate:VALL_F16_NO_V2Q
1089               (vec_select:<VEL>
1090                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1091                 (parallel
1092                   [(match_operand:SI 4 "immediate_operand" "i")])))
1093             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1094             (match_operand:SI 2 "immediate_operand" "i")))]
1095   "TARGET_SIMD"
1096   {
1097     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1098     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1099     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1100                                            INTVAL (operands[4]));
1101
1102     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1103   }
1104   [(set_attr "type" "neon_ins<q>")]
1105 )
1106
1107 (define_expand "signbit<mode>2"
1108   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1109    (use (match_operand:VDQSF 1 "register_operand"))]
1110   "TARGET_SIMD"
1111 {
1112   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1113   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1114                                                         shift_amount);
1115   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1116
1117   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1118                                                  shift_vector));
1119   DONE;
1120 })
1121
1122 (define_insn "aarch64_simd_lshr<mode>"
1123  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1124        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1125                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1126  "TARGET_SIMD"
1127  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1128   [(set_attr "type" "neon_shift_imm<q>")]
1129 )
1130
1131 (define_insn "aarch64_simd_ashr<mode>"
1132  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1133        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
1134                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "D1,Dr")))]
1135  "TARGET_SIMD"
1136  "@
1137   cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1138   sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1139   [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
1140 )
1141
1142 (define_insn "*aarch64_simd_sra<mode>"
1143  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1144         (plus:VDQ_I
1145            (SHIFTRT:VDQ_I
1146                 (match_operand:VDQ_I 1 "register_operand" "w")
1147                 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1148            (match_operand:VDQ_I 3 "register_operand" "0")))]
1149   "TARGET_SIMD"
1150   "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1151   [(set_attr "type" "neon_shift_acc<q>")]
1152 )
1153
1154 (define_insn "aarch64_simd_imm_shl<mode>"
1155  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1156        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1157                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1158  "TARGET_SIMD"
1159   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1160   [(set_attr "type" "neon_shift_imm<q>")]
1161 )
1162
1163 (define_insn "aarch64_simd_reg_sshl<mode>"
1164  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1165        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1166                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1167  "TARGET_SIMD"
1168  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1169   [(set_attr "type" "neon_shift_reg<q>")]
1170 )
1171
1172 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1173  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1175                     (match_operand:VDQ_I 2 "register_operand" "w")]
1176                    UNSPEC_ASHIFT_UNSIGNED))]
1177  "TARGET_SIMD"
1178  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1179   [(set_attr "type" "neon_shift_reg<q>")]
1180 )
1181
1182 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1183  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1184        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1185                     (match_operand:VDQ_I 2 "register_operand" "w")]
1186                    UNSPEC_ASHIFT_SIGNED))]
1187  "TARGET_SIMD"
1188  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1189   [(set_attr "type" "neon_shift_reg<q>")]
1190 )
1191
1192 (define_expand "ashl<mode>3"
1193   [(match_operand:VDQ_I 0 "register_operand")
1194    (match_operand:VDQ_I 1 "register_operand")
1195    (match_operand:SI  2 "general_operand")]
1196  "TARGET_SIMD"
1197 {
1198   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1199   int shift_amount;
1200
1201   if (CONST_INT_P (operands[2]))
1202     {
1203       shift_amount = INTVAL (operands[2]);
1204       if (shift_amount >= 0 && shift_amount < bit_width)
1205         {
1206           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1207                                                        shift_amount);
1208           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1209                                                      operands[1],
1210                                                      tmp));
1211           DONE;
1212         }
1213     }
1214
1215   operands[2] = force_reg (SImode, operands[2]);
1216
1217   rtx tmp = gen_reg_rtx (<MODE>mode);
1218   emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1219                                                                operands[2],
1220                                                                0)));
1221   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1222   DONE;
1223 })
1224
1225 (define_expand "lshr<mode>3"
1226   [(match_operand:VDQ_I 0 "register_operand")
1227    (match_operand:VDQ_I 1 "register_operand")
1228    (match_operand:SI  2 "general_operand")]
1229  "TARGET_SIMD"
1230 {
1231   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1232   int shift_amount;
1233
1234   if (CONST_INT_P (operands[2]))
1235     {
1236       shift_amount = INTVAL (operands[2]);
1237       if (shift_amount > 0 && shift_amount <= bit_width)
1238         {
1239           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1240                                                        shift_amount);
1241           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1242                                                   operands[1],
1243                                                   tmp));
1244           DONE;
1245         }
1246     }
1247
1248   operands[2] = force_reg (SImode, operands[2]);
1249
1250   rtx tmp = gen_reg_rtx (SImode);
1251   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1252   emit_insn (gen_negsi2 (tmp, operands[2]));
1253   emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1254                                          convert_to_mode (<VEL>mode, tmp, 0)));
1255   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1256                                                       tmp1));
1257   DONE;
1258 })
1259
1260 (define_expand "ashr<mode>3"
1261   [(match_operand:VDQ_I 0 "register_operand")
1262    (match_operand:VDQ_I 1 "register_operand")
1263    (match_operand:SI  2 "general_operand")]
1264  "TARGET_SIMD"
1265 {
1266   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1267   int shift_amount;
1268
1269   if (CONST_INT_P (operands[2]))
1270     {
1271       shift_amount = INTVAL (operands[2]);
1272       if (shift_amount > 0 && shift_amount <= bit_width)
1273         {
1274           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1275                                                        shift_amount);
1276           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1277                                                   operands[1],
1278                                                   tmp));
1279           DONE;
1280         }
1281     }
1282
1283   operands[2] = force_reg (SImode, operands[2]);
1284
1285   rtx tmp = gen_reg_rtx (SImode);
1286   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1287   emit_insn (gen_negsi2 (tmp, operands[2]));
1288   emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1289                                                                 tmp, 0)));
1290   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1291                                                     tmp1));
1292   DONE;
1293 })
1294
1295 (define_expand "vashl<mode>3"
1296  [(match_operand:VDQ_I 0 "register_operand")
1297   (match_operand:VDQ_I 1 "register_operand")
1298   (match_operand:VDQ_I 2 "register_operand")]
1299  "TARGET_SIMD"
1300 {
1301   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1302                                               operands[2]));
1303   DONE;
1304 })
1305
1306 (define_expand "vashr<mode>3"
1307  [(match_operand:VDQ_I 0 "register_operand")
1308   (match_operand:VDQ_I 1 "register_operand")
1309   (match_operand:VDQ_I 2 "register_operand")]
1310  "TARGET_SIMD"
1311 {
1312   rtx neg = gen_reg_rtx (<MODE>mode);
1313   emit (gen_neg<mode>2 (neg, operands[2]));
1314   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1315                                                     neg));
1316   DONE;
1317 })
1318
1319 ;; DI vector shift
1320 (define_expand "aarch64_ashr_simddi"
1321   [(match_operand:DI 0 "register_operand")
1322    (match_operand:DI 1 "register_operand")
1323    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1324   "TARGET_SIMD"
1325   {
1326     /* An arithmetic shift right by 64 fills the result with copies of the sign
1327        bit, just like asr by 63 - however the standard pattern does not handle
1328        a shift by 64.  */
1329     if (INTVAL (operands[2]) == 64)
1330       operands[2] = GEN_INT (63);
1331     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1332     DONE;
1333   }
1334 )
1335
1336 (define_expand "vlshr<mode>3"
1337  [(match_operand:VDQ_I 0 "register_operand")
1338   (match_operand:VDQ_I 1 "register_operand")
1339   (match_operand:VDQ_I 2 "register_operand")]
1340  "TARGET_SIMD"
1341 {
1342   rtx neg = gen_reg_rtx (<MODE>mode);
1343   emit (gen_neg<mode>2 (neg, operands[2]));
1344   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1345                                                       neg));
1346   DONE;
1347 })
1348
1349 (define_expand "aarch64_lshr_simddi"
1350   [(match_operand:DI 0 "register_operand")
1351    (match_operand:DI 1 "register_operand")
1352    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1353   "TARGET_SIMD"
1354   {
1355     if (INTVAL (operands[2]) == 64)
1356       emit_move_insn (operands[0], const0_rtx);
1357     else
1358       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1359     DONE;
1360   }
1361 )
1362
1363 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1364 (define_insn "vec_shr_<mode>"
1365   [(set (match_operand:VD 0 "register_operand" "=w")
1366         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1367                     (match_operand:SI 2 "immediate_operand" "i")]
1368                    UNSPEC_VEC_SHR))]
1369   "TARGET_SIMD"
1370   {
1371     if (BYTES_BIG_ENDIAN)
1372       return "shl %d0, %d1, %2";
1373     else
1374       return "ushr %d0, %d1, %2";
1375   }
1376   [(set_attr "type" "neon_shift_imm")]
1377 )
1378
1379 (define_expand "vec_set<mode>"
1380   [(match_operand:VALL_F16 0 "register_operand")
1381    (match_operand:<VEL> 1 "register_operand")
1382    (match_operand:SI 2 "immediate_operand")]
1383   "TARGET_SIMD"
1384   {
1385     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1386     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1387                                           GEN_INT (elem), operands[0]));
1388     DONE;
1389   }
1390 )
1391
1392
1393 (define_insn "aarch64_mla<mode>"
1394  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1395        (plus:VDQ_BHSI (mult:VDQ_BHSI
1396                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1397                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1398                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1399  "TARGET_SIMD"
1400  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1401   [(set_attr "type" "neon_mla_<Vetype><q>")]
1402 )
1403
1404 (define_insn "*aarch64_mla_elt<mode>"
1405  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1406        (plus:VDQHS
1407          (mult:VDQHS
1408            (vec_duplicate:VDQHS
1409               (vec_select:<VEL>
1410                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1411                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1412            (match_operand:VDQHS 3 "register_operand" "w"))
1413          (match_operand:VDQHS 4 "register_operand" "0")))]
1414  "TARGET_SIMD"
1415   {
1416     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1417     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1418   }
1419   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1420 )
1421
1422 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1423  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1424        (plus:VDQHS
1425          (mult:VDQHS
1426            (vec_duplicate:VDQHS
1427               (vec_select:<VEL>
1428                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1429                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1430            (match_operand:VDQHS 3 "register_operand" "w"))
1431          (match_operand:VDQHS 4 "register_operand" "0")))]
1432  "TARGET_SIMD"
1433   {
1434     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1435     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1436   }
1437   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1438 )
1439
1440 (define_insn "aarch64_mla_n<mode>"
1441  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1442         (plus:VDQHS
1443           (mult:VDQHS
1444             (vec_duplicate:VDQHS
1445               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1446             (match_operand:VDQHS 2 "register_operand" "w"))
1447           (match_operand:VDQHS 1 "register_operand" "0")))]
1448  "TARGET_SIMD"
1449  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1450   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1451 )
1452
1453 (define_insn "aarch64_mls<mode>"
1454  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1455        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1456                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1457                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1458  "TARGET_SIMD"
1459  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1460   [(set_attr "type" "neon_mla_<Vetype><q>")]
1461 )
1462
1463 (define_insn "*aarch64_mls_elt<mode>"
1464  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1465        (minus:VDQHS
1466          (match_operand:VDQHS 4 "register_operand" "0")
1467          (mult:VDQHS
1468            (vec_duplicate:VDQHS
1469               (vec_select:<VEL>
1470                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1471                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1472            (match_operand:VDQHS 3 "register_operand" "w"))))]
1473  "TARGET_SIMD"
1474   {
1475     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1476     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1477   }
1478   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1479 )
1480
1481 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1482  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1483        (minus:VDQHS
1484          (match_operand:VDQHS 4 "register_operand" "0")
1485          (mult:VDQHS
1486            (vec_duplicate:VDQHS
1487               (vec_select:<VEL>
1488                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1489                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1490            (match_operand:VDQHS 3 "register_operand" "w"))))]
1491  "TARGET_SIMD"
1492   {
1493     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1494     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1495   }
1496   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1497 )
1498
1499 (define_insn "aarch64_mls_n<mode>"
1500   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1501         (minus:VDQHS
1502           (match_operand:VDQHS 1 "register_operand" "0")
1503           (mult:VDQHS
1504             (vec_duplicate:VDQHS
1505               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1506             (match_operand:VDQHS 2 "register_operand" "w"))))]
1507   "TARGET_SIMD"
1508   "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1509   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1510 )
1511
1512 ;; Max/Min operations.
1513 (define_insn "<su><maxmin><mode>3"
1514  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1515        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1516                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1517  "TARGET_SIMD"
1518  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519   [(set_attr "type" "neon_minmax<q>")]
1520 )
1521
1522 (define_expand "<su><maxmin>v2di3"
1523  [(set (match_operand:V2DI 0 "register_operand")
1524        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1525                     (match_operand:V2DI 2 "register_operand")))]
1526  "TARGET_SIMD"
1527 {
1528   enum rtx_code cmp_operator;
1529   rtx cmp_fmt;
1530
1531   switch (<CODE>)
1532     {
1533     case UMIN:
1534       cmp_operator = LTU;
1535       break;
1536     case SMIN:
1537       cmp_operator = LT;
1538       break;
1539     case UMAX:
1540       cmp_operator = GTU;
1541       break;
1542     case SMAX:
1543       cmp_operator = GT;
1544       break;
1545     default:
1546       gcc_unreachable ();
1547     }
1548
1549   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1550   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1551               operands[2], cmp_fmt, operands[1], operands[2]));
1552   DONE;
1553 })
1554
1555 ;; Pairwise Integer Max/Min operations.
1556 (define_insn "aarch64_<optab>p<mode>"
1557  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1558        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1559                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1560                         MAXMINV))]
1561  "TARGET_SIMD"
1562  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1563   [(set_attr "type" "neon_minmax<q>")]
1564 )
1565
1566 ;; Pairwise FP Max/Min operations.
1567 (define_insn "aarch64_<optab>p<mode>"
1568  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1570                       (match_operand:VHSDF 2 "register_operand" "w")]
1571                       FMAXMINV))]
1572  "TARGET_SIMD"
1573  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1574   [(set_attr "type" "neon_minmax<q>")]
1575 )
1576
1577 ;; vec_concat gives a new vector with the low elements from operand 1, and
1578 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1579 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1580 ;; What that means, is that the RTL descriptions of the below patterns
1581 ;; need to change depending on endianness.
1582
1583 ;; Move to the low architectural bits of the register.
1584 ;; On little-endian this is { operand, zeroes }
1585 ;; On big-endian this is { zeroes, operand }
1586
1587 (define_insn "move_lo_quad_internal_<mode>"
1588   [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1589         (vec_concat:VQMOV
1590           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1591           (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
1592   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1593   "@
1594    dup\\t%d0, %1.d[0]
1595    fmov\\t%d0, %1
1596    dup\\t%d0, %1"
1597   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1598    (set_attr "length" "4")
1599    (set_attr "arch" "simd,fp,simd")]
1600 )
1601
1602 (define_insn "move_lo_quad_internal_be_<mode>"
1603   [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1604         (vec_concat:VQMOV
1605           (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
1606           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1607   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1608   "@
1609    dup\\t%d0, %1.d[0]
1610    fmov\\t%d0, %1
1611    dup\\t%d0, %1"
1612   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1613    (set_attr "length" "4")
1614    (set_attr "arch" "simd,fp,simd")]
1615 )
1616
1617 (define_expand "move_lo_quad_<mode>"
1618   [(match_operand:VQMOV 0 "register_operand")
1619    (match_operand:<VHALF> 1 "register_operand")]
1620   "TARGET_SIMD"
1621 {
1622   rtx zs = CONST0_RTX (<VHALF>mode);
1623   if (BYTES_BIG_ENDIAN)
1624     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
1625   else
1626     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
1627   DONE;
1628 }
1629 )
1630
1631 ;; Move operand1 to the high architectural bits of the register, keeping
1632 ;; the low architectural bits of operand2.
1633 ;; For little-endian this is { operand2, operand1 }
1634 ;; For big-endian this is { operand1, operand2 }
1635
1636 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1637   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1638         (vec_concat:VQMOV
1639           (vec_select:<VHALF>
1640                 (match_dup 0)
1641                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1642           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1643   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1644   "@
1645    ins\\t%0.d[1], %1.d[0]
1646    ins\\t%0.d[1], %1"
1647   [(set_attr "type" "neon_ins")]
1648 )
1649
1650 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1651   [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1652         (vec_concat:VQMOV
1653           (match_operand:<VHALF> 1 "register_operand" "w,r")
1654           (vec_select:<VHALF>
1655                 (match_dup 0)
1656                 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1657   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1658   "@
1659    ins\\t%0.d[1], %1.d[0]
1660    ins\\t%0.d[1], %1"
1661   [(set_attr "type" "neon_ins")]
1662 )
1663
1664 (define_expand "move_hi_quad_<mode>"
1665  [(match_operand:VQMOV 0 "register_operand")
1666   (match_operand:<VHALF> 1 "register_operand")]
1667  "TARGET_SIMD"
1668 {
1669   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1670   if (BYTES_BIG_ENDIAN)
1671     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1672                     operands[1], p));
1673   else
1674     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1675                     operands[1], p));
1676   DONE;
1677 })
1678
1679 ;; Narrowing operations.
1680
1681 (define_insn "aarch64_xtn<mode>_insn_le"
1682   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1683         (vec_concat:<VNARROWQ2>
1684           (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1685           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
1686   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1687   "xtn\\t%0.<Vntype>, %1.<Vtype>"
1688   [(set_attr "type" "neon_move_narrow_q")]
1689 )
1690
1691 (define_insn "aarch64_xtn<mode>_insn_be"
1692   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1693         (vec_concat:<VNARROWQ2>
1694           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
1695           (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
1696   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1697   "xtn\\t%0.<Vntype>, %1.<Vtype>"
1698   [(set_attr "type" "neon_move_narrow_q")]
1699 )
1700
1701 (define_expand "aarch64_xtn<mode>"
1702   [(set (match_operand:<VNARROWQ> 0 "register_operand")
1703         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
1704   "TARGET_SIMD"
1705   {
1706     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1707     if (BYTES_BIG_ENDIAN)
1708       emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
1709                                 CONST0_RTX (<VNARROWQ>mode)));
1710     else
1711       emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
1712                                 CONST0_RTX (<VNARROWQ>mode)));
1713
1714     /* The intrinsic expects a narrow result, so emit a subreg that will get
1715        optimized away as appropriate.  */
1716     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1717                                                  <VNARROWQ2>mode));
1718     DONE;
1719   }
1720 )
1721
1722 (define_insn "aarch64_xtn2<mode>_insn_le"
1723   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1724         (vec_concat:<VNARROWQ2>
1725           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1726           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1727   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1728   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1729   [(set_attr "type" "neon_move_narrow_q")]
1730 )
1731
1732 (define_insn "aarch64_xtn2<mode>_insn_be"
1733   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1734         (vec_concat:<VNARROWQ2>
1735           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1736           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1737   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1738   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1739   [(set_attr "type" "neon_move_narrow_q")]
1740 )
1741
1742 (define_expand "aarch64_xtn2<mode>"
1743   [(match_operand:<VNARROWQ2> 0 "register_operand")
1744    (match_operand:<VNARROWQ> 1 "register_operand")
1745    (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1746   "TARGET_SIMD"
1747   {
1748     if (BYTES_BIG_ENDIAN)
1749       emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1750                                                  operands[2]));
1751     else
1752       emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1753                                                  operands[2]));
1754     DONE;
1755   }
1756 )
1757
1758 (define_insn "*aarch64_narrow_trunc<mode>"
1759   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1760         (vec_concat:<VNARROWQ2>
1761           (truncate:<VNARROWQ>
1762             (match_operand:VQN 1 "register_operand" "w"))
1763           (truncate:<VNARROWQ>
1764             (match_operand:VQN 2 "register_operand" "w"))))]
1765   "TARGET_SIMD"
1766 {
1767   if (!BYTES_BIG_ENDIAN)
1768     return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1769   else
1770     return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1771 }
1772   [(set_attr "type" "neon_permute<q>")]
1773 )
1774
1775 ;; Packing doubles.
1776
1777 (define_expand "vec_pack_trunc_<mode>"
1778  [(match_operand:<VNARROWD> 0 "register_operand")
1779   (match_operand:VDN 1 "register_operand")
1780   (match_operand:VDN 2 "register_operand")]
1781  "TARGET_SIMD"
1782 {
1783   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1784   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1785   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1786
1787   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1788   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1789   emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1790   DONE;
1791 })
1792
1793 ;; Packing quads.
1794
1795 (define_expand "vec_pack_trunc_<mode>"
1796  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1797        (vec_concat:<VNARROWQ2>
1798          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1799          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1800  "TARGET_SIMD"
1801  {
1802    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1803    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1804    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1805
1806    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1807
1808    if (BYTES_BIG_ENDIAN)
1809      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1810                                                 operands[hi]));
1811    else
1812      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1813                                                 operands[hi]));
1814    DONE;
1815  }
1816 )
1817
1818 (define_insn "aarch64_shrn<mode>_insn_le"
1819   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1820         (vec_concat:<VNARROWQ2>
1821           (truncate:<VNARROWQ>
1822             (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1823               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1824           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1825   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1826   "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1827   [(set_attr "type" "neon_shift_imm_narrow_q")]
1828 )
1829
1830 (define_insn "aarch64_shrn<mode>_insn_be"
1831   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1832         (vec_concat:<VNARROWQ2>
1833           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1834           (truncate:<VNARROWQ>
1835             (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1836               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1837   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1838   "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1839   [(set_attr "type" "neon_shift_imm_narrow_q")]
1840 )
1841
1842 (define_insn "*aarch64_<srn_op>shrn<mode>_vect"
1843   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1844         (truncate:<VNARROWQ>
1845           (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1846             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
1847   "TARGET_SIMD"
1848   "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1849   [(set_attr "type" "neon_shift_imm_narrow_q")]
1850 )
1851
1852 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
1853   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1854         (vec_concat:<VNARROWQ2>
1855           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1856           (truncate:<VNARROWQ>
1857             (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1858               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1859   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1860   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1861   [(set_attr "type" "neon_shift_imm_narrow_q")]
1862 )
1863
1864 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
1865   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1866         (vec_concat:<VNARROWQ2>
1867           (truncate:<VNARROWQ>
1868             (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1869               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1870           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1871   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1872   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1873   [(set_attr "type" "neon_shift_imm_narrow_q")]
1874 )
1875
1876 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1877   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1878         (vec_concat:<VNARROWQ2>
1879           (truncate:<VNARROWQ>
1880             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1881               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1882           (truncate:<VNARROWQ>
1883             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1884               (match_dup 2)))))]
1885   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1886   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1887   [(set_attr "type" "neon_permute<q>")]
1888 )
1889
1890 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1891   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1892         (vec_concat:<VNARROWQ2>
1893           (truncate:<VNARROWQ>
1894             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1895               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1896           (truncate:<VNARROWQ>
1897             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1898               (match_dup 2)))))]
1899   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1900   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1901   [(set_attr "type" "neon_permute<q>")]
1902 )
1903
1904 (define_expand "aarch64_shrn<mode>"
1905   [(set (match_operand:<VNARROWQ> 0 "register_operand")
1906         (truncate:<VNARROWQ>
1907           (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
1908             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
1909   "TARGET_SIMD"
1910   {
1911     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1912                                                  INTVAL (operands[2]));
1913     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1914     if (BYTES_BIG_ENDIAN)
1915       emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
1916                                 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1917     else
1918       emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
1919                                 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1920
1921     /* The intrinsic expects a narrow result, so emit a subreg that will get
1922        optimized away as appropriate.  */
1923     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1924                                                  <VNARROWQ2>mode));
1925     DONE;
1926   }
1927 )
1928
1929 (define_insn "aarch64_rshrn<mode>_insn_le"
1930   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1931         (vec_concat:<VNARROWQ2>
1932           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1933                 (match_operand:VQN 2
1934                   "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
1935           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1936   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1937   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1938   [(set_attr "type" "neon_shift_imm_narrow_q")]
1939 )
1940
1941 (define_insn "aarch64_rshrn<mode>_insn_be"
1942   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1943         (vec_concat:<VNARROWQ2>
1944           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1945           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1946                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1947                   UNSPEC_RSHRN)))]
1948   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1949   "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1950   [(set_attr "type" "neon_shift_imm_narrow_q")]
1951 )
1952
1953 (define_expand "aarch64_rshrn<mode>"
1954   [(match_operand:<VNARROWQ> 0 "register_operand")
1955    (match_operand:VQN 1 "register_operand")
1956    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1957   "TARGET_SIMD"
1958   {
1959     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
1960       {
1961         rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
1962         emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
1963       }
1964     else
1965       {
1966         rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1967         operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1968                                                          INTVAL (operands[2]));
1969         if (BYTES_BIG_ENDIAN)
1970           emit_insn (
1971                 gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
1972                                                  operands[2],
1973                                                  CONST0_RTX (<VNARROWQ>mode)));
1974         else
1975           emit_insn (
1976                 gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
1977                                                  operands[2],
1978                                                  CONST0_RTX (<VNARROWQ>mode)));
1979
1980         /* The intrinsic expects a narrow result, so emit a subreg that will
1981            get optimized away as appropriate.  */
1982         emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
1983                                                      <VNARROWQ2>mode));
1984       }
1985     DONE;
1986   }
1987 )
1988
1989 (define_insn "aarch64_shrn2<mode>_insn_le"
1990   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1991         (vec_concat:<VNARROWQ2>
1992           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1993           (truncate:<VNARROWQ>
1994             (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
1995               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1996   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1997   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1998   [(set_attr "type" "neon_shift_imm_narrow_q")]
1999 )
2000
2001 (define_insn "aarch64_shrn2<mode>_insn_be"
2002   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2003         (vec_concat:<VNARROWQ2>
2004           (truncate:<VNARROWQ>
2005             (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
2006               (match_operand:VQN 3
2007                 "aarch64_simd_shift_imm_vec_<vn_mode>")))
2008           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2009   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2010   "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2011   [(set_attr "type" "neon_shift_imm_narrow_q")]
2012 )
2013
2014 (define_expand "aarch64_shrn2<mode>"
2015   [(match_operand:<VNARROWQ2> 0 "register_operand")
2016    (match_operand:<VNARROWQ> 1 "register_operand")
2017    (match_operand:VQN 2 "register_operand")
2018    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2019   "TARGET_SIMD"
2020   {
2021     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2022                                                  INTVAL (operands[3]));
2023     if (BYTES_BIG_ENDIAN)
2024       emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
2025                                                   operands[2], operands[3]));
2026     else
2027       emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
2028                                                   operands[2], operands[3]));
2029     DONE;
2030   }
2031 )
2032
2033 (define_insn "aarch64_rshrn2<mode>_insn_le"
2034   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2035         (vec_concat:<VNARROWQ2>
2036           (match_operand:<VNARROWQ> 1 "register_operand" "0")
2037           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2038             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2039                 UNSPEC_RSHRN)))]
2040   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2041   "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2042   [(set_attr "type" "neon_shift_imm_narrow_q")]
2043 )
2044
2045 (define_insn "aarch64_rshrn2<mode>_insn_be"
2046   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2047         (vec_concat:<VNARROWQ2>
2048           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2049                 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2050                   UNSPEC_RSHRN)
2051           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2052   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2053   "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2054   [(set_attr "type" "neon_shift_imm_narrow_q")]
2055 )
2056
2057 (define_expand "aarch64_rshrn2<mode>"
2058   [(match_operand:<VNARROWQ2> 0 "register_operand")
2059    (match_operand:<VNARROWQ> 1 "register_operand")
2060    (match_operand:VQN 2 "register_operand")
2061    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2062   "TARGET_SIMD"
2063   {
2064     if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
2065       {
2066         rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
2067         emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
2068                                               operands[2], tmp));
2069       }
2070     else
2071       {
2072         operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2073                                                          INTVAL (operands[3]));
2074         if (BYTES_BIG_ENDIAN)
2075           emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
2076                                                        operands[1],
2077                                                        operands[2],
2078                                                        operands[3]));
2079         else
2080           emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
2081                                                        operands[1],
2082                                                        operands[2],
2083                                                        operands[3]));
2084       }
2085     DONE;
2086   }
2087 )
2088
2089 ;; Widening operations.
2090
2091 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
2092   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2094                                (match_operand:VQW 1 "register_operand" "w")
2095                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
2096                             )))]
2097   "TARGET_SIMD"
2098   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
2099   [(set_attr "type" "neon_shift_imm_long")]
2100 )
2101
2102 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2103   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2104         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2105                                (match_operand:VQW 1 "register_operand" "w")
2106                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2107                             )))]
2108   "TARGET_SIMD"
2109   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
2110   [(set_attr "type" "neon_shift_imm_long")]
2111 )
2112
2113 (define_expand "vec_unpack<su>_hi_<mode>"
2114   [(match_operand:<VWIDE> 0 "register_operand")
2115    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2116   "TARGET_SIMD"
2117   {
2118     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2119     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2120                                                           operands[1], p));
2121     DONE;
2122   }
2123 )
2124
2125 (define_expand "vec_unpack<su>_lo_<mode>"
2126   [(match_operand:<VWIDE> 0 "register_operand")
2127    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2128   "TARGET_SIMD"
2129   {
2130     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2131     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2132                                                           operands[1], p));
2133     DONE;
2134   }
2135 )
2136
2137 ;; Widening arithmetic.
2138
2139 (define_insn "*aarch64_<su>mlal_lo<mode>"
2140   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2141         (plus:<VWIDE>
2142           (mult:<VWIDE>
2143               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2144                  (match_operand:VQW 2 "register_operand" "w")
2145                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2146               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2147                  (match_operand:VQW 4 "register_operand" "w")
2148                  (match_dup 3))))
2149           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2150   "TARGET_SIMD"
2151   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2152   [(set_attr "type" "neon_mla_<Vetype>_long")]
2153 )
2154
2155 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2156   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2157         (plus:<VWIDE>
2158           (mult:<VWIDE>
2159               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2160                  (match_operand:VQW 2 "register_operand" "w")
2161                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2162               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2163                  (match_operand:VQW 4 "register_operand" "w")
2164                  (match_dup 3))))
2165           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2166   "TARGET_SIMD"
2167   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2168   [(set_attr "type" "neon_mla_<Vetype>_long")]
2169 )
2170
2171 (define_expand "aarch64_<su>mlal_hi<mode>"
2172   [(match_operand:<VWIDE> 0 "register_operand")
2173    (match_operand:<VWIDE> 1 "register_operand")
2174    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2175    (match_operand:VQW 3 "register_operand")]
2176   "TARGET_SIMD"
2177 {
2178   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2179   emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2180                                                  operands[2], p, operands[3]));
2181   DONE;
2182 }
2183 )
2184
2185 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2186   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2187         (plus:<VWIDE>
2188           (mult:<VWIDE>
2189             (ANY_EXTEND:<VWIDE>
2190               (vec_select:<VHALF>
2191                 (match_operand:VQ_HSI 2 "register_operand" "w")
2192                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2193             (vec_duplicate:<VWIDE>
2194               (ANY_EXTEND:<VWIDE_S>
2195                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2196           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2197   "TARGET_SIMD"
2198   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2199   [(set_attr "type" "neon_mla_<Vetype>_long")]
2200 )
2201
2202 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2203   [(match_operand:<VWIDE> 0 "register_operand")
2204    (match_operand:<VWIDE> 1 "register_operand")
2205    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2206    (match_operand:<VEL> 3 "register_operand")]
2207   "TARGET_SIMD"
2208 {
2209   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2210   emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2211              operands[1], operands[2], p, operands[3]));
2212   DONE;
2213 }
2214 )
2215
2216 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2217   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2218         (minus:<VWIDE>
2219           (match_operand:<VWIDE> 1 "register_operand" "0")
2220           (mult:<VWIDE>
2221               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2222                  (match_operand:VQW 2 "register_operand" "w")
2223                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2224               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2225                  (match_operand:VQW 4 "register_operand" "w")
2226                  (match_dup 3))))))]
2227   "TARGET_SIMD"
2228   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2229   [(set_attr "type" "neon_mla_<Vetype>_long")]
2230 )
2231
2232 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2234         (minus:<VWIDE>
2235           (match_operand:<VWIDE> 1 "register_operand" "0")
2236           (mult:<VWIDE>
2237               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2238                  (match_operand:VQW 2 "register_operand" "w")
2239                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2240               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2241                  (match_operand:VQW 4 "register_operand" "w")
2242                  (match_dup 3))))))]
2243   "TARGET_SIMD"
2244   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2245   [(set_attr "type" "neon_mla_<Vetype>_long")]
2246 )
2247
2248 (define_expand "aarch64_<su>mlsl_hi<mode>"
2249   [(match_operand:<VWIDE> 0 "register_operand")
2250    (match_operand:<VWIDE> 1 "register_operand")
2251    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2252    (match_operand:VQW 3 "register_operand")]
2253   "TARGET_SIMD"
2254 {
2255   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2256   emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2257                                                  operands[2], p, operands[3]));
2258   DONE;
2259 }
2260 )
2261
2262 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2263   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2264         (minus:<VWIDE>
2265           (match_operand:<VWIDE> 1 "register_operand" "0")
2266           (mult:<VWIDE>
2267             (ANY_EXTEND:<VWIDE>
2268               (vec_select:<VHALF>
2269                 (match_operand:VQ_HSI 2 "register_operand" "w")
2270                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2271             (vec_duplicate:<VWIDE>
2272               (ANY_EXTEND:<VWIDE_S>
2273                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2274   "TARGET_SIMD"
2275   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2276   [(set_attr "type" "neon_mla_<Vetype>_long")]
2277 )
2278
2279 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2280   [(match_operand:<VWIDE> 0 "register_operand")
2281    (match_operand:<VWIDE> 1 "register_operand")
2282    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2283    (match_operand:<VEL> 3 "register_operand")]
2284   "TARGET_SIMD"
2285 {
2286   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2287   emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2288              operands[1], operands[2], p, operands[3]));
2289   DONE;
2290 }
2291 )
2292
2293 (define_insn "aarch64_<su>mlal<mode>"
2294   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2295         (plus:<VWIDE>
2296           (mult:<VWIDE>
2297             (ANY_EXTEND:<VWIDE>
2298               (match_operand:VD_BHSI 2 "register_operand" "w"))
2299             (ANY_EXTEND:<VWIDE>
2300               (match_operand:VD_BHSI 3 "register_operand" "w")))
2301           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2302   "TARGET_SIMD"
2303   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2304   [(set_attr "type" "neon_mla_<Vetype>_long")]
2305 )
2306
2307 (define_insn "aarch64_<su>mlal_n<mode>"
2308   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2309         (plus:<VWIDE>
2310           (mult:<VWIDE>
2311             (ANY_EXTEND:<VWIDE>
2312               (match_operand:VD_HSI 2 "register_operand" "w"))
2313             (vec_duplicate:<VWIDE>
2314               (ANY_EXTEND:<VWIDE_S>
2315                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2316           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2317   "TARGET_SIMD"
2318   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2319   [(set_attr "type" "neon_mla_<Vetype>_long")]
2320 )
2321
2322 (define_insn "aarch64_<su>mlsl<mode>"
2323   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2324         (minus:<VWIDE>
2325           (match_operand:<VWIDE> 1 "register_operand" "0")
2326           (mult:<VWIDE>
2327             (ANY_EXTEND:<VWIDE>
2328               (match_operand:VD_BHSI 2 "register_operand" "w"))
2329             (ANY_EXTEND:<VWIDE>
2330               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2331   "TARGET_SIMD"
2332   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2333   [(set_attr "type" "neon_mla_<Vetype>_long")]
2334 )
2335
2336 (define_insn "aarch64_<su>mlsl_n<mode>"
2337   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2338         (minus:<VWIDE>
2339           (match_operand:<VWIDE> 1 "register_operand" "0")
2340           (mult:<VWIDE>
2341             (ANY_EXTEND:<VWIDE>
2342               (match_operand:VD_HSI 2 "register_operand" "w"))
2343             (vec_duplicate:<VWIDE>
2344               (ANY_EXTEND:<VWIDE_S>
2345                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2346   "TARGET_SIMD"
2347   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2348   [(set_attr "type" "neon_mla_<Vetype>_long")]
2349 )
2350
2351 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2352  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2353        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2354                            (match_operand:VQW 1 "register_operand" "w")
2355                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2356                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2357                            (match_operand:VQW 2 "register_operand" "w")
2358                            (match_dup 3)))))]
2359   "TARGET_SIMD"
2360   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2361   [(set_attr "type" "neon_mul_<Vetype>_long")]
2362 )
2363
2364 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2365   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2366         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2367                          (match_operand:VD_BHSI 1 "register_operand" "w"))
2368                       (ANY_EXTEND:<VWIDE>
2369                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2370   "TARGET_SIMD"
2371   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2372   [(set_attr "type" "neon_mul_<Vetype>_long")]
2373 )
2374
2375 (define_expand "vec_widen_<su>mult_lo_<mode>"
2376   [(match_operand:<VWIDE> 0 "register_operand")
2377    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2378    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2379  "TARGET_SIMD"
2380  {
2381    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2382    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2383                                                        operands[1],
2384                                                        operands[2], p));
2385    DONE;
2386  }
2387 )
2388
2389 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2390  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2391       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2392                             (match_operand:VQW 1 "register_operand" "w")
2393                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2394                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2395                             (match_operand:VQW 2 "register_operand" "w")
2396                             (match_dup 3)))))]
2397   "TARGET_SIMD"
2398   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2399   [(set_attr "type" "neon_mul_<Vetype>_long")]
2400 )
2401
2402 (define_expand "vec_widen_<su>mult_hi_<mode>"
2403   [(match_operand:<VWIDE> 0 "register_operand")
2404    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2405    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2406  "TARGET_SIMD"
2407  {
2408    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2410                                                        operands[1],
2411                                                        operands[2], p));
2412    DONE;
2413
2414  }
2415 )
2416
2417 ;; vmull_lane_s16 intrinsics
2418 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2419   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2420         (mult:<VWIDE>
2421           (ANY_EXTEND:<VWIDE>
2422             (match_operand:<VCOND> 1 "register_operand" "w"))
2423           (vec_duplicate:<VWIDE>
2424             (ANY_EXTEND:<VWIDE_S>
2425               (vec_select:<VEL>
2426                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2427                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2428   "TARGET_SIMD"
2429   {
2430     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2431     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2432   }
2433   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2434 )
2435
2436 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2437   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2438         (mult:<VWIDE>
2439           (ANY_EXTEND:<VWIDE>
2440             (vec_select:<VHALF>
2441               (match_operand:VQ_HSI 1 "register_operand" "w")
2442               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2443           (vec_duplicate:<VWIDE>
2444             (ANY_EXTEND:<VWIDE_S>
2445               (vec_select:<VEL>
2446                 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2447                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2448   "TARGET_SIMD"
2449   {
2450     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2451     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2452   }
2453   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2454 )
2455
2456 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2457   [(match_operand:<VWIDE> 0 "register_operand")
2458    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2459    (match_operand:<VCOND> 2 "register_operand")
2460    (match_operand:SI 3 "immediate_operand")]
2461   "TARGET_SIMD"
2462 {
2463   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2464   emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2465              operands[1], p, operands[2], operands[3]));
2466   DONE;
2467 }
2468 )
2469
2470 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2471   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2472         (mult:<VWIDE>
2473           (ANY_EXTEND:<VWIDE>
2474             (vec_select:<VHALF>
2475               (match_operand:VQ_HSI 1 "register_operand" "w")
2476               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2477           (vec_duplicate:<VWIDE>
2478             (ANY_EXTEND:<VWIDE_S>
2479               (vec_select:<VEL>
2480                 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2481                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2482   "TARGET_SIMD"
2483   {
2484     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2485     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2486   }
2487   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2488 )
2489
2490 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2491   [(match_operand:<VWIDE> 0 "register_operand")
2492    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2493    (match_operand:<VCONQ> 2 "register_operand")
2494    (match_operand:SI 3 "immediate_operand")]
2495   "TARGET_SIMD"
2496 {
2497   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2498   emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2499              operands[1], p, operands[2], operands[3]));
2500   DONE;
2501 }
2502 )
2503
2504 (define_insn "aarch64_<su>mull_n<mode>"
2505   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2506         (mult:<VWIDE>
2507           (ANY_EXTEND:<VWIDE>
2508             (match_operand:VD_HSI 1 "register_operand" "w"))
2509           (vec_duplicate:<VWIDE>
2510             (ANY_EXTEND:<VWIDE_S>
2511               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2512   "TARGET_SIMD"
2513   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2514   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2515 )
2516
2517 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2518   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2519         (mult:<VWIDE>
2520           (ANY_EXTEND:<VWIDE>
2521             (vec_select:<VHALF>
2522               (match_operand:VQ_HSI 1 "register_operand" "w")
2523               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2524           (vec_duplicate:<VWIDE>
2525             (ANY_EXTEND:<VWIDE_S>
2526               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2527   "TARGET_SIMD"
2528   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2529   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2530 )
2531
2532 (define_expand "aarch64_<su>mull_hi_n<mode>"
2533   [(match_operand:<VWIDE> 0 "register_operand")
2534    (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2535    (match_operand:<VEL> 2 "register_operand")]
2536  "TARGET_SIMD"
2537  {
2538    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2539    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2540                                                     operands[2], p));
2541    DONE;
2542  }
2543 )
2544
2545 ;; vmlal_lane_s16 intrinsics
2546 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2547   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2548         (plus:<VWIDE>
2549           (mult:<VWIDE>
2550             (ANY_EXTEND:<VWIDE>
2551               (match_operand:<VCOND> 2 "register_operand" "w"))
2552             (vec_duplicate:<VWIDE>
2553               (ANY_EXTEND:<VWIDE_S>
2554                 (vec_select:<VEL>
2555                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
2556                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2557           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2558   "TARGET_SIMD"
2559   {
2560     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2561     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2562   }
2563   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2564 )
2565
2566 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2567   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2568         (plus:<VWIDE>
2569           (mult:<VWIDE>
2570             (ANY_EXTEND:<VWIDE>
2571               (vec_select:<VHALF>
2572                 (match_operand:VQ_HSI 2 "register_operand" "w")
2573                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2574             (vec_duplicate:<VWIDE>
2575               (ANY_EXTEND:<VWIDE_S>
2576                 (vec_select:<VEL>
2577                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2578                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2579           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2580   "TARGET_SIMD"
2581   {
2582     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2583     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2584   }
2585   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2586 )
2587
2588 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2589   [(match_operand:<VWIDE> 0 "register_operand")
2590    (match_operand:<VWIDE> 1 "register_operand")
2591    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2592    (match_operand:<VCOND> 3 "register_operand")
2593    (match_operand:SI 4 "immediate_operand")]
2594   "TARGET_SIMD"
2595 {
2596   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2597   emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2598              operands[1], operands[2], p, operands[3], operands[4]));
2599   DONE;
2600 }
2601 )
2602
2603 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2604   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2605         (plus:<VWIDE>
2606           (mult:<VWIDE>
2607             (ANY_EXTEND:<VWIDE>
2608               (vec_select:<VHALF>
2609                 (match_operand:VQ_HSI 2 "register_operand" "w")
2610                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2611             (vec_duplicate:<VWIDE>
2612               (ANY_EXTEND:<VWIDE_S>
2613                 (vec_select:<VEL>
2614                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2615                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2616           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2617   "TARGET_SIMD"
2618   {
2619     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2620     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2621   }
2622   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2623 )
2624
2625 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2626   [(match_operand:<VWIDE> 0 "register_operand")
2627    (match_operand:<VWIDE> 1 "register_operand")
2628    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2629    (match_operand:<VCONQ> 3 "register_operand")
2630    (match_operand:SI 4 "immediate_operand")]
2631   "TARGET_SIMD"
2632 {
2633   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2634   emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2635              operands[1], operands[2], p, operands[3], operands[4]));
2636   DONE;
2637 }
2638 )
2639
2640 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2641   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2642    (minus:<VWIDE>
2643      (match_operand:<VWIDE> 1 "register_operand" "0")
2644      (mult:<VWIDE>
2645        (ANY_EXTEND:<VWIDE>
2646          (match_operand:<VCOND> 2 "register_operand" "w"))
2647        (vec_duplicate:<VWIDE>
2648          (ANY_EXTEND:<VWIDE_S>
2649            (vec_select:<VEL>
2650              (match_operand:VDQHS 3 "register_operand" "<vwx>")
2651              (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2652   "TARGET_SIMD"
2653   {
2654     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2655     return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2656   }
2657   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2658 )
2659
2660 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2661   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2662         (minus:<VWIDE>
2663           (match_operand:<VWIDE> 1 "register_operand" "0")
2664           (mult:<VWIDE>
2665             (ANY_EXTEND:<VWIDE>
2666               (vec_select:<VHALF>
2667                 (match_operand:VQ_HSI 2 "register_operand" "w")
2668                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2669             (vec_duplicate:<VWIDE>
2670               (ANY_EXTEND:<VWIDE_S>
2671                 (vec_select:<VEL>
2672                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2673                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2674           )))]
2675   "TARGET_SIMD"
2676   {
2677     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2678     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2679   }
2680   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2681 )
2682
2683 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2684   [(match_operand:<VWIDE> 0 "register_operand")
2685    (match_operand:<VWIDE> 1 "register_operand")
2686    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2687    (match_operand:<VCOND> 3 "register_operand")
2688    (match_operand:SI 4 "immediate_operand")]
2689   "TARGET_SIMD"
2690 {
2691   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2692   emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2693              operands[1], operands[2], p, operands[3], operands[4]));
2694   DONE;
2695 }
2696 )
2697
2698 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2699   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2700         (minus:<VWIDE>
2701           (match_operand:<VWIDE> 1 "register_operand" "0")
2702           (mult:<VWIDE>
2703             (ANY_EXTEND:<VWIDE>
2704               (vec_select:<VHALF>
2705                 (match_operand:VQ_HSI 2 "register_operand" "w")
2706                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2707             (vec_duplicate:<VWIDE>
2708               (ANY_EXTEND:<VWIDE_S>
2709                 (vec_select:<VEL>
2710                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2711                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2712           )))]
2713   "TARGET_SIMD"
2714   {
2715     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2716     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2717   }
2718   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2719 )
2720
2721 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2722   [(match_operand:<VWIDE> 0 "register_operand")
2723    (match_operand:<VWIDE> 1 "register_operand")
2724    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2725    (match_operand:<VCONQ> 3 "register_operand")
2726    (match_operand:SI 4 "immediate_operand")]
2727   "TARGET_SIMD"
2728 {
2729   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2730   emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2731              operands[1], operands[2], p, operands[3], operands[4]));
2732   DONE;
2733 }
2734 )
2735
2736 ;; FP vector operations.
2737 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2738 ;; double-precision (64-bit) floating-point data types and arithmetic as
2739 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
2740 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2741 ;;
2742 ;; Floating-point operations can raise an exception.  Vectorizing such
2743 ;; operations are safe because of reasons explained below.
2744 ;;
2745 ;; ARMv8 permits an extension to enable trapped floating-point
2746 ;; exception handling, however this is an optional feature.  In the
2747 ;; event of a floating-point exception being raised by vectorised
2748 ;; code then:
2749 ;; 1.  If trapped floating-point exceptions are available, then a trap
2750 ;;     will be taken when any lane raises an enabled exception.  A trap
2751 ;;     handler may determine which lane raised the exception.
2752 ;; 2.  Alternatively a sticky exception flag is set in the
2753 ;;     floating-point status register (FPSR).  Software may explicitly
2754 ;;     test the exception flags, in which case the tests will either
2755 ;;     prevent vectorisation, allowing precise identification of the
2756 ;;     failing operation, or if tested outside of vectorisable regions
2757 ;;     then the specific operation and lane are not of interest.
2758
2759 ;; FP arithmetic operations.
2760
2761 (define_insn "add<mode>3"
2762  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2763        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2764                    (match_operand:VHSDF 2 "register_operand" "w")))]
2765  "TARGET_SIMD"
2766  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2767   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2768 )
2769
2770 (define_insn "sub<mode>3"
2771  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2772        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2773                     (match_operand:VHSDF 2 "register_operand" "w")))]
2774  "TARGET_SIMD"
2775  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2776   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2777 )
2778
2779 (define_insn "mul<mode>3"
2780  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2781        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2782                    (match_operand:VHSDF 2 "register_operand" "w")))]
2783  "TARGET_SIMD"
2784  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2785   [(set_attr "type" "neon_fp_mul_<stype><q>")]
2786 )
2787
2788 (define_expand "div<mode>3"
2789  [(set (match_operand:VHSDF 0 "register_operand")
2790        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2791                   (match_operand:VHSDF 2 "register_operand")))]
2792  "TARGET_SIMD"
2793 {
2794   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2795     DONE;
2796
2797   operands[1] = force_reg (<MODE>mode, operands[1]);
2798 })
2799
2800 (define_insn "*div<mode>3"
2801  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2802        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2803                  (match_operand:VHSDF 2 "register_operand" "w")))]
2804  "TARGET_SIMD"
2805  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2806   [(set_attr "type" "neon_fp_div_<stype><q>")]
2807 )
2808
2809 (define_insn "neg<mode>2"
2810  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2811        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2812  "TARGET_SIMD"
2813  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2814   [(set_attr "type" "neon_fp_neg_<stype><q>")]
2815 )
2816
2817 (define_insn "abs<mode>2"
2818  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2819        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2820  "TARGET_SIMD"
2821  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2822   [(set_attr "type" "neon_fp_abs_<stype><q>")]
2823 )
2824
2825 (define_expand "aarch64_float_mla<mode>"
2826   [(set (match_operand:VDQF_DF 0 "register_operand")
2827         (plus:VDQF_DF
2828           (mult:VDQF_DF
2829             (match_operand:VDQF_DF 2 "register_operand")
2830             (match_operand:VDQF_DF 3 "register_operand"))
2831           (match_operand:VDQF_DF 1 "register_operand")))]
2832   "TARGET_SIMD"
2833   {
2834     rtx scratch = gen_reg_rtx (<MODE>mode);
2835     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2836     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2837     DONE;
2838   }
2839 )
2840
2841 (define_expand "aarch64_float_mls<mode>"
2842   [(set (match_operand:VDQF_DF 0 "register_operand")
2843         (minus:VDQF_DF
2844           (match_operand:VDQF_DF 1 "register_operand")
2845           (mult:VDQF_DF
2846             (match_operand:VDQF_DF 2 "register_operand")
2847             (match_operand:VDQF_DF 3 "register_operand"))))]
2848   "TARGET_SIMD"
2849   {
2850     rtx scratch = gen_reg_rtx (<MODE>mode);
2851     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2852     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2853     DONE;
2854   }
2855 )
2856
2857 (define_expand "aarch64_float_mla_n<mode>"
2858   [(set (match_operand:VDQSF 0 "register_operand")
2859         (plus:VDQSF
2860           (mult:VDQSF
2861             (vec_duplicate:VDQSF
2862               (match_operand:<VEL> 3 "register_operand"))
2863             (match_operand:VDQSF 2 "register_operand"))
2864           (match_operand:VDQSF 1 "register_operand")))]
2865   "TARGET_SIMD"
2866   {
2867     rtx scratch = gen_reg_rtx (<MODE>mode);
2868     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2869     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2870     DONE;
2871   }
2872 )
2873
2874 (define_expand "aarch64_float_mls_n<mode>"
2875   [(set (match_operand:VDQSF 0 "register_operand")
2876         (minus:VDQSF
2877           (match_operand:VDQSF 1 "register_operand")
2878           (mult:VDQSF
2879             (vec_duplicate:VDQSF
2880               (match_operand:<VEL> 3 "register_operand"))
2881             (match_operand:VDQSF 2 "register_operand"))))]
2882   "TARGET_SIMD"
2883   {
2884     rtx scratch = gen_reg_rtx (<MODE>mode);
2885     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2886     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2887     DONE;
2888   }
2889 )
2890
2891 (define_expand "aarch64_float_mla_lane<mode>"
2892   [(set (match_operand:VDQSF 0 "register_operand")
2893         (plus:VDQSF
2894           (mult:VDQSF
2895             (vec_duplicate:VDQSF
2896               (vec_select:<VEL>
2897                 (match_operand:V2SF 3 "register_operand")
2898                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2899             (match_operand:VDQSF 2 "register_operand"))
2900           (match_operand:VDQSF 1 "register_operand")))]
2901   "TARGET_SIMD"
2902   {
2903     rtx scratch = gen_reg_rtx (<MODE>mode);
2904     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2905                                     operands[3], operands[4]));
2906     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2907     DONE;
2908   }
2909 )
2910
2911 (define_expand "aarch64_float_mls_lane<mode>"
2912   [(set (match_operand:VDQSF 0 "register_operand")
2913         (minus:VDQSF
2914           (match_operand:VDQSF 1 "register_operand")
2915           (mult:VDQSF
2916             (vec_duplicate:VDQSF
2917               (vec_select:<VEL>
2918                 (match_operand:V2SF 3 "register_operand")
2919                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2920             (match_operand:VDQSF 2 "register_operand"))))]
2921   "TARGET_SIMD"
2922   {
2923     rtx scratch = gen_reg_rtx (<MODE>mode);
2924     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2925                                     operands[3], operands[4]));
2926     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2927     DONE;
2928   }
2929 )
2930
2931 (define_expand "aarch64_float_mla_laneq<mode>"
2932   [(set (match_operand:VDQSF 0 "register_operand")
2933         (plus:VDQSF
2934           (mult:VDQSF
2935             (vec_duplicate:VDQSF
2936               (vec_select:<VEL>
2937                 (match_operand:V4SF 3 "register_operand")
2938                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2939             (match_operand:VDQSF 2 "register_operand"))
2940           (match_operand:VDQSF 1 "register_operand")))]
2941   "TARGET_SIMD"
2942   {
2943     rtx scratch = gen_reg_rtx (<MODE>mode);
2944     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2945                                      operands[3], operands[4]));
2946     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2947     DONE;
2948   }
2949 )
2950
2951 (define_expand "aarch64_float_mls_laneq<mode>"
2952   [(set (match_operand:VDQSF 0 "register_operand")
2953         (minus:VDQSF
2954           (match_operand:VDQSF 1 "register_operand")
2955           (mult:VDQSF
2956             (vec_duplicate:VDQSF
2957               (vec_select:<VEL>
2958                 (match_operand:V4SF 3 "register_operand")
2959                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2960             (match_operand:VDQSF 2 "register_operand"))))]
2961   "TARGET_SIMD"
2962   {
2963     rtx scratch = gen_reg_rtx (<MODE>mode);
2964     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2965                                      operands[3], operands[4]));
2966     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2967     DONE;
2968   }
2969 )
2970
2971 (define_insn "fma<mode>4"
2972   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2973        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2974                   (match_operand:VHSDF 2 "register_operand" "w")
2975                   (match_operand:VHSDF 3 "register_operand" "0")))]
2976   "TARGET_SIMD"
2977  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2978   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2979 )
2980
2981 (define_insn "*aarch64_fma4_elt<mode>"
2982   [(set (match_operand:VDQF 0 "register_operand" "=w")
2983     (fma:VDQF
2984       (vec_duplicate:VDQF
2985         (vec_select:<VEL>
2986           (match_operand:VDQF 1 "register_operand" "<h_con>")
2987           (parallel [(match_operand:SI 2 "immediate_operand")])))
2988       (match_operand:VDQF 3 "register_operand" "w")
2989       (match_operand:VDQF 4 "register_operand" "0")))]
2990   "TARGET_SIMD"
2991   {
2992     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2993     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2994   }
2995   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2996 )
2997
2998 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2999   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3000     (fma:VDQSF
3001       (vec_duplicate:VDQSF
3002         (vec_select:<VEL>
3003           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3004           (parallel [(match_operand:SI 2 "immediate_operand")])))
3005       (match_operand:VDQSF 3 "register_operand" "w")
3006       (match_operand:VDQSF 4 "register_operand" "0")))]
3007   "TARGET_SIMD"
3008   {
3009     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3010     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3011   }
3012   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3013 )
3014
3015 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
3016   [(set (match_operand:VMUL 0 "register_operand" "=w")
3017     (fma:VMUL
3018       (vec_duplicate:VMUL
3019           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3020       (match_operand:VMUL 2 "register_operand" "w")
3021       (match_operand:VMUL 3 "register_operand" "0")))]
3022   "TARGET_SIMD"
3023   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3024   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3025 )
3026
3027 (define_insn "*aarch64_fma4_elt_to_64v2df"
3028   [(set (match_operand:DF 0 "register_operand" "=w")
3029     (fma:DF
3030         (vec_select:DF
3031           (match_operand:V2DF 1 "register_operand" "w")
3032           (parallel [(match_operand:SI 2 "immediate_operand")]))
3033       (match_operand:DF 3 "register_operand" "w")
3034       (match_operand:DF 4 "register_operand" "0")))]
3035   "TARGET_SIMD"
3036   {
3037     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3038     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
3039   }
3040   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3041 )
3042
3043 (define_insn "fnma<mode>4"
3044   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3045         (fma:VHSDF
3046           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
3047           (match_operand:VHSDF 2 "register_operand" "w")
3048           (match_operand:VHSDF 3 "register_operand" "0")))]
3049   "TARGET_SIMD"
3050   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3051   [(set_attr "type" "neon_fp_mla_<stype><q>")]
3052 )
3053
3054 (define_insn "*aarch64_fnma4_elt<mode>"
3055   [(set (match_operand:VDQF 0 "register_operand" "=w")
3056     (fma:VDQF
3057       (neg:VDQF
3058         (match_operand:VDQF 3 "register_operand" "w"))
3059       (vec_duplicate:VDQF
3060         (vec_select:<VEL>
3061           (match_operand:VDQF 1 "register_operand" "<h_con>")
3062           (parallel [(match_operand:SI 2 "immediate_operand")])))
3063       (match_operand:VDQF 4 "register_operand" "0")))]
3064   "TARGET_SIMD"
3065   {
3066     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3067     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3068   }
3069   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3070 )
3071
3072 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
3073   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3074     (fma:VDQSF
3075       (neg:VDQSF
3076         (match_operand:VDQSF 3 "register_operand" "w"))
3077       (vec_duplicate:VDQSF
3078         (vec_select:<VEL>
3079           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3080           (parallel [(match_operand:SI 2 "immediate_operand")])))
3081       (match_operand:VDQSF 4 "register_operand" "0")))]
3082   "TARGET_SIMD"
3083   {
3084     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3085     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3086   }
3087   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
3088 )
3089
3090 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
3091   [(set (match_operand:VMUL 0 "register_operand" "=w")
3092     (fma:VMUL
3093       (neg:VMUL
3094         (match_operand:VMUL 2 "register_operand" "w"))
3095       (vec_duplicate:VMUL
3096         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3097       (match_operand:VMUL 3 "register_operand" "0")))]
3098   "TARGET_SIMD"
3099   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3100   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
3101 )
3102
3103 (define_insn "*aarch64_fnma4_elt_to_64v2df"
3104   [(set (match_operand:DF 0 "register_operand" "=w")
3105     (fma:DF
3106       (vec_select:DF
3107         (match_operand:V2DF 1 "register_operand" "w")
3108         (parallel [(match_operand:SI 2 "immediate_operand")]))
3109       (neg:DF
3110         (match_operand:DF 3 "register_operand" "w"))
3111       (match_operand:DF 4 "register_operand" "0")))]
3112   "TARGET_SIMD"
3113   {
3114     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3115     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
3116   }
3117   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3118 )
3119
3120 ;; Vector versions of the floating-point frint patterns.
3121 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3122 (define_insn "<frint_pattern><mode>2"
3123   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3124         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3125                        FRINT))]
3126   "TARGET_SIMD"
3127   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3128   [(set_attr "type" "neon_fp_round_<stype><q>")]
3129 )
3130
3131 ;; Vector versions of the fcvt standard patterns.
3132 ;; Expands to lbtrunc, lround, lceil, lfloor
3133 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3134   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3135         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3136                                [(match_operand:VHSDF 1 "register_operand" "w")]
3137                                FCVT)))]
3138   "TARGET_SIMD"
3139   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3140   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3141 )
3142
3143 ;; HF Scalar variants of related SIMD instructions.
3144 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3145   [(set (match_operand:HI 0 "register_operand" "=w")
3146         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3147                       FCVT)))]
3148   "TARGET_SIMD_F16INST"
3149   "fcvt<frint_suffix><su>\t%h0, %h1"
3150   [(set_attr "type" "neon_fp_to_int_s")]
3151 )
3152
3153 (define_insn "<optab>_trunchfhi2"
3154   [(set (match_operand:HI 0 "register_operand" "=w")
3155         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3156   "TARGET_SIMD_F16INST"
3157   "fcvtz<su>\t%h0, %h1"
3158   [(set_attr "type" "neon_fp_to_int_s")]
3159 )
3160
3161 (define_insn "<optab>hihf2"
3162   [(set (match_operand:HF 0 "register_operand" "=w")
3163         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3164   "TARGET_SIMD_F16INST"
3165   "<su_optab>cvtf\t%h0, %h1"
3166   [(set_attr "type" "neon_int_to_fp_s")]
3167 )
3168
3169 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3170   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3171         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3172                                [(mult:VDQF
3173          (match_operand:VDQF 1 "register_operand" "w")
3174          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3175                                UNSPEC_FRINTZ)))]
3176   "TARGET_SIMD
3177    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3178                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3179   {
3180     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3181     char buf[64];
3182     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3183     output_asm_insn (buf, operands);
3184     return "";
3185   }
3186   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3187 )
3188
3189 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3190   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3191         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3192                                [(match_operand:VHSDF 1 "register_operand")]
3193                                 UNSPEC_FRINTZ)))]
3194   "TARGET_SIMD"
3195   {})
3196
3197 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3198   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3199         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3200                                [(match_operand:VHSDF 1 "register_operand")]
3201                                 UNSPEC_FRINTZ)))]
3202   "TARGET_SIMD"
3203   {})
3204
3205 (define_expand "ftrunc<VHSDF:mode>2"
3206   [(set (match_operand:VHSDF 0 "register_operand")
3207         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3208                        UNSPEC_FRINTZ))]
3209   "TARGET_SIMD"
3210   {})
3211
3212 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3213   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3214         (FLOATUORS:VHSDF
3215           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3216   "TARGET_SIMD"
3217   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3218   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3219 )
3220
3221 ;; Conversions between vectors of floats and doubles.
3222 ;; Contains a mix of patterns to match standard pattern names
3223 ;; and those for intrinsics.
3224
3225 ;; Float widening operations.
3226
3227 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3228   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3229         (float_extend:<VWIDE> (vec_select:<VHALF>
3230                                (match_operand:VQ_HSF 1 "register_operand" "w")
3231                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3232                             )))]
3233   "TARGET_SIMD"
3234   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3235   [(set_attr "type" "neon_fp_cvt_widen_s")]
3236 )
3237
3238 ;; Convert between fixed-point and floating-point (vector modes)
3239
3240 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3241   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3242         (unspec:<VHSDF:FCVT_TARGET>
3243           [(match_operand:VHSDF 1 "register_operand" "w")
3244            (match_operand:SI 2 "immediate_operand" "i")]
3245          FCVT_F2FIXED))]
3246   "TARGET_SIMD"
3247   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3248   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3249 )
3250
3251 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3252   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3253         (unspec:<VDQ_HSDI:FCVT_TARGET>
3254           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3255            (match_operand:SI 2 "immediate_operand" "i")]
3256          FCVT_FIXED2F))]
3257   "TARGET_SIMD"
3258   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3259   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3260 )
3261
3262 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3263 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3264 ;; the meaning of HI and LO changes depending on the target endianness.
3265 ;; While elsewhere we map the higher numbered elements of a vector to
3266 ;; the lower architectural lanes of the vector, for these patterns we want
3267 ;; to always treat "hi" as referring to the higher architectural lanes.
3268 ;; Consequently, while the patterns below look inconsistent with our
3269 ;; other big-endian patterns their behavior is as required.
3270
3271 (define_expand "vec_unpacks_lo_<mode>"
3272   [(match_operand:<VWIDE> 0 "register_operand")
3273    (match_operand:VQ_HSF 1 "register_operand")]
3274   "TARGET_SIMD"
3275   {
3276     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3277     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3278                                                        operands[1], p));
3279     DONE;
3280   }
3281 )
3282
3283 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3284   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3285         (float_extend:<VWIDE> (vec_select:<VHALF>
3286                                (match_operand:VQ_HSF 1 "register_operand" "w")
3287                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3288                             )))]
3289   "TARGET_SIMD"
3290   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3291   [(set_attr "type" "neon_fp_cvt_widen_s")]
3292 )
3293
3294 (define_expand "vec_unpacks_hi_<mode>"
3295   [(match_operand:<VWIDE> 0 "register_operand")
3296    (match_operand:VQ_HSF 1 "register_operand")]
3297   "TARGET_SIMD"
3298   {
3299     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3301                                                        operands[1], p));
3302     DONE;
3303   }
3304 )
3305 (define_insn "aarch64_float_extend_lo_<Vwide>"
3306   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307         (float_extend:<VWIDE>
3308           (match_operand:VDF 1 "register_operand" "w")))]
3309   "TARGET_SIMD"
3310   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3311   [(set_attr "type" "neon_fp_cvt_widen_s")]
3312 )
3313
3314 ;; Float narrowing operations.
3315
3316 (define_insn "aarch64_float_trunc_rodd_df"
3317   [(set (match_operand:SF 0 "register_operand" "=w")
3318         (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3319                 UNSPEC_FCVTXN))]
3320   "TARGET_SIMD"
3321   "fcvtxn\\t%s0, %d1"
3322   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3323 )
3324
3325 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3326   [(set (match_operand:V2SF 0 "register_operand" "=w")
3327         (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3328                 UNSPEC_FCVTXN))]
3329   "TARGET_SIMD"
3330   "fcvtxn\\t%0.2s, %1.2d"
3331   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3332 )
3333
3334 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3335   [(set (match_operand:V4SF 0 "register_operand" "=w")
3336         (vec_concat:V4SF
3337           (match_operand:V2SF 1 "register_operand" "0")
3338           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3339                 UNSPEC_FCVTXN)))]
3340   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3341   "fcvtxn2\\t%0.4s, %2.2d"
3342   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3343 )
3344
3345 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3346   [(set (match_operand:V4SF 0 "register_operand" "=w")
3347         (vec_concat:V4SF
3348           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3349                 UNSPEC_FCVTXN)
3350           (match_operand:V2SF 1 "register_operand" "0")))]
3351   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3352   "fcvtxn2\\t%0.4s, %2.2d"
3353   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3354 )
3355
3356 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3357   [(match_operand:V4SF 0 "register_operand")
3358    (match_operand:V2SF 1 "register_operand")
3359    (match_operand:V2DF 2 "register_operand")]
3360   "TARGET_SIMD"
3361 {
3362   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3363                              ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3364                              : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3365   emit_insn (gen (operands[0], operands[1], operands[2]));
3366   DONE;
3367 }
3368 )
3369
3370 (define_insn "aarch64_float_truncate_lo_<mode>"
3371   [(set (match_operand:VDF 0 "register_operand" "=w")
3372       (float_truncate:VDF
3373         (match_operand:<VWIDE> 1 "register_operand" "w")))]
3374   "TARGET_SIMD"
3375   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3376   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3377 )
3378
3379 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3380   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3381     (vec_concat:<VDBL>
3382       (match_operand:VDF 1 "register_operand" "0")
3383       (float_truncate:VDF
3384         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3385   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3386   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3387   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3388 )
3389
3390 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3391   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3392     (vec_concat:<VDBL>
3393       (float_truncate:VDF
3394         (match_operand:<VWIDE> 2 "register_operand" "w"))
3395       (match_operand:VDF 1 "register_operand" "0")))]
3396   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3397   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3398   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3399 )
3400
3401 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3402   [(match_operand:<VDBL> 0 "register_operand")
3403    (match_operand:VDF 1 "register_operand")
3404    (match_operand:<VWIDE> 2 "register_operand")]
3405   "TARGET_SIMD"
3406 {
3407   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3408                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3409                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3410   emit_insn (gen (operands[0], operands[1], operands[2]));
3411   DONE;
3412 }
3413 )
3414
3415 (define_expand "vec_pack_trunc_v2df"
3416   [(set (match_operand:V4SF 0 "register_operand")
3417       (vec_concat:V4SF
3418         (float_truncate:V2SF
3419             (match_operand:V2DF 1 "register_operand"))
3420         (float_truncate:V2SF
3421             (match_operand:V2DF 2 "register_operand"))
3422           ))]
3423   "TARGET_SIMD"
3424   {
3425     rtx tmp = gen_reg_rtx (V2SFmode);
3426     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3427     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3428
3429     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3430     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3431                                                    tmp, operands[hi]));
3432     DONE;
3433   }
3434 )
3435
3436 (define_expand "vec_pack_trunc_df"
3437   [(set (match_operand:V2SF 0 "register_operand")
3438       (vec_concat:V2SF
3439         (float_truncate:SF
3440             (match_operand:DF 1 "register_operand"))
3441         (float_truncate:SF
3442             (match_operand:DF 2 "register_operand"))
3443           ))]
3444   "TARGET_SIMD"
3445   {
3446     rtx tmp = gen_reg_rtx (V2SFmode);
3447     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3448     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3449
3450     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
3451     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
3452     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3453     DONE;
3454   }
3455 )
3456
3457 ;; FP Max/Min
3458 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
3459 ;; expression like:
3460 ;;      a = (b < c) ? b : c;
3461 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3462 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3463 ;; -ffast-math.
3464 ;;
3465 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3466 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3467 ;; operand will be returned when both operands are zero (i.e. they may not
3468 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
3469 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3470 ;; NaNs.
3471
3472 (define_insn "<su><maxmin><mode>3"
3473   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3474         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3475                        (match_operand:VHSDF 2 "register_operand" "w")))]
3476   "TARGET_SIMD"
3477   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3478   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3479 )
3480
3481 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3482 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3483 ;; which implement the IEEE fmax ()/fmin () functions.
3484 (define_insn "<fmaxmin><mode>3"
3485   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3486        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3487                       (match_operand:VHSDF 2 "register_operand" "w")]
3488                       FMAXMIN_UNS))]
3489   "TARGET_SIMD"
3490   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3491   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3492 )
3493
3494 ;; 'across lanes' add.
3495
3496 (define_expand "reduc_plus_scal_<mode>"
3497   [(match_operand:<VEL> 0 "register_operand")
3498    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
3499                UNSPEC_ADDV)]
3500   "TARGET_SIMD"
3501   {
3502     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3503     rtx scratch = gen_reg_rtx (<MODE>mode);
3504     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
3505     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3506     DONE;
3507   }
3508 )
3509
3510 (define_insn "aarch64_faddp<mode>"
3511  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3512        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3513                       (match_operand:VHSDF 2 "register_operand" "w")]
3514         UNSPEC_FADDV))]
3515  "TARGET_SIMD"
3516  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3517   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3518 )
3519
3520 (define_insn "aarch64_reduc_plus_internal<mode>"
3521  [(set (match_operand:VDQV 0 "register_operand" "=w")
3522        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
3523                     UNSPEC_ADDV))]
3524  "TARGET_SIMD"
3525  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3526   [(set_attr "type" "neon_reduc_add<q>")]
3527 )
3528
3529 (define_insn "aarch64_<su>addlv<mode>"
3530  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3531        (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
3532                     USADDLV))]
3533  "TARGET_SIMD"
3534  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3535   [(set_attr "type" "neon_reduc_add<q>")]
3536 )
3537
3538 (define_insn "aarch64_<su>addlp<mode>"
3539  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3540        (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
3541                     USADDLP))]
3542  "TARGET_SIMD"
3543  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3544   [(set_attr "type" "neon_reduc_add<q>")]
3545 )
3546
3547 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3548 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3549  [(set (match_operand:GPI 0 "register_operand" "=w")
3550        (zero_extend:GPI
3551         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3552                              UNSPEC_ADDV)))]
3553  "TARGET_SIMD"
3554  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3555   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3556 )
3557
3558 (define_insn "aarch64_reduc_plus_internalv2si"
3559  [(set (match_operand:V2SI 0 "register_operand" "=w")
3560        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3561                     UNSPEC_ADDV))]
3562  "TARGET_SIMD"
3563  "addp\\t%0.2s, %1.2s, %1.2s"
3564   [(set_attr "type" "neon_reduc_add")]
3565 )
3566
3567 (define_insn "reduc_plus_scal_<mode>"
3568  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3569        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3570                    UNSPEC_FADDV))]
3571  "TARGET_SIMD"
3572  "faddp\\t%<Vetype>0, %1.<Vtype>"
3573   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3574 )
3575
3576 (define_expand "reduc_plus_scal_v4sf"
3577  [(set (match_operand:SF 0 "register_operand")
3578        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
3579                     UNSPEC_FADDV))]
3580  "TARGET_SIMD"
3581 {
3582   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3583   rtx scratch = gen_reg_rtx (V4SFmode);
3584   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3585   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3586   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3587   DONE;
3588 })
3589
3590 (define_insn "clrsb<mode>2"
3591   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3592         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3593   "TARGET_SIMD"
3594   "cls\\t%0.<Vtype>, %1.<Vtype>"
3595   [(set_attr "type" "neon_cls<q>")]
3596 )
3597
3598 (define_insn "clz<mode>2"
3599  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3600        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3601  "TARGET_SIMD"
3602  "clz\\t%0.<Vtype>, %1.<Vtype>"
3603   [(set_attr "type" "neon_cls<q>")]
3604 )
3605
3606 (define_insn "popcount<mode>2"
3607   [(set (match_operand:VB 0 "register_operand" "=w")
3608         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3609   "TARGET_SIMD"
3610   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3611   [(set_attr "type" "neon_cnt<q>")]
3612 )
3613
3614 ;; 'across lanes' max and min ops.
3615
3616 ;; Template for outputting a scalar, so we can create __builtins which can be
3617 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
3618 (define_expand "reduc_<optab>_scal_<mode>"
3619   [(match_operand:<VEL> 0 "register_operand")
3620    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3621                  FMAXMINV)]
3622   "TARGET_SIMD"
3623   {
3624     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3625     rtx scratch = gen_reg_rtx (<MODE>mode);
3626     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3627                                                          operands[1]));
3628     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3629     DONE;
3630   }
3631 )
3632
3633 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3634   [(match_operand:<VEL> 0 "register_operand")
3635    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3636                  FMAXMINNMV)]
3637   "TARGET_SIMD"
3638   {
3639     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3640     DONE;
3641   }
3642 )
3643
3644 ;; Likewise for integer cases, signed and unsigned.
3645 (define_expand "reduc_<optab>_scal_<mode>"
3646   [(match_operand:<VEL> 0 "register_operand")
3647    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3648                     MAXMINV)]
3649   "TARGET_SIMD"
3650   {
3651     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3652     rtx scratch = gen_reg_rtx (<MODE>mode);
3653     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3654                                                          operands[1]));
3655     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3656     DONE;
3657   }
3658 )
3659
3660 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3661  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3662        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3663                     MAXMINV))]
3664  "TARGET_SIMD"
3665  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3666   [(set_attr "type" "neon_reduc_minmax<q>")]
3667 )
3668
3669 (define_insn "aarch64_reduc_<optab>_internalv2si"
3670  [(set (match_operand:V2SI 0 "register_operand" "=w")
3671        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3672                     MAXMINV))]
3673  "TARGET_SIMD"
3674  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3675   [(set_attr "type" "neon_reduc_minmax")]
3676 )
3677
3678 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3679  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3680        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3681                       FMAXMINV))]
3682  "TARGET_SIMD"
3683  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3684   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3685 )
3686
3687 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3688 ;; allocation.
3689 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3690 ;; to select.
3691 ;;
3692 ;; Thus our BSL is of the form:
3693 ;;   op0 = bsl (mask, op2, op3)
3694 ;; We can use any of:
3695 ;;
3696 ;;   if (op0 = mask)
3697 ;;     bsl mask, op1, op2
3698 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3699 ;;     bit op0, op2, mask
3700 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3701 ;;     bif op0, op1, mask
3702 ;;
3703 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3704 ;; Some forms of straight-line code may generate the equivalent form
3705 ;; in *aarch64_simd_bsl<mode>_alt.
3706
3707 (define_insn "aarch64_simd_bsl<mode>_internal"
3708   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3709         (xor:VDQ_I
3710            (and:VDQ_I
3711              (xor:VDQ_I
3712                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
3713                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
3714              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3715           (match_dup:<V_INT_EQUIV> 3)
3716         ))]
3717   "TARGET_SIMD"
3718   "@
3719   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3720   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3721   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
3722   [(set_attr "type" "neon_bsl<q>")]
3723 )
3724
3725 ;; We need this form in addition to the above pattern to match the case
3726 ;; when combine tries merging three insns such that the second operand of
3727 ;; the outer XOR matches the second operand of the inner XOR rather than
3728 ;; the first.  The two are equivalent but since recog doesn't try all
3729 ;; permutations of commutative operations, we have to have a separate pattern.
3730
3731 (define_insn "*aarch64_simd_bsl<mode>_alt"
3732   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3733         (xor:VDQ_I
3734            (and:VDQ_I
3735              (xor:VDQ_I
3736                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
3737                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
3738               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3739           (match_dup:<V_INT_EQUIV> 2)))]
3740   "TARGET_SIMD"
3741   "@
3742   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3743   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3744   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
3745   [(set_attr "type" "neon_bsl<q>")]
3746 )
3747
3748 ;; DImode is special, we want to avoid computing operations which are
3749 ;; more naturally computed in general purpose registers in the vector
3750 ;; registers.  If we do that, we need to move all three operands from general
3751 ;; purpose registers to vector registers, then back again.  However, we
3752 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3753 ;; optimizations based on the component operations of a BSL.
3754 ;;
3755 ;; That means we need a splitter back to the individual operations, if they
3756 ;; would be better calculated on the integer side.
3757
3758 (define_insn_and_split "aarch64_simd_bsldi_internal"
3759   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3760         (xor:DI
3761            (and:DI
3762              (xor:DI
3763                (match_operand:DI 3 "register_operand" "w,0,w,r")
3764                (match_operand:DI 2 "register_operand" "w,w,0,r"))
3765              (match_operand:DI 1 "register_operand" "0,w,w,r"))
3766           (match_dup:DI 3)
3767         ))]
3768   "TARGET_SIMD"
3769   "@
3770   bsl\\t%0.8b, %2.8b, %3.8b
3771   bit\\t%0.8b, %2.8b, %1.8b
3772   bif\\t%0.8b, %3.8b, %1.8b
3773   #"
3774   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3775   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3776 {
3777   /* Split back to individual operations.  If we're before reload, and
3778      able to create a temporary register, do so.  If we're after reload,
3779      we've got an early-clobber destination register, so use that.
3780      Otherwise, we can't create pseudos and we can't yet guarantee that
3781      operands[0] is safe to write, so FAIL to split.  */
3782
3783   rtx scratch;
3784   if (reload_completed)
3785     scratch = operands[0];
3786   else if (can_create_pseudo_p ())
3787     scratch = gen_reg_rtx (DImode);
3788   else
3789     FAIL;
3790
3791   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3792   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3793   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3794   DONE;
3795 }
3796   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3797    (set_attr "length" "4,4,4,12")]
3798 )
3799
3800 (define_insn_and_split "aarch64_simd_bsldi_alt"
3801   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3802         (xor:DI
3803            (and:DI
3804              (xor:DI
3805                (match_operand:DI 3 "register_operand" "w,w,0,r")
3806                (match_operand:DI 2 "register_operand" "w,0,w,r"))
3807              (match_operand:DI 1 "register_operand" "0,w,w,r"))
3808           (match_dup:DI 2)
3809         ))]
3810   "TARGET_SIMD"
3811   "@
3812   bsl\\t%0.8b, %3.8b, %2.8b
3813   bit\\t%0.8b, %3.8b, %1.8b
3814   bif\\t%0.8b, %2.8b, %1.8b
3815   #"
3816   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3817   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3818 {
3819   /* Split back to individual operations.  If we're before reload, and
3820      able to create a temporary register, do so.  If we're after reload,
3821      we've got an early-clobber destination register, so use that.
3822      Otherwise, we can't create pseudos and we can't yet guarantee that
3823      operands[0] is safe to write, so FAIL to split.  */
3824
3825   rtx scratch;
3826   if (reload_completed)
3827     scratch = operands[0];
3828   else if (can_create_pseudo_p ())
3829     scratch = gen_reg_rtx (DImode);
3830   else
3831     FAIL;
3832
3833   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3834   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3835   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3836   DONE;
3837 }
3838   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3839    (set_attr "length" "4,4,4,12")]
3840 )
3841
3842 (define_expand "aarch64_simd_bsl<mode>"
3843   [(match_operand:VALLDIF 0 "register_operand")
3844    (match_operand:<V_INT_EQUIV> 1 "register_operand")
3845    (match_operand:VALLDIF 2 "register_operand")
3846    (match_operand:VALLDIF 3 "register_operand")]
3847  "TARGET_SIMD"
3848 {
3849   /* We can't alias operands together if they have different modes.  */
3850   rtx tmp = operands[0];
3851   if (FLOAT_MODE_P (<MODE>mode))
3852     {
3853       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3854       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3855       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3856     }
3857   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3858   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3859                                                          operands[1],
3860                                                          operands[2],
3861                                                          operands[3]));
3862   if (tmp != operands[0])
3863     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3864
3865   DONE;
3866 })
3867
3868 (define_expand "vcond_mask_<mode><v_int_equiv>"
3869   [(match_operand:VALLDI 0 "register_operand")
3870    (match_operand:VALLDI 1 "nonmemory_operand")
3871    (match_operand:VALLDI 2 "nonmemory_operand")
3872    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3873   "TARGET_SIMD"
3874 {
3875   /* If we have (a = (P) ? -1 : 0);
3876      Then we can simply move the generated mask (result must be int).  */
3877   if (operands[1] == CONSTM1_RTX (<MODE>mode)
3878       && operands[2] == CONST0_RTX (<MODE>mode))
3879     emit_move_insn (operands[0], operands[3]);
3880   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
3881   else if (operands[1] == CONST0_RTX (<MODE>mode)
3882            && operands[2] == CONSTM1_RTX (<MODE>mode))
3883     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3884   else
3885     {
3886       if (!REG_P (operands[1]))
3887         operands[1] = force_reg (<MODE>mode, operands[1]);
3888       if (!REG_P (operands[2]))
3889         operands[2] = force_reg (<MODE>mode, operands[2]);
3890       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3891                                              operands[1], operands[2]));
3892     }
3893
3894   DONE;
3895 })
3896
3897 ;; Patterns comparing two vectors to produce a mask.
3898
3899 (define_expand "vec_cmp<mode><mode>"
3900   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3901           (match_operator 1 "comparison_operator"
3902             [(match_operand:VSDQ_I_DI 2 "register_operand")
3903              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3904   "TARGET_SIMD"
3905 {
3906   rtx mask = operands[0];
3907   enum rtx_code code = GET_CODE (operands[1]);
3908
3909   switch (code)
3910     {
3911     case NE:
3912     case LE:
3913     case LT:
3914     case GE:
3915     case GT:
3916     case EQ:
3917       if (operands[3] == CONST0_RTX (<MODE>mode))
3918         break;
3919
3920       /* Fall through.  */
3921     default:
3922       if (!REG_P (operands[3]))
3923         operands[3] = force_reg (<MODE>mode, operands[3]);
3924
3925       break;
3926     }
3927
3928   switch (code)
3929     {
3930     case LT:
3931       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3932       break;
3933
3934     case GE:
3935       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3936       break;
3937
3938     case LE:
3939       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3940       break;
3941
3942     case GT:
3943       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3944       break;
3945
3946     case LTU:
3947       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3948       break;
3949
3950     case GEU:
3951       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3952       break;
3953
3954     case LEU:
3955       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3956       break;
3957
3958     case GTU:
3959       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3960       break;
3961
3962     case NE:
3963       /* Handle NE as !EQ.  */
3964       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3965       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
3966       break;
3967
3968     case EQ:
3969       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3970       break;
3971
3972     default:
3973       gcc_unreachable ();
3974     }
3975
3976   DONE;
3977 })
3978
3979 (define_expand "vec_cmp<mode><v_int_equiv>"
3980   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3981         (match_operator 1 "comparison_operator"
3982             [(match_operand:VDQF 2 "register_operand")
3983              (match_operand:VDQF 3 "nonmemory_operand")]))]
3984   "TARGET_SIMD"
3985 {
3986   int use_zero_form = 0;
3987   enum rtx_code code = GET_CODE (operands[1]);
3988   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3989
3990   rtx (*comparison) (rtx, rtx, rtx) = NULL;
3991
3992   switch (code)
3993     {
3994     case LE:
3995     case LT:
3996     case GE:
3997     case GT:
3998     case EQ:
3999       if (operands[3] == CONST0_RTX (<MODE>mode))
4000         {
4001           use_zero_form = 1;
4002           break;
4003         }
4004       /* Fall through.  */
4005     default:
4006       if (!REG_P (operands[3]))
4007         operands[3] = force_reg (<MODE>mode, operands[3]);
4008
4009       break;
4010     }
4011
4012   switch (code)
4013     {
4014     case LT:
4015       if (use_zero_form)
4016         {
4017           comparison = gen_aarch64_cmlt<mode>;
4018           break;
4019         }
4020       /* Fall through.  */
4021     case UNLT:
4022       std::swap (operands[2], operands[3]);
4023       /* Fall through.  */
4024     case UNGT:
4025     case GT:
4026       comparison = gen_aarch64_cmgt<mode>;
4027       break;
4028     case LE:
4029       if (use_zero_form)
4030         {
4031           comparison = gen_aarch64_cmle<mode>;
4032           break;
4033         }
4034       /* Fall through.  */
4035     case UNLE:
4036       std::swap (operands[2], operands[3]);
4037       /* Fall through.  */
4038     case UNGE:
4039     case GE:
4040       comparison = gen_aarch64_cmge<mode>;
4041       break;
4042     case NE:
4043     case EQ:
4044       comparison = gen_aarch64_cmeq<mode>;
4045       break;
4046     case UNEQ:
4047     case ORDERED:
4048     case UNORDERED:
4049     case LTGT:
4050       break;
4051     default:
4052       gcc_unreachable ();
4053     }
4054
4055   switch (code)
4056     {
4057     case UNGE:
4058     case UNGT:
4059     case UNLE:
4060     case UNLT:
4061       {
4062         /* All of the above must not raise any FP exceptions.  Thus we first
4063            check each operand for NaNs and force any elements containing NaN to
4064            zero before using them in the compare.
4065            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4066                                      (cm<cc> (isnan (a) ? 0.0 : a,
4067                                               isnan (b) ? 0.0 : b))
4068            We use the following transformations for doing the comparisions:
4069            a UNGE b -> a GE b
4070            a UNGT b -> a GT b
4071            a UNLE b -> b GE a
4072            a UNLT b -> b GT a.  */
4073
4074         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4075         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4076         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4077         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4078         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4079         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4080         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4081                                           lowpart_subreg (<V_INT_EQUIV>mode,
4082                                                           operands[2],
4083                                                           <MODE>mode)));
4084         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4085                                           lowpart_subreg (<V_INT_EQUIV>mode,
4086                                                           operands[3],
4087                                                           <MODE>mode)));
4088         gcc_assert (comparison != NULL);
4089         emit_insn (comparison (operands[0],
4090                                lowpart_subreg (<MODE>mode,
4091                                                tmp0, <V_INT_EQUIV>mode),
4092                                lowpart_subreg (<MODE>mode,
4093                                                tmp1, <V_INT_EQUIV>mode)));
4094         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4095       }
4096       break;
4097
4098     case LT:
4099     case LE:
4100     case GT:
4101     case GE:
4102     case EQ:
4103     case NE:
4104       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
4105          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
4106          a GE b -> a GE b
4107          a GT b -> a GT b
4108          a LE b -> b GE a
4109          a LT b -> b GT a
4110          a EQ b -> a EQ b
4111          a NE b -> ~(a EQ b)  */
4112       gcc_assert (comparison != NULL);
4113       emit_insn (comparison (operands[0], operands[2], operands[3]));
4114       if (code == NE)
4115         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4116       break;
4117
4118     case LTGT:
4119       /* LTGT is not guranteed to not generate a FP exception.  So let's
4120          go the faster way : ((a > b) || (b > a)).  */
4121       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4122                                          operands[2], operands[3]));
4123       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4124       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4125       break;
4126
4127     case ORDERED:
4128     case UNORDERED:
4129     case UNEQ:
4130       /* cmeq (a, a) & cmeq (b, b).  */
4131       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4132                                          operands[2], operands[2]));
4133       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4134       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4135
4136       if (code == UNORDERED)
4137         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4138       else if (code == UNEQ)
4139         {
4140           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4141           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
4142         }
4143       break;
4144
4145     default:
4146       gcc_unreachable ();
4147     }
4148
4149   DONE;
4150 })
4151
4152 (define_expand "vec_cmpu<mode><mode>"
4153   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4154           (match_operator 1 "comparison_operator"
4155             [(match_operand:VSDQ_I_DI 2 "register_operand")
4156              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4157   "TARGET_SIMD"
4158 {
4159   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4160                                       operands[2], operands[3]));
4161   DONE;
4162 })
4163
4164 (define_expand "vcond<mode><mode>"
4165   [(set (match_operand:VALLDI 0 "register_operand")
4166         (if_then_else:VALLDI
4167           (match_operator 3 "comparison_operator"
4168             [(match_operand:VALLDI 4 "register_operand")
4169              (match_operand:VALLDI 5 "nonmemory_operand")])
4170           (match_operand:VALLDI 1 "nonmemory_operand")
4171           (match_operand:VALLDI 2 "nonmemory_operand")))]
4172   "TARGET_SIMD"
4173 {
4174   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4175   enum rtx_code code = GET_CODE (operands[3]);
4176
4177   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4178      it as well as switch operands 1/2 in order to avoid the additional
4179      NOT instruction.  */
4180   if (code == NE)
4181     {
4182       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4183                                     operands[4], operands[5]);
4184       std::swap (operands[1], operands[2]);
4185     }
4186   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4187                                              operands[4], operands[5]));
4188   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4189                                                  operands[2], mask));
4190
4191   DONE;
4192 })
4193
4194 (define_expand "vcond<v_cmp_mixed><mode>"
4195   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4196         (if_then_else:<V_cmp_mixed>
4197           (match_operator 3 "comparison_operator"
4198             [(match_operand:VDQF_COND 4 "register_operand")
4199              (match_operand:VDQF_COND 5 "nonmemory_operand")])
4200           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4201           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4202   "TARGET_SIMD"
4203 {
4204   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4205   enum rtx_code code = GET_CODE (operands[3]);
4206
4207   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4208      it as well as switch operands 1/2 in order to avoid the additional
4209      NOT instruction.  */
4210   if (code == NE)
4211     {
4212       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4213                                     operands[4], operands[5]);
4214       std::swap (operands[1], operands[2]);
4215     }
4216   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4217                                              operands[4], operands[5]));
4218   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4219                                                 operands[0], operands[1],
4220                                                 operands[2], mask));
4221
4222   DONE;
4223 })
4224
4225 (define_expand "vcondu<mode><mode>"
4226   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4227         (if_then_else:VSDQ_I_DI
4228           (match_operator 3 "comparison_operator"
4229             [(match_operand:VSDQ_I_DI 4 "register_operand")
4230              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4231           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4232           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4233   "TARGET_SIMD"
4234 {
4235   rtx mask = gen_reg_rtx (<MODE>mode);
4236   enum rtx_code code = GET_CODE (operands[3]);
4237
4238   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4239      it as well as switch operands 1/2 in order to avoid the additional
4240      NOT instruction.  */
4241   if (code == NE)
4242     {
4243       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244                                     operands[4], operands[5]);
4245       std::swap (operands[1], operands[2]);
4246     }
4247   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4248                                       operands[4], operands[5]));
4249   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4250                                                  operands[2], mask));
4251   DONE;
4252 })
4253
4254 (define_expand "vcondu<mode><v_cmp_mixed>"
4255   [(set (match_operand:VDQF 0 "register_operand")
4256         (if_then_else:VDQF
4257           (match_operator 3 "comparison_operator"
4258             [(match_operand:<V_cmp_mixed> 4 "register_operand")
4259              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4260           (match_operand:VDQF 1 "nonmemory_operand")
4261           (match_operand:VDQF 2 "nonmemory_operand")))]
4262   "TARGET_SIMD"
4263 {
4264   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4265   enum rtx_code code = GET_CODE (operands[3]);
4266
4267   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4268      it as well as switch operands 1/2 in order to avoid the additional
4269      NOT instruction.  */
4270   if (code == NE)
4271     {
4272       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4273                                     operands[4], operands[5]);
4274       std::swap (operands[1], operands[2]);
4275     }
4276   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4277                                                   mask, operands[3],
4278                                                   operands[4], operands[5]));
4279   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4280                                                  operands[2], mask));
4281   DONE;
4282 })
4283
4284 ;; Patterns for AArch64 SIMD Intrinsics.
4285
4286 ;; Lane extraction with sign extension to general purpose register.
4287 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4288   [(set (match_operand:GPI 0 "register_operand" "=r")
4289         (sign_extend:GPI
4290           (vec_select:<VDQQH:VEL>
4291             (match_operand:VDQQH 1 "register_operand" "w")
4292             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4293   "TARGET_SIMD"
4294   {
4295     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4296                                            INTVAL (operands[2]));
4297     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4298   }
4299   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4300 )
4301
4302 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4303   [(set (match_operand:GPI 0 "register_operand" "=r")
4304         (zero_extend:GPI
4305           (vec_select:<VDQQH:VEL>
4306             (match_operand:VDQQH 1 "register_operand" "w")
4307             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4308   "TARGET_SIMD"
4309   {
4310     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4311                                            INTVAL (operands[2]));
4312     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4313   }
4314   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4315 )
4316
4317 ;; Lane extraction of a value, neither sign nor zero extension
4318 ;; is guaranteed so upper bits should be considered undefined.
4319 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4320 ;; Extracting lane zero is split into a simple move when it is between SIMD
4321 ;; registers or a store.
4322 (define_insn_and_split "aarch64_get_lane<mode>"
4323   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4324         (vec_select:<VEL>
4325           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4326           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4327   "TARGET_SIMD"
4328   {
4329     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4330     switch (which_alternative)
4331       {
4332         case 0:
4333           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4334         case 1:
4335           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4336         case 2:
4337           return "st1\\t{%1.<Vetype>}[%2], %0";
4338         default:
4339           gcc_unreachable ();
4340       }
4341   }
4342  "&& reload_completed
4343   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4344  [(set (match_dup 0) (match_dup 1))]
4345  {
4346    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4347  }
4348   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4349 )
4350
4351 (define_insn "load_pair_lanes<mode>"
4352   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4353         (vec_concat:<VDBL>
4354            (match_operand:VDC 1 "memory_operand" "Utq")
4355            (match_operand:VDC 2 "memory_operand" "m")))]
4356   "TARGET_SIMD && !STRICT_ALIGNMENT
4357    && rtx_equal_p (XEXP (operands[2], 0),
4358                    plus_constant (Pmode,
4359                                   XEXP (operands[1], 0),
4360                                   GET_MODE_SIZE (<MODE>mode)))"
4361   "ldr\\t%q0, %1"
4362   [(set_attr "type" "neon_load1_1reg_q")]
4363 )
4364
4365 (define_insn "store_pair_lanes<mode>"
4366   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4367         (vec_concat:<VDBL>
4368            (match_operand:VDC 1 "register_operand" "w, r")
4369            (match_operand:VDC 2 "register_operand" "w, r")))]
4370   "TARGET_SIMD"
4371   "@
4372    stp\\t%d1, %d2, %y0
4373    stp\\t%x1, %x2, %y0"
4374   [(set_attr "type" "neon_stp, store_16")]
4375 )
4376
4377 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4378 ;; dest vector.
4379
4380 (define_insn "@aarch64_combinez<mode>"
4381   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4382         (vec_concat:<VDBL>
4383           (match_operand:VDC 1 "general_operand" "w,?r,m")
4384           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
4385   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4386   "@
4387    mov\\t%0.8b, %1.8b
4388    fmov\t%d0, %1
4389    ldr\\t%d0, %1"
4390   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4391    (set_attr "arch" "simd,fp,simd")]
4392 )
4393
4394 (define_insn "@aarch64_combinez_be<mode>"
4395   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4396         (vec_concat:<VDBL>
4397           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
4398           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
4399   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4400   "@
4401    mov\\t%0.8b, %1.8b
4402    fmov\t%d0, %1
4403    ldr\\t%d0, %1"
4404   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4405    (set_attr "arch" "simd,fp,simd")]
4406 )
4407
4408 (define_expand "aarch64_combine<mode>"
4409   [(match_operand:<VDBL> 0 "register_operand")
4410    (match_operand:VDC 1 "register_operand")
4411    (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
4412   "TARGET_SIMD"
4413 {
4414   if (operands[2] == CONST0_RTX (<MODE>mode))
4415     {
4416       if (BYTES_BIG_ENDIAN)
4417         emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
4418                                                   operands[2]));
4419       else
4420         emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
4421                                                operands[2]));
4422     }
4423   else
4424     aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
4425   DONE;
4426 }
4427 )
4428
4429 (define_expand "@aarch64_simd_combine<mode>"
4430   [(match_operand:<VDBL> 0 "register_operand")
4431    (match_operand:VDC 1 "register_operand")
4432    (match_operand:VDC 2 "register_operand")]
4433   "TARGET_SIMD"
4434   {
4435     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
4436     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
4437     DONE;
4438   }
4439 [(set_attr "type" "multiple")]
4440 )
4441
4442 ;; <su><addsub>l<q>.
4443
4444 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4445  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4446        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4447                            (match_operand:VQW 1 "register_operand" "w")
4448                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4449                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4450                            (match_operand:VQW 2 "register_operand" "w")
4451                            (match_dup 3)))))]
4452   "TARGET_SIMD"
4453   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4454   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4455 )
4456
4457 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4458  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4459        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4460                            (match_operand:VQW 1 "register_operand" "w")
4461                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4462                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4463                            (match_operand:VQW 2 "register_operand" "w")
4464                            (match_dup 3)))))]
4465   "TARGET_SIMD"
4466   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4467   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4468 )
4469
4470 (define_expand "vec_widen_<su>addl_lo_<mode>"
4471   [(match_operand:<VWIDE> 0 "register_operand")
4472    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4473    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4474   "TARGET_SIMD"
4475 {
4476   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4477   emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4478                                                      operands[2], p));
4479   DONE;
4480 })
4481
4482 (define_expand "vec_widen_<su>addl_hi_<mode>"
4483   [(match_operand:<VWIDE> 0 "register_operand")
4484    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4485    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4486   "TARGET_SIMD"
4487 {
4488   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4489   emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4490                                                      operands[2], p));
4491   DONE;
4492 })
4493
4494 (define_expand "vec_widen_<su>subl_lo_<mode>"
4495   [(match_operand:<VWIDE> 0 "register_operand")
4496    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4497    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4498   "TARGET_SIMD"
4499 {
4500   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4501   emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4502                                                      operands[2], p));
4503   DONE;
4504 })
4505
4506 (define_expand "vec_widen_<su>subl_hi_<mode>"
4507   [(match_operand:<VWIDE> 0 "register_operand")
4508    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4509    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4510   "TARGET_SIMD"
4511 {
4512   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4513   emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4514                                                      operands[2], p));
4515   DONE;
4516 })
4517
4518 (define_expand "aarch64_saddl2<mode>"
4519   [(match_operand:<VWIDE> 0 "register_operand")
4520    (match_operand:VQW 1 "register_operand")
4521    (match_operand:VQW 2 "register_operand")]
4522   "TARGET_SIMD"
4523 {
4524   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4525   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4526                                                   operands[2], p));
4527   DONE;
4528 })
4529
4530 (define_expand "aarch64_uaddl2<mode>"
4531   [(match_operand:<VWIDE> 0 "register_operand")
4532    (match_operand:VQW 1 "register_operand")
4533    (match_operand:VQW 2 "register_operand")]
4534   "TARGET_SIMD"
4535 {
4536   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4537   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4538                                                   operands[2], p));
4539   DONE;
4540 })
4541
4542 (define_expand "aarch64_ssubl2<mode>"
4543   [(match_operand:<VWIDE> 0 "register_operand")
4544    (match_operand:VQW 1 "register_operand")
4545    (match_operand:VQW 2 "register_operand")]
4546   "TARGET_SIMD"
4547 {
4548   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4549   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4550                                                 operands[2], p));
4551   DONE;
4552 })
4553
4554 (define_expand "aarch64_usubl2<mode>"
4555   [(match_operand:<VWIDE> 0 "register_operand")
4556    (match_operand:VQW 1 "register_operand")
4557    (match_operand:VQW 2 "register_operand")]
4558   "TARGET_SIMD"
4559 {
4560   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4561   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4562                                                 operands[2], p));
4563   DONE;
4564 })
4565
4566 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4567  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4568        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4569                            (match_operand:VD_BHSI 1 "register_operand" "w"))
4570                        (ANY_EXTEND:<VWIDE>
4571                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4572   "TARGET_SIMD"
4573   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4574   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4575 )
4576
4577 ;; <su><addsub>w<q>.
4578
4579 (define_expand "widen_ssum<mode>3"
4580   [(set (match_operand:<VDBLW> 0 "register_operand")
4581         (plus:<VDBLW> (sign_extend:<VDBLW>
4582                         (match_operand:VQW 1 "register_operand"))
4583                       (match_operand:<VDBLW> 2 "register_operand")))]
4584   "TARGET_SIMD"
4585   {
4586     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4587     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4588
4589     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4590                                                 operands[1], p));
4591     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4592     DONE;
4593   }
4594 )
4595
4596 (define_expand "widen_ssum<mode>3"
4597   [(set (match_operand:<VWIDE> 0 "register_operand")
4598         (plus:<VWIDE> (sign_extend:<VWIDE>
4599                         (match_operand:VD_BHSI 1 "register_operand"))
4600                       (match_operand:<VWIDE> 2 "register_operand")))]
4601   "TARGET_SIMD"
4602 {
4603   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4604   DONE;
4605 })
4606
4607 (define_expand "widen_usum<mode>3"
4608   [(set (match_operand:<VDBLW> 0 "register_operand")
4609         (plus:<VDBLW> (zero_extend:<VDBLW>
4610                         (match_operand:VQW 1 "register_operand"))
4611                       (match_operand:<VDBLW> 2 "register_operand")))]
4612   "TARGET_SIMD"
4613   {
4614     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4615     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4616
4617     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4618                                                  operands[1], p));
4619     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4620     DONE;
4621   }
4622 )
4623
4624 (define_expand "widen_usum<mode>3"
4625   [(set (match_operand:<VWIDE> 0 "register_operand")
4626         (plus:<VWIDE> (zero_extend:<VWIDE>
4627                         (match_operand:VD_BHSI 1 "register_operand"))
4628                       (match_operand:<VWIDE> 2 "register_operand")))]
4629   "TARGET_SIMD"
4630 {
4631   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4632   DONE;
4633 })
4634
4635 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4636   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4637         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4638           (ANY_EXTEND:<VWIDE>
4639             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4640   "TARGET_SIMD"
4641   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4642   [(set_attr "type" "neon_sub_widen")]
4643 )
4644
4645 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4646   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4647         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4648           (ANY_EXTEND:<VWIDE>
4649             (vec_select:<VHALF>
4650               (match_operand:VQW 2 "register_operand" "w")
4651               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4652   "TARGET_SIMD"
4653   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4654   [(set_attr "type" "neon_sub_widen")]
4655 )
4656
4657 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4658   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4659         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4660           (ANY_EXTEND:<VWIDE>
4661             (vec_select:<VHALF>
4662               (match_operand:VQW 2 "register_operand" "w")
4663               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4664   "TARGET_SIMD"
4665   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4666   [(set_attr "type" "neon_sub_widen")]
4667 )
4668
4669 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4670   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4671         (plus:<VWIDE>
4672           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4673           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4674   "TARGET_SIMD"
4675   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4676   [(set_attr "type" "neon_add_widen")]
4677 )
4678
4679 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4680   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4681         (plus:<VWIDE>
4682           (ANY_EXTEND:<VWIDE>
4683             (vec_select:<VHALF>
4684               (match_operand:VQW 2 "register_operand" "w")
4685               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4686           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4687   "TARGET_SIMD"
4688   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4689   [(set_attr "type" "neon_add_widen")]
4690 )
4691
4692 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4693   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4694         (plus:<VWIDE>
4695           (ANY_EXTEND:<VWIDE>
4696             (vec_select:<VHALF>
4697               (match_operand:VQW 2 "register_operand" "w")
4698               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4699           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4700   "TARGET_SIMD"
4701   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4702   [(set_attr "type" "neon_add_widen")]
4703 )
4704
4705 (define_expand "aarch64_saddw2<mode>"
4706   [(match_operand:<VWIDE> 0 "register_operand")
4707    (match_operand:<VWIDE> 1 "register_operand")
4708    (match_operand:VQW 2 "register_operand")]
4709   "TARGET_SIMD"
4710 {
4711   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4712   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
4713                                                 operands[2], p));
4714   DONE;
4715 })
4716
4717 (define_expand "aarch64_uaddw2<mode>"
4718   [(match_operand:<VWIDE> 0 "register_operand")
4719    (match_operand:<VWIDE> 1 "register_operand")
4720    (match_operand:VQW 2 "register_operand")]
4721   "TARGET_SIMD"
4722 {
4723   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4724   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
4725                                                 operands[2], p));
4726   DONE;
4727 })
4728
4729
4730 (define_expand "aarch64_ssubw2<mode>"
4731   [(match_operand:<VWIDE> 0 "register_operand")
4732    (match_operand:<VWIDE> 1 "register_operand")
4733    (match_operand:VQW 2 "register_operand")]
4734   "TARGET_SIMD"
4735 {
4736   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4737   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
4738                                                 operands[2], p));
4739   DONE;
4740 })
4741
4742 (define_expand "aarch64_usubw2<mode>"
4743   [(match_operand:<VWIDE> 0 "register_operand")
4744    (match_operand:<VWIDE> 1 "register_operand")
4745    (match_operand:VQW 2 "register_operand")]
4746   "TARGET_SIMD"
4747 {
4748   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
4750                                                 operands[2], p));
4751   DONE;
4752 })
4753
4754 ;; <su><r>h<addsub>.
4755
4756 (define_expand "<u>avg<mode>3_floor"
4757   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4758         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4759                           (match_operand:VDQ_BHSI 2 "register_operand")]
4760                          HADD))]
4761   "TARGET_SIMD"
4762 )
4763
4764 (define_expand "<u>avg<mode>3_ceil"
4765   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4766         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4767                           (match_operand:VDQ_BHSI 2 "register_operand")]
4768                          RHADD))]
4769   "TARGET_SIMD"
4770 )
4771
4772 (define_insn "aarch64_<sur>h<addsub><mode>"
4773   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4774         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
4775                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
4776                      HADDSUB))]
4777   "TARGET_SIMD"
4778   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4779   [(set_attr "type" "neon_<addsub>_halve<q>")]
4780 )
4781
4782 ;; <r><addsub>hn<q>.
4783
4784 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
4785   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4786         (vec_concat:<VNARROWQ2>
4787           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4788                               (match_operand:VQN 2 "register_operand" "w")]
4789                              ADDSUBHN)
4790           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
4791   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4792   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4793   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4794 )
4795
4796 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
4797   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4798         (vec_concat:<VNARROWQ2>
4799           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
4800           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4801                               (match_operand:VQN 2 "register_operand" "w")]
4802                              ADDSUBHN)))]
4803   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4804   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4805   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4806 )
4807
4808 (define_expand "aarch64_<sur><addsub>hn<mode>"
4809   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4810         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
4811                             (match_operand:VQN 2 "register_operand")]
4812                            ADDSUBHN))]
4813   "TARGET_SIMD"
4814   {
4815     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
4816     if (BYTES_BIG_ENDIAN)
4817       emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
4818                                 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4819     else
4820       emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
4821                                 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4822
4823     /* The intrinsic expects a narrow result, so emit a subreg that will get
4824        optimized away as appropriate.  */
4825     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
4826                                                  <VNARROWQ2>mode));
4827     DONE;
4828   }
4829 )
4830
4831 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
4832   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4833         (vec_concat:<VNARROWQ2>
4834           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4835           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4836                               (match_operand:VQN 3 "register_operand" "w")]
4837                              ADDSUBHN)))]
4838   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4839   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4840   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4841 )
4842
4843 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_be"
4844   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4845         (vec_concat:<VNARROWQ2>
4846           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4847                               (match_operand:VQN 3 "register_operand" "w")]
4848                              ADDSUBHN)
4849           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4850   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4851   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4852   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
4853 )
4854
4855 (define_expand "aarch64_<sur><addsub>hn2<mode>"
4856   [(match_operand:<VNARROWQ2> 0 "register_operand")
4857    (match_operand:<VNARROWQ> 1 "register_operand")
4858    (unspec [(match_operand:VQN 2 "register_operand")
4859             (match_operand:VQN 3 "register_operand")]
4860            ADDSUBHN)]
4861   "TARGET_SIMD"
4862   {
4863     if (BYTES_BIG_ENDIAN)
4864       emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_be (operands[0],
4865                                 operands[1], operands[2], operands[3]));
4866     else
4867       emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_le (operands[0],
4868                                 operands[1], operands[2], operands[3]));
4869     DONE;
4870   }
4871 )
4872
4873 ;; pmul.
4874
4875 (define_insn "aarch64_pmul<mode>"
4876   [(set (match_operand:VB 0 "register_operand" "=w")
4877         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
4878                     (match_operand:VB 2 "register_operand" "w")]
4879                    UNSPEC_PMUL))]
4880  "TARGET_SIMD"
4881  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4882   [(set_attr "type" "neon_mul_<Vetype><q>")]
4883 )
4884
4885 (define_insn "aarch64_pmullv8qi"
4886   [(set (match_operand:V8HI 0 "register_operand" "=w")
4887         (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
4888                       (match_operand:V8QI 2 "register_operand" "w")]
4889                      UNSPEC_PMULL))]
4890  "TARGET_SIMD"
4891  "pmull\\t%0.8h, %1.8b, %2.8b"
4892   [(set_attr "type" "neon_mul_b_long")]
4893 )
4894
4895 (define_insn "aarch64_pmull_hiv16qi_insn"
4896   [(set (match_operand:V8HI 0 "register_operand" "=w")
4897         (unspec:V8HI
4898           [(vec_select:V8QI
4899              (match_operand:V16QI 1 "register_operand" "w")
4900              (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
4901            (vec_select:V8QI
4902              (match_operand:V16QI 2 "register_operand" "w")
4903              (match_dup 3))]
4904           UNSPEC_PMULL))]
4905  "TARGET_SIMD"
4906  "pmull2\\t%0.8h, %1.16b, %2.16b"
4907   [(set_attr "type" "neon_mul_b_long")]
4908 )
4909
4910 (define_expand "aarch64_pmull_hiv16qi"
4911   [(match_operand:V8HI 0 "register_operand")
4912    (match_operand:V16QI 1 "register_operand")
4913    (match_operand:V16QI 2 "register_operand")]
4914  "TARGET_SIMD"
4915  {
4916    rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
4917    emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
4918                                               operands[2], p));
4919    DONE;
4920  }
4921 )
4922
4923 ;; fmulx.
4924
4925 (define_insn "aarch64_fmulx<mode>"
4926   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
4927         (unspec:VHSDF_HSDF
4928           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
4929            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
4930            UNSPEC_FMULX))]
4931  "TARGET_SIMD"
4932  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4933  [(set_attr "type" "neon_fp_mul_<stype>")]
4934 )
4935
4936 ;; vmulxq_lane_f32, and vmulx_laneq_f32
4937
4938 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
4939   [(set (match_operand:VDQSF 0 "register_operand" "=w")
4940         (unspec:VDQSF
4941          [(match_operand:VDQSF 1 "register_operand" "w")
4942           (vec_duplicate:VDQSF
4943            (vec_select:<VEL>
4944             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
4945             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4946          UNSPEC_FMULX))]
4947   "TARGET_SIMD"
4948   {
4949     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
4950     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4951   }
4952   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
4953 )
4954
4955 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
4956
4957 (define_insn "*aarch64_mulx_elt<mode>"
4958   [(set (match_operand:VDQF 0 "register_operand" "=w")
4959         (unspec:VDQF
4960          [(match_operand:VDQF 1 "register_operand" "w")
4961           (vec_duplicate:VDQF
4962            (vec_select:<VEL>
4963             (match_operand:VDQF 2 "register_operand" "w")
4964             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4965          UNSPEC_FMULX))]
4966   "TARGET_SIMD"
4967   {
4968     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4969     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4970   }
4971   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
4972 )
4973
4974 ;; vmulxq_lane
4975
4976 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
4977   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4978         (unspec:VHSDF
4979          [(match_operand:VHSDF 1 "register_operand" "w")
4980           (vec_duplicate:VHSDF
4981             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
4982          UNSPEC_FMULX))]
4983   "TARGET_SIMD"
4984   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
4985   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
4986 )
4987
4988 ;; vmulxs_lane_f32, vmulxs_laneq_f32
4989 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
4990 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
4991
4992 (define_insn "*aarch64_vgetfmulx<mode>"
4993   [(set (match_operand:<VEL> 0 "register_operand" "=w")
4994         (unspec:<VEL>
4995          [(match_operand:<VEL> 1 "register_operand" "w")
4996           (vec_select:<VEL>
4997            (match_operand:VDQF 2 "register_operand" "w")
4998             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
4999          UNSPEC_FMULX))]
5000   "TARGET_SIMD"
5001   {
5002     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5003     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5004   }
5005   [(set_attr "type" "fmul<Vetype>")]
5006 )
5007 ;; <su>q<addsub>
5008
5009 (define_insn "aarch64_<su_optab>q<addsub><mode>"
5010   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5011         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5012                         (match_operand:VSDQ_I 2 "register_operand" "w")))]
5013   "TARGET_SIMD"
5014   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5015   [(set_attr "type" "neon_q<addsub><q>")]
5016 )
5017
5018 ;; suqadd and usqadd
5019
5020 (define_insn "aarch64_<sur>qadd<mode>"
5021   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5022         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5023                         (match_operand:VSDQ_I 2 "register_operand" "w")]
5024                        USSUQADD))]
5025   "TARGET_SIMD"
5026   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5027   [(set_attr "type" "neon_qadd<q>")]
5028 )
5029
5030 ;; sqmovn and uqmovn
5031
5032 (define_insn "aarch64_<su>qmovn<mode>"
5033   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5034         (SAT_TRUNC:<VNARROWQ>
5035           (match_operand:SD_HSDI 1 "register_operand" "w")))]
5036   "TARGET_SIMD"
5037   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5038   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5039 )
5040
5041 (define_insn "aarch64_<su>qmovn<mode>_insn_le"
5042   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5043         (vec_concat:<VNARROWQ2>
5044           (SAT_TRUNC:<VNARROWQ>
5045             (match_operand:VQN 1 "register_operand" "w"))
5046           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5047   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5048   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5049   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5050 )
5051
5052 (define_insn "aarch64_<su>qmovn<mode>_insn_be"
5053   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5054         (vec_concat:<VNARROWQ2>
5055           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5056           (SAT_TRUNC:<VNARROWQ>
5057             (match_operand:VQN 1 "register_operand" "w"))))]
5058   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5059   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5060   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5061 )
5062
5063 (define_expand "aarch64_<su>qmovn<mode>"
5064   [(set (match_operand:<VNARROWQ> 0 "register_operand")
5065         (SAT_TRUNC:<VNARROWQ>
5066           (match_operand:VQN 1 "register_operand")))]
5067   "TARGET_SIMD"
5068   {
5069     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5070     if (BYTES_BIG_ENDIAN)
5071       emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
5072                                 CONST0_RTX (<VNARROWQ>mode)));
5073     else
5074       emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
5075                                 CONST0_RTX (<VNARROWQ>mode)));
5076
5077     /* The intrinsic expects a narrow result, so emit a subreg that will get
5078        optimized away as appropriate.  */
5079     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5080                                                  <VNARROWQ2>mode));
5081     DONE;
5082   }
5083 )
5084
5085 (define_insn "aarch64_<su>qxtn2<mode>_le"
5086   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5087         (vec_concat:<VNARROWQ2>
5088           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5089           (SAT_TRUNC:<VNARROWQ>
5090             (match_operand:VQN 2 "register_operand" "w"))))]
5091   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5092   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5093    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5094 )
5095
5096 (define_insn "aarch64_<su>qxtn2<mode>_be"
5097   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5098         (vec_concat:<VNARROWQ2>
5099           (SAT_TRUNC:<VNARROWQ>
5100             (match_operand:VQN 2 "register_operand" "w"))
5101           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5102   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5103   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5104    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5105 )
5106
5107 (define_expand "aarch64_<su>qxtn2<mode>"
5108   [(match_operand:<VNARROWQ2> 0 "register_operand")
5109    (match_operand:<VNARROWQ> 1 "register_operand")
5110    (SAT_TRUNC:<VNARROWQ>
5111      (match_operand:VQN 2 "register_operand"))]
5112   "TARGET_SIMD"
5113   {
5114     if (BYTES_BIG_ENDIAN)
5115       emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5116                                                  operands[2]));
5117     else
5118       emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5119                                                  operands[2]));
5120     DONE;
5121   }
5122 )
5123
5124 ;; sqmovun
5125
5126 (define_insn "aarch64_sqmovun<mode>"
5127   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5128         (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
5129                            UNSPEC_SQXTUN))]
5130    "TARGET_SIMD"
5131    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5132    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5133 )
5134
5135 (define_insn "aarch64_sqmovun<mode>_insn_le"
5136   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5137         (vec_concat:<VNARROWQ2>
5138           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5139                              UNSPEC_SQXTUN)
5140           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5141   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5142   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5143   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5144 )
5145
5146 (define_insn "aarch64_sqmovun<mode>_insn_be"
5147   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5148         (vec_concat:<VNARROWQ2>
5149           (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5150           (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5151                              UNSPEC_SQXTUN)))]
5152   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5153   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5154   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5155 )
5156
5157 (define_expand "aarch64_sqmovun<mode>"
5158   [(set (match_operand:<VNARROWQ> 0 "register_operand")
5159         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
5160                            UNSPEC_SQXTUN))]
5161   "TARGET_SIMD"
5162   {
5163     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5164     if (BYTES_BIG_ENDIAN)
5165       emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
5166                                 CONST0_RTX (<VNARROWQ>mode)));
5167     else
5168       emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
5169                                 CONST0_RTX (<VNARROWQ>mode)));
5170
5171     /* The intrinsic expects a narrow result, so emit a subreg that will get
5172        optimized away as appropriate.  */
5173     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
5174                                                  <VNARROWQ2>mode));
5175     DONE;
5176   }
5177 )
5178
5179 (define_insn "aarch64_sqxtun2<mode>_le"
5180   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5181         (vec_concat:<VNARROWQ2>
5182           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5183           (unspec:<VNARROWQ>
5184             [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
5185   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5186   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5187    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5188 )
5189
5190 (define_insn "aarch64_sqxtun2<mode>_be"
5191   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5192         (vec_concat:<VNARROWQ2>
5193           (unspec:<VNARROWQ>
5194             [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
5195           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5196   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5197   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5198    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5199 )
5200
5201 (define_expand "aarch64_sqxtun2<mode>"
5202   [(match_operand:<VNARROWQ2> 0 "register_operand")
5203    (match_operand:<VNARROWQ> 1 "register_operand")
5204    (unspec:<VNARROWQ>
5205      [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
5206   "TARGET_SIMD"
5207   {
5208     if (BYTES_BIG_ENDIAN)
5209       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5210                                               operands[2]));
5211     else
5212       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5213                                                operands[2]));
5214     DONE;
5215   }
5216 )
5217
5218 ;; <su>q<absneg>
5219
5220 (define_insn "aarch64_s<optab><mode>"
5221   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5222         (UNQOPS:VSDQ_I
5223           (match_operand:VSDQ_I 1 "register_operand" "w")))]
5224   "TARGET_SIMD"
5225   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5226   [(set_attr "type" "neon_<optab><q>")]
5227 )
5228
5229 ;; sq<r>dmulh.
5230
5231 (define_insn "aarch64_sq<r>dmulh<mode>"
5232   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5233         (unspec:VSDQ_HSI
5234           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5235            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5236          VQDMULH))]
5237   "TARGET_SIMD"
5238   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5239   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5240 )
5241
5242 (define_insn "aarch64_sq<r>dmulh_n<mode>"
5243   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5244         (unspec:VDQHS
5245           [(match_operand:VDQHS 1 "register_operand" "w")
5246            (vec_duplicate:VDQHS
5247              (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5248          VQDMULH))]
5249   "TARGET_SIMD"
5250   "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5251   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5252 )
5253
5254 ;; sq<r>dmulh_lane
5255
5256 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5257   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5258         (unspec:VDQHS
5259           [(match_operand:VDQHS 1 "register_operand" "w")
5260            (vec_select:<VEL>
5261              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5262              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5263          VQDMULH))]
5264   "TARGET_SIMD"
5265   "*
5266    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5267    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5268   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5269 )
5270
5271 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5272   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5273         (unspec:VDQHS
5274           [(match_operand:VDQHS 1 "register_operand" "w")
5275            (vec_select:<VEL>
5276              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5277              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5278          VQDMULH))]
5279   "TARGET_SIMD"
5280   "*
5281    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5282    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5283   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5284 )
5285
5286 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5287   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5288         (unspec:SD_HSI
5289           [(match_operand:SD_HSI 1 "register_operand" "w")
5290            (vec_select:<VEL>
5291              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5292              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5293          VQDMULH))]
5294   "TARGET_SIMD"
5295   "*
5296    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5297    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5298   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5299 )
5300
5301 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5302   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5303         (unspec:SD_HSI
5304           [(match_operand:SD_HSI 1 "register_operand" "w")
5305            (vec_select:<VEL>
5306              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5307              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5308          VQDMULH))]
5309   "TARGET_SIMD"
5310   "*
5311    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5312    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5313   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5314 )
5315
5316 ;; sqrdml[as]h.
5317
5318 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
5319   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5320         (unspec:VSDQ_HSI
5321           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5322            (match_operand:VSDQ_HSI 2 "register_operand" "w")
5323            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5324           SQRDMLH_AS))]
5325    "TARGET_SIMD_RDMA"
5326    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5327    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5328 )
5329
5330 ;; sqrdml[as]h_lane.
5331
5332 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5333   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5334         (unspec:VDQHS
5335           [(match_operand:VDQHS 1 "register_operand" "0")
5336            (match_operand:VDQHS 2 "register_operand" "w")
5337            (vec_select:<VEL>
5338              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5339              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5340           SQRDMLH_AS))]
5341    "TARGET_SIMD_RDMA"
5342    {
5343      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5344      return
5345       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5346    }
5347    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5348 )
5349
5350 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5351   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5352         (unspec:SD_HSI
5353           [(match_operand:SD_HSI 1 "register_operand" "0")
5354            (match_operand:SD_HSI 2 "register_operand" "w")
5355            (vec_select:<VEL>
5356              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5357              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5358           SQRDMLH_AS))]
5359    "TARGET_SIMD_RDMA"
5360    {
5361      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5362      return
5363       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5364    }
5365    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5366 )
5367
5368 ;; sqrdml[as]h_laneq.
5369
5370 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5371   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5372         (unspec:VDQHS
5373           [(match_operand:VDQHS 1 "register_operand" "0")
5374            (match_operand:VDQHS 2 "register_operand" "w")
5375            (vec_select:<VEL>
5376              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5377              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5378           SQRDMLH_AS))]
5379    "TARGET_SIMD_RDMA"
5380    {
5381      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5382      return
5383       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5384    }
5385    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5386 )
5387
5388 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5389   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5390         (unspec:SD_HSI
5391           [(match_operand:SD_HSI 1 "register_operand" "0")
5392            (match_operand:SD_HSI 2 "register_operand" "w")
5393            (vec_select:<VEL>
5394              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5395              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5396           SQRDMLH_AS))]
5397    "TARGET_SIMD_RDMA"
5398    {
5399      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5400      return
5401       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5402    }
5403    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5404 )
5405
5406 ;; vqdml[sa]l
5407
5408 (define_insn "aarch64_sqdmlal<mode>"
5409   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5410         (ss_plus:<VWIDE>
5411           (ss_ashift:<VWIDE>
5412               (mult:<VWIDE>
5413                 (sign_extend:<VWIDE>
5414                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5415                 (sign_extend:<VWIDE>
5416                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5417               (const_int 1))
5418           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5419   "TARGET_SIMD"
5420   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5421   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5422 )
5423
5424 (define_insn "aarch64_sqdmlsl<mode>"
5425   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5426         (ss_minus:<VWIDE>
5427           (match_operand:<VWIDE> 1 "register_operand" "0")
5428           (ss_ashift:<VWIDE>
5429               (mult:<VWIDE>
5430                 (sign_extend:<VWIDE>
5431                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5432                 (sign_extend:<VWIDE>
5433                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5434               (const_int 1))))]
5435   "TARGET_SIMD"
5436   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5437   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5438 )
5439
5440 ;; vqdml[sa]l_lane
5441
5442 (define_insn "aarch64_sqdmlal_lane<mode>"
5443   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5444         (ss_plus:<VWIDE>
5445           (ss_ashift:<VWIDE>
5446             (mult:<VWIDE>
5447               (sign_extend:<VWIDE>
5448                 (match_operand:VD_HSI 2 "register_operand" "w"))
5449               (vec_duplicate:<VWIDE>
5450                 (sign_extend:<VWIDE_S>
5451                   (vec_select:<VEL>
5452                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5453                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5454               ))
5455             (const_int 1))
5456           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5457   "TARGET_SIMD"
5458   {
5459     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5460     return
5461       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5462   }
5463   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5464 )
5465
5466 (define_insn "aarch64_sqdmlsl_lane<mode>"
5467   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5468         (ss_minus:<VWIDE>
5469           (match_operand:<VWIDE> 1 "register_operand" "0")
5470           (ss_ashift:<VWIDE>
5471             (mult:<VWIDE>
5472               (sign_extend:<VWIDE>
5473                 (match_operand:VD_HSI 2 "register_operand" "w"))
5474               (vec_duplicate:<VWIDE>
5475                 (sign_extend:<VWIDE_S>
5476                   (vec_select:<VEL>
5477                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5478                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5479               ))
5480             (const_int 1))))]
5481   "TARGET_SIMD"
5482   {
5483     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5484     return
5485       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5486   }
5487   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5488 )
5489
5490
5491 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5492   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5493         (ss_minus:<VWIDE>
5494           (match_operand:<VWIDE> 1 "register_operand" "0")
5495           (ss_ashift:<VWIDE>
5496             (mult:<VWIDE>
5497               (sign_extend:<VWIDE>
5498                 (match_operand:VD_HSI 2 "register_operand" "w"))
5499               (vec_duplicate:<VWIDE>
5500                 (sign_extend:<VWIDE_S>
5501                   (vec_select:<VEL>
5502                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5503                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5504               ))
5505             (const_int 1))))]
5506   "TARGET_SIMD"
5507   {
5508     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5509     return
5510       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5511   }
5512   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5513 )
5514
5515 (define_insn "aarch64_sqdmlal_laneq<mode>"
5516   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5517         (ss_plus:<VWIDE>
5518           (ss_ashift:<VWIDE>
5519             (mult:<VWIDE>
5520               (sign_extend:<VWIDE>
5521                 (match_operand:VD_HSI 2 "register_operand" "w"))
5522               (vec_duplicate:<VWIDE>
5523                 (sign_extend:<VWIDE_S>
5524                   (vec_select:<VEL>
5525                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5526                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5527               ))
5528             (const_int 1))
5529           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5530   "TARGET_SIMD"
5531   {
5532     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5533     return
5534       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5535   }
5536   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5537 )
5538
5539
5540 (define_insn "aarch64_sqdmlal_lane<mode>"
5541   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5542         (ss_plus:<VWIDE>
5543           (ss_ashift:<VWIDE>
5544             (mult:<VWIDE>
5545               (sign_extend:<VWIDE>
5546                 (match_operand:SD_HSI 2 "register_operand" "w"))
5547               (sign_extend:<VWIDE>
5548                 (vec_select:<VEL>
5549                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5550                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5551               )
5552             (const_int 1))
5553           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5554   "TARGET_SIMD"
5555   {
5556     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5557     return
5558       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5559   }
5560   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5561 )
5562
5563 (define_insn "aarch64_sqdmlsl_lane<mode>"
5564   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5565         (ss_minus:<VWIDE>
5566           (match_operand:<VWIDE> 1 "register_operand" "0")
5567           (ss_ashift:<VWIDE>
5568             (mult:<VWIDE>
5569               (sign_extend:<VWIDE>
5570                 (match_operand:SD_HSI 2 "register_operand" "w"))
5571               (sign_extend:<VWIDE>
5572                 (vec_select:<VEL>
5573                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5574                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5575               )
5576             (const_int 1))))]
5577   "TARGET_SIMD"
5578   {
5579     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5580     return
5581       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5582   }
5583   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5584 )
5585
5586
5587 (define_insn "aarch64_sqdmlal_laneq<mode>"
5588   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5589         (ss_plus:<VWIDE>
5590           (ss_ashift:<VWIDE>
5591             (mult:<VWIDE>
5592               (sign_extend:<VWIDE>
5593                 (match_operand:SD_HSI 2 "register_operand" "w"))
5594               (sign_extend:<VWIDE>
5595                 (vec_select:<VEL>
5596                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5597                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5598               )
5599             (const_int 1))
5600           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5601   "TARGET_SIMD"
5602   {
5603     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5604     return
5605       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5606   }
5607   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5608 )
5609
5610 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5611   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5612         (ss_minus:<VWIDE>
5613           (match_operand:<VWIDE> 1 "register_operand" "0")
5614           (ss_ashift:<VWIDE>
5615             (mult:<VWIDE>
5616               (sign_extend:<VWIDE>
5617                 (match_operand:SD_HSI 2 "register_operand" "w"))
5618               (sign_extend:<VWIDE>
5619                 (vec_select:<VEL>
5620                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5621                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5622               )
5623             (const_int 1))))]
5624   "TARGET_SIMD"
5625   {
5626     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5627     return
5628       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5629   }
5630   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5631 )
5632
5633 ;; vqdml[sa]l_n
5634
5635 (define_insn "aarch64_sqdmlsl_n<mode>"
5636   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5637         (ss_minus:<VWIDE>
5638           (match_operand:<VWIDE> 1 "register_operand" "0")
5639           (ss_ashift:<VWIDE>
5640               (mult:<VWIDE>
5641                 (sign_extend:<VWIDE>
5642                       (match_operand:VD_HSI 2 "register_operand" "w"))
5643                 (vec_duplicate:<VWIDE>
5644                   (sign_extend:<VWIDE_S>
5645                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5646               (const_int 1))))]
5647   "TARGET_SIMD"
5648   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5649   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5650 )
5651
5652 (define_insn "aarch64_sqdmlal_n<mode>"
5653   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5654         (ss_plus:<VWIDE>
5655           (ss_ashift:<VWIDE>
5656               (mult:<VWIDE>
5657                 (sign_extend:<VWIDE>
5658                       (match_operand:VD_HSI 2 "register_operand" "w"))
5659                 (vec_duplicate:<VWIDE>
5660                   (sign_extend:<VWIDE_S>
5661                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5662               (const_int 1))
5663           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5664   "TARGET_SIMD"
5665   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5666   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5667 )
5668
5669
5670 ;; sqdml[as]l2
5671
5672 (define_insn "aarch64_sqdmlal2<mode>_internal"
5673   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5674         (ss_plus:<VWIDE>
5675          (ss_ashift:<VWIDE>
5676              (mult:<VWIDE>
5677                (sign_extend:<VWIDE>
5678                  (vec_select:<VHALF>
5679                      (match_operand:VQ_HSI 2 "register_operand" "w")
5680                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5681                (sign_extend:<VWIDE>
5682                  (vec_select:<VHALF>
5683                      (match_operand:VQ_HSI 3 "register_operand" "w")
5684                      (match_dup 4))))
5685              (const_int 1))
5686           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5687   "TARGET_SIMD"
5688   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5689   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5690 )
5691
5692 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5693   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5694         (ss_minus:<VWIDE>
5695          (match_operand:<VWIDE> 1 "register_operand" "0")
5696          (ss_ashift:<VWIDE>
5697              (mult:<VWIDE>
5698                (sign_extend:<VWIDE>
5699                  (vec_select:<VHALF>
5700                      (match_operand:VQ_HSI 2 "register_operand" "w")
5701                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5702                (sign_extend:<VWIDE>
5703                  (vec_select:<VHALF>
5704                      (match_operand:VQ_HSI 3 "register_operand" "w")
5705                      (match_dup 4))))
5706              (const_int 1))))]
5707   "TARGET_SIMD"
5708   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5709   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5710 )
5711
5712 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5713   [(match_operand:<VWIDE> 0 "register_operand")
5714    (SBINQOPS:<VWIDE>
5715      (match_operand:<VWIDE> 1 "register_operand")
5716      (match_dup 1))
5717    (match_operand:VQ_HSI 2 "register_operand")
5718    (match_operand:VQ_HSI 3 "register_operand")]
5719   "TARGET_SIMD"
5720 {
5721   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5722   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5723                                                 operands[1], operands[2],
5724                                                 operands[3], p));
5725   DONE;
5726 })
5727
5728 ;; vqdml[sa]l2_lane
5729
5730 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5731   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5732         (ss_minus:<VWIDE>
5733           (match_operand:<VWIDE> 1 "register_operand" "0")
5734           (ss_ashift:<VWIDE>
5735               (mult:<VWIDE>
5736                 (sign_extend:<VWIDE>
5737                   (vec_select:<VHALF>
5738                     (match_operand:VQ_HSI 2 "register_operand" "w")
5739                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5740                 (vec_duplicate:<VWIDE>
5741                   (sign_extend:<VWIDE_S>
5742                     (vec_select:<VEL>
5743                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5744                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5745                     ))))
5746               (const_int 1))))]
5747   "TARGET_SIMD"
5748   {
5749     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5750     return
5751      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5752   }
5753   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5754 )
5755
5756 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5757   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5758         (ss_plus:<VWIDE>
5759           (ss_ashift:<VWIDE>
5760               (mult:<VWIDE>
5761                 (sign_extend:<VWIDE>
5762                   (vec_select:<VHALF>
5763                     (match_operand:VQ_HSI 2 "register_operand" "w")
5764                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5765                 (vec_duplicate:<VWIDE>
5766                   (sign_extend:<VWIDE_S>
5767                     (vec_select:<VEL>
5768                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5769                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5770                     ))))
5771               (const_int 1))
5772           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5773   "TARGET_SIMD"
5774   {
5775     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5776     return
5777      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5778   }
5779   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5780 )
5781
5782 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5783   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5784         (ss_minus:<VWIDE>
5785           (match_operand:<VWIDE> 1 "register_operand" "0")
5786           (ss_ashift:<VWIDE>
5787               (mult:<VWIDE>
5788                 (sign_extend:<VWIDE>
5789                   (vec_select:<VHALF>
5790                     (match_operand:VQ_HSI 2 "register_operand" "w")
5791                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5792                 (vec_duplicate:<VWIDE>
5793                   (sign_extend:<VWIDE_S>
5794                     (vec_select:<VEL>
5795                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5796                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5797                     ))))
5798               (const_int 1))))]
5799   "TARGET_SIMD"
5800   {
5801     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5802     return
5803      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5804   }
5805   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5806 )
5807
5808 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5809   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5810         (ss_plus:<VWIDE>
5811           (ss_ashift:<VWIDE>
5812               (mult:<VWIDE>
5813                 (sign_extend:<VWIDE>
5814                   (vec_select:<VHALF>
5815                     (match_operand:VQ_HSI 2 "register_operand" "w")
5816                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5817                 (vec_duplicate:<VWIDE>
5818                   (sign_extend:<VWIDE_S>
5819                     (vec_select:<VEL>
5820                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5822                     ))))
5823               (const_int 1))
5824           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5825   "TARGET_SIMD"
5826   {
5827     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5828     return
5829      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5830   }
5831   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5832 )
5833
5834 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
5835   [(match_operand:<VWIDE> 0 "register_operand")
5836    (SBINQOPS:<VWIDE>
5837      (match_operand:<VWIDE> 1 "register_operand")
5838      (match_dup 1))
5839    (match_operand:VQ_HSI 2 "register_operand")
5840    (match_operand:<VCOND> 3 "register_operand")
5841    (match_operand:SI 4 "immediate_operand")]
5842   "TARGET_SIMD"
5843 {
5844   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5845   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
5846                                                 operands[1], operands[2],
5847                                                 operands[3], operands[4], p));
5848   DONE;
5849 })
5850
5851 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
5852   [(match_operand:<VWIDE> 0 "register_operand")
5853    (SBINQOPS:<VWIDE>
5854      (match_operand:<VWIDE> 1 "register_operand")
5855      (match_dup 1))
5856    (match_operand:VQ_HSI 2 "register_operand")
5857    (match_operand:<VCONQ> 3 "register_operand")
5858    (match_operand:SI 4 "immediate_operand")]
5859   "TARGET_SIMD"
5860 {
5861   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5862   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
5863                                                 operands[1], operands[2],
5864                                                 operands[3], operands[4], p));
5865   DONE;
5866 })
5867
5868 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
5869   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5870         (ss_minus:<VWIDE>
5871           (match_operand:<VWIDE> 1 "register_operand" "0")
5872           (ss_ashift:<VWIDE>
5873             (mult:<VWIDE>
5874               (sign_extend:<VWIDE>
5875                 (vec_select:<VHALF>
5876                   (match_operand:VQ_HSI 2 "register_operand" "w")
5877                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5878               (vec_duplicate:<VWIDE>
5879                 (sign_extend:<VWIDE_S>
5880                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5881             (const_int 1))))]
5882   "TARGET_SIMD"
5883   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5884   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5885 )
5886
5887 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
5888   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5889         (ss_plus:<VWIDE>
5890           (ss_ashift:<VWIDE>
5891             (mult:<VWIDE>
5892               (sign_extend:<VWIDE>
5893                 (vec_select:<VHALF>
5894                   (match_operand:VQ_HSI 2 "register_operand" "w")
5895                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5896               (vec_duplicate:<VWIDE>
5897                 (sign_extend:<VWIDE_S>
5898                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5899             (const_int 1))
5900           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5901   "TARGET_SIMD"
5902   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5903   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5904 )
5905
5906 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
5907   [(match_operand:<VWIDE> 0 "register_operand")
5908    (SBINQOPS:<VWIDE>
5909      (match_operand:<VWIDE> 1 "register_operand")
5910      (match_dup 1))
5911    (match_operand:VQ_HSI 2 "register_operand")
5912    (match_operand:<VEL> 3 "register_operand")]
5913   "TARGET_SIMD"
5914 {
5915   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5916   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
5917                                                 operands[1], operands[2],
5918                                                 operands[3], p));
5919   DONE;
5920 })
5921
5922 ;; vqdmull
5923
5924 (define_insn "aarch64_sqdmull<mode>"
5925   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5926         (ss_ashift:<VWIDE>
5927              (mult:<VWIDE>
5928                (sign_extend:<VWIDE>
5929                      (match_operand:VSD_HSI 1 "register_operand" "w"))
5930                (sign_extend:<VWIDE>
5931                      (match_operand:VSD_HSI 2 "register_operand" "w")))
5932              (const_int 1)))]
5933   "TARGET_SIMD"
5934   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5935   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
5936 )
5937
5938 ;; vqdmull_lane
5939
5940 (define_insn "aarch64_sqdmull_lane<mode>"
5941   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5942         (ss_ashift:<VWIDE>
5943              (mult:<VWIDE>
5944                (sign_extend:<VWIDE>
5945                  (match_operand:VD_HSI 1 "register_operand" "w"))
5946                (vec_duplicate:<VWIDE>
5947                  (sign_extend:<VWIDE_S>
5948                    (vec_select:<VEL>
5949                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5950                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5951                ))
5952              (const_int 1)))]
5953   "TARGET_SIMD"
5954   {
5955     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5956     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5957   }
5958   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
5959 )
5960
5961 (define_insn "aarch64_sqdmull_laneq<mode>"
5962   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5963         (ss_ashift:<VWIDE>
5964              (mult:<VWIDE>
5965                (sign_extend:<VWIDE>
5966                  (match_operand:VD_HSI 1 "register_operand" "w"))
5967                (vec_duplicate:<VWIDE>
5968                  (sign_extend:<VWIDE_S>
5969                    (vec_select:<VEL>
5970                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5971                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5972                ))
5973              (const_int 1)))]
5974   "TARGET_SIMD"
5975   {
5976     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5977     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5978   }
5979   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
5980 )
5981
5982 (define_insn "aarch64_sqdmull_lane<mode>"
5983   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5984         (ss_ashift:<VWIDE>
5985              (mult:<VWIDE>
5986                (sign_extend:<VWIDE>
5987                  (match_operand:SD_HSI 1 "register_operand" "w"))
5988                (sign_extend:<VWIDE>
5989                  (vec_select:<VEL>
5990                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5991                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
5992                ))
5993              (const_int 1)))]
5994   "TARGET_SIMD"
5995   {
5996     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5997     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5998   }
5999   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6000 )
6001
6002 (define_insn "aarch64_sqdmull_laneq<mode>"
6003   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6004         (ss_ashift:<VWIDE>
6005              (mult:<VWIDE>
6006                (sign_extend:<VWIDE>
6007                  (match_operand:SD_HSI 1 "register_operand" "w"))
6008                (sign_extend:<VWIDE>
6009                  (vec_select:<VEL>
6010                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6011                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6012                ))
6013              (const_int 1)))]
6014   "TARGET_SIMD"
6015   {
6016     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6017     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6018   }
6019   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6020 )
6021
6022 ;; vqdmull_n
6023
6024 (define_insn "aarch64_sqdmull_n<mode>"
6025   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6026         (ss_ashift:<VWIDE>
6027              (mult:<VWIDE>
6028                (sign_extend:<VWIDE>
6029                  (match_operand:VD_HSI 1 "register_operand" "w"))
6030                (vec_duplicate:<VWIDE>
6031                  (sign_extend:<VWIDE_S>
6032                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6033                )
6034              (const_int 1)))]
6035   "TARGET_SIMD"
6036   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6037   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6038 )
6039
6040 ;; vqdmull2
6041
6042 (define_insn "aarch64_sqdmull2<mode>_internal"
6043   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6044         (ss_ashift:<VWIDE>
6045              (mult:<VWIDE>
6046                (sign_extend:<VWIDE>
6047                  (vec_select:<VHALF>
6048                    (match_operand:VQ_HSI 1 "register_operand" "w")
6049                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6050                (sign_extend:<VWIDE>
6051                  (vec_select:<VHALF>
6052                    (match_operand:VQ_HSI 2 "register_operand" "w")
6053                    (match_dup 3)))
6054                )
6055              (const_int 1)))]
6056   "TARGET_SIMD"
6057   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6058   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6059 )
6060
6061 (define_expand "aarch64_sqdmull2<mode>"
6062   [(match_operand:<VWIDE> 0 "register_operand")
6063    (match_operand:VQ_HSI 1 "register_operand")
6064    (match_operand:VQ_HSI 2 "register_operand")]
6065   "TARGET_SIMD"
6066 {
6067   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6068   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6069                                                   operands[2], p));
6070   DONE;
6071 })
6072
6073 ;; vqdmull2_lane
6074
6075 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6076   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6077         (ss_ashift:<VWIDE>
6078              (mult:<VWIDE>
6079                (sign_extend:<VWIDE>
6080                  (vec_select:<VHALF>
6081                    (match_operand:VQ_HSI 1 "register_operand" "w")
6082                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6083                (vec_duplicate:<VWIDE>
6084                  (sign_extend:<VWIDE_S>
6085                    (vec_select:<VEL>
6086                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6087                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6088                ))
6089              (const_int 1)))]
6090   "TARGET_SIMD"
6091   {
6092     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6093     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6094   }
6095   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6096 )
6097
6098 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6099   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6100         (ss_ashift:<VWIDE>
6101              (mult:<VWIDE>
6102                (sign_extend:<VWIDE>
6103                  (vec_select:<VHALF>
6104                    (match_operand:VQ_HSI 1 "register_operand" "w")
6105                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6106                (vec_duplicate:<VWIDE>
6107                  (sign_extend:<VWIDE_S>
6108                    (vec_select:<VEL>
6109                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6110                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6111                ))
6112              (const_int 1)))]
6113   "TARGET_SIMD"
6114   {
6115     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6116     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6117   }
6118   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6119 )
6120
6121 (define_expand "aarch64_sqdmull2_lane<mode>"
6122   [(match_operand:<VWIDE> 0 "register_operand")
6123    (match_operand:VQ_HSI 1 "register_operand")
6124    (match_operand:<VCOND> 2 "register_operand")
6125    (match_operand:SI 3 "immediate_operand")]
6126   "TARGET_SIMD"
6127 {
6128   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6129   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6130                                                        operands[2], operands[3],
6131                                                        p));
6132   DONE;
6133 })
6134
6135 (define_expand "aarch64_sqdmull2_laneq<mode>"
6136   [(match_operand:<VWIDE> 0 "register_operand")
6137    (match_operand:VQ_HSI 1 "register_operand")
6138    (match_operand:<VCONQ> 2 "register_operand")
6139    (match_operand:SI 3 "immediate_operand")]
6140   "TARGET_SIMD"
6141 {
6142   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6143   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6144                                                        operands[2], operands[3],
6145                                                        p));
6146   DONE;
6147 })
6148
6149 ;; vqdmull2_n
6150
6151 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6152   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6153         (ss_ashift:<VWIDE>
6154              (mult:<VWIDE>
6155                (sign_extend:<VWIDE>
6156                  (vec_select:<VHALF>
6157                    (match_operand:VQ_HSI 1 "register_operand" "w")
6158                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6159                (vec_duplicate:<VWIDE>
6160                  (sign_extend:<VWIDE_S>
6161                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6162                )
6163              (const_int 1)))]
6164   "TARGET_SIMD"
6165   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6166   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6167 )
6168
6169 (define_expand "aarch64_sqdmull2_n<mode>"
6170   [(match_operand:<VWIDE> 0 "register_operand")
6171    (match_operand:VQ_HSI 1 "register_operand")
6172    (match_operand:<VEL> 2 "register_operand")]
6173   "TARGET_SIMD"
6174 {
6175   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6176   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6177                                                     operands[2], p));
6178   DONE;
6179 })
6180
6181 ;; vshl
6182
6183 (define_insn "aarch64_<sur>shl<mode>"
6184   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6185         (unspec:VSDQ_I_DI
6186           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6187            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6188          VSHL))]
6189   "TARGET_SIMD"
6190   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6191   [(set_attr "type" "neon_shift_reg<q>")]
6192 )
6193
6194
6195 ;; vqshl
6196
6197 (define_insn "aarch64_<sur>q<r>shl<mode>"
6198   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6199         (unspec:VSDQ_I
6200           [(match_operand:VSDQ_I 1 "register_operand" "w")
6201            (match_operand:VSDQ_I 2 "register_operand" "w")]
6202          VQSHL))]
6203   "TARGET_SIMD"
6204   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6205   [(set_attr "type" "neon_sat_shift_reg<q>")]
6206 )
6207
6208 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
6209   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6210         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6211                          (match_operand:SI 2
6212                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6213                          VSHLL))]
6214   "TARGET_SIMD"
6215   {
6216     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
6217     emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
6218                                                      p, operands[2]));
6219     DONE;
6220   }
6221 )
6222
6223 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
6224    [(set (match_operand:<VWIDE> 0 "register_operand")
6225         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6226                          (match_operand:SI 2
6227                            "immediate_operand" "i")]
6228                           VSHLL))]
6229    "TARGET_SIMD"
6230    {
6231     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6232     emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
6233                                                       p, operands[2]));
6234     DONE;
6235    }
6236 )
6237
6238 ;; vshll_n
6239
6240 (define_insn "aarch64_<sur>shll<mode>_internal"
6241   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6242         (unspec:<VWIDE> [(vec_select:<VHALF>
6243                             (match_operand:VQW 1 "register_operand" "w")
6244                             (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
6245                          (match_operand:SI 3
6246                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6247                          VSHLL))]
6248   "TARGET_SIMD"
6249   {
6250     if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6251       return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6252     else
6253       return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6254   }
6255   [(set_attr "type" "neon_shift_imm_long")]
6256 )
6257
6258 (define_insn "aarch64_<sur>shll2<mode>_internal"
6259   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6260         (unspec:<VWIDE> [(vec_select:<VHALF>
6261                             (match_operand:VQW 1 "register_operand" "w")
6262                             (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
6263                          (match_operand:SI 3
6264                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6265                          VSHLL))]
6266   "TARGET_SIMD"
6267   {
6268     if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6269       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6270     else
6271       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6272   }
6273   [(set_attr "type" "neon_shift_imm_long")]
6274 )
6275
6276 (define_insn "aarch64_<sur>shll_n<mode>"
6277   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6278         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
6279                          (match_operand:SI 2
6280                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6281                          VSHLL))]
6282   "TARGET_SIMD"
6283   {
6284     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6285       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6286     else
6287       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6288   }
6289   [(set_attr "type" "neon_shift_imm_long")]
6290 )
6291
6292 ;; vshll_high_n
6293
6294 (define_insn "aarch64_<sur>shll2_n<mode>"
6295   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6296         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6297                          (match_operand:SI 2 "immediate_operand" "i")]
6298                          VSHLL))]
6299   "TARGET_SIMD"
6300   {
6301     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6302       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6303     else
6304       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6305   }
6306   [(set_attr "type" "neon_shift_imm_long")]
6307 )
6308
6309 ;; vrshr_n
6310
6311 (define_insn "aarch64_<sur>shr_n<mode>"
6312   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6313         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6314                            (match_operand:SI 2
6315                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6316                           VRSHR_N))]
6317   "TARGET_SIMD"
6318   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6319   [(set_attr "type" "neon_sat_shift_imm<q>")]
6320 )
6321
6322 ;; v(r)sra_n
6323
6324 (define_insn "aarch64_<sur>sra_n<mode>"
6325   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6326         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6327                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6328                        (match_operand:SI 3
6329                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6330                       VSRA))]
6331   "TARGET_SIMD"
6332   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6333   [(set_attr "type" "neon_shift_acc<q>")]
6334 )
6335
6336 ;; vs<lr>i_n
6337
6338 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6339   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6340         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6341                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6342                        (match_operand:SI 3
6343                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6344                       VSLRI))]
6345   "TARGET_SIMD"
6346   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6347   [(set_attr "type" "neon_shift_imm<q>")]
6348 )
6349
6350 ;; vqshl(u)
6351
6352 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6353   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6354         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6355                        (match_operand:SI 2
6356                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
6357                       VQSHL_N))]
6358   "TARGET_SIMD"
6359   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6360   [(set_attr "type" "neon_sat_shift_imm<q>")]
6361 )
6362
6363
6364 ;; vq(r)shr(u)n_n
6365
6366 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
6367   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6368         (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
6369                             (match_operand:SI 2
6370                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6371                            VQSHRN_N))]
6372   "TARGET_SIMD"
6373   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6374   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6375 )
6376
6377 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
6378   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6379         (vec_concat:<VNARROWQ2>
6380           (unspec:<VNARROWQ>
6381                 [(match_operand:VQN 1 "register_operand" "w")
6382                  (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6383                 VQSHRN_N)
6384           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
6385   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6386   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6387   [(set_attr "type" "neon_shift_imm_narrow_q")]
6388 )
6389
6390 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
6391   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6392         (vec_concat:<VNARROWQ2>
6393           (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
6394           (unspec:<VNARROWQ>
6395                 [(match_operand:VQN 1 "register_operand" "w")
6396                  (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6397                 VQSHRN_N)))]
6398   "TARGET_SIMD && BYTES_BIG_ENDIAN"
6399   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6400   [(set_attr "type" "neon_shift_imm_narrow_q")]
6401 )
6402
6403 (define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
6404   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6405         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
6406                             (match_operand:SI 2
6407                               "aarch64_simd_shift_imm_offset_<ve_mode>")]
6408                            VQSHRN_N))]
6409   "TARGET_SIMD"
6410   {
6411     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6412                                                  INTVAL (operands[2]));
6413     rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
6414     if (BYTES_BIG_ENDIAN)
6415       emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
6416                     operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6417     else
6418       emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
6419                     operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6420
6421     /* The intrinsic expects a narrow result, so emit a subreg that will get
6422        optimized away as appropriate.  */
6423     emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
6424                                                  <VNARROWQ2>mode));
6425     DONE;
6426   }
6427 )
6428
6429 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
6430   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6431         (vec_concat:<VNARROWQ2>
6432           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6433           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6434                               (match_operand:VQN 3
6435                                 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6436                              VQSHRN_N)))]
6437   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6438   "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6439   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6440 )
6441
6442 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
6443   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6444         (vec_concat:<VNARROWQ2>
6445           (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6446                               (match_operand:VQN 3
6447                                 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6448                              VQSHRN_N)
6449           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6450   "TARGET_SIMD && BYTES_BIG_ENDIAN"
6451   "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6452   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6453 )
6454
6455 (define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
6456   [(match_operand:<VNARROWQ2> 0 "register_operand")
6457    (match_operand:<VNARROWQ> 1 "register_operand")
6458    (unspec:<VNARROWQ>
6459         [(match_operand:VQN 2 "register_operand")
6460          (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6461         VQSHRN_N)]
6462   "TARGET_SIMD"
6463   {
6464     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6465                                                  INTVAL (operands[3]));
6466
6467     if (BYTES_BIG_ENDIAN)
6468       emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
6469                                 operands[1], operands[2], operands[3]));
6470     else
6471       emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
6472                                 operands[1], operands[2], operands[3]));
6473     DONE;
6474   }
6475 )
6476
6477
6478 ;; cm(eq|ge|gt|lt|le)
6479 ;; Note, we have constraints for Dz and Z as different expanders
6480 ;; have different ideas of what should be passed to this pattern.
6481
6482 (define_insn "aarch64_cm<optab><mode>"
6483   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6484         (neg:<V_INT_EQUIV>
6485           (COMPARISONS:<V_INT_EQUIV>
6486             (match_operand:VDQ_I 1 "register_operand" "w,w")
6487             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6488           )))]
6489   "TARGET_SIMD"
6490   "@
6491   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6492   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
6493   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
6494 )
6495
6496 (define_insn_and_split "aarch64_cm<optab>di"
6497   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
6498         (neg:DI
6499           (COMPARISONS:DI
6500             (match_operand:DI 1 "register_operand" "w,w,r")
6501             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
6502           )))
6503      (clobber (reg:CC CC_REGNUM))]
6504   "TARGET_SIMD"
6505   "#"
6506   "&& reload_completed"
6507   [(set (match_operand:DI 0 "register_operand")
6508         (neg:DI
6509           (COMPARISONS:DI
6510             (match_operand:DI 1 "register_operand")
6511             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6512           )))]
6513   {
6514     /* If we are in the general purpose register file,
6515        we split to a sequence of comparison and store.  */
6516     if (GP_REGNUM_P (REGNO (operands[0]))
6517         && GP_REGNUM_P (REGNO (operands[1])))
6518       {
6519         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
6520         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6521         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6522         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6523         DONE;
6524       }
6525     /* Otherwise, we expand to a similar pattern which does not
6526        clobber CC_REGNUM.  */
6527   }
6528   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
6529 )
6530
6531 (define_insn "*aarch64_cm<optab>di"
6532   [(set (match_operand:DI 0 "register_operand" "=w,w")
6533         (neg:DI
6534           (COMPARISONS:DI
6535             (match_operand:DI 1 "register_operand" "w,w")
6536             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6537           )))]
6538   "TARGET_SIMD && reload_completed"
6539   "@
6540   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
6541   cm<optab>\t%d0, %d1, #0"
6542   [(set_attr "type" "neon_compare, neon_compare_zero")]
6543 )
6544
6545 ;; cm(hs|hi)
6546
6547 (define_insn "aarch64_cm<optab><mode>"
6548   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6549         (neg:<V_INT_EQUIV>
6550           (UCOMPARISONS:<V_INT_EQUIV>
6551             (match_operand:VDQ_I 1 "register_operand" "w")
6552             (match_operand:VDQ_I 2 "register_operand" "w")
6553           )))]
6554   "TARGET_SIMD"
6555   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6556   [(set_attr "type" "neon_compare<q>")]
6557 )
6558
6559 (define_insn_and_split "aarch64_cm<optab>di"
6560   [(set (match_operand:DI 0 "register_operand" "=w,r")
6561         (neg:DI
6562           (UCOMPARISONS:DI
6563             (match_operand:DI 1 "register_operand" "w,r")
6564             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
6565           )))
6566     (clobber (reg:CC CC_REGNUM))]
6567   "TARGET_SIMD"
6568   "#"
6569   "&& reload_completed"
6570   [(set (match_operand:DI 0 "register_operand")
6571         (neg:DI
6572           (UCOMPARISONS:DI
6573             (match_operand:DI 1 "register_operand")
6574             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6575           )))]
6576   {
6577     /* If we are in the general purpose register file,
6578        we split to a sequence of comparison and store.  */
6579     if (GP_REGNUM_P (REGNO (operands[0]))
6580         && GP_REGNUM_P (REGNO (operands[1])))
6581       {
6582         machine_mode mode = CCmode;
6583         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6584         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6585         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6586         DONE;
6587       }
6588     /* Otherwise, we expand to a similar pattern which does not
6589        clobber CC_REGNUM.  */
6590   }
6591   [(set_attr "type" "neon_compare,multiple")]
6592 )
6593
6594 (define_insn "*aarch64_cm<optab>di"
6595   [(set (match_operand:DI 0 "register_operand" "=w")
6596         (neg:DI
6597           (UCOMPARISONS:DI
6598             (match_operand:DI 1 "register_operand" "w")
6599             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
6600           )))]
6601   "TARGET_SIMD && reload_completed"
6602   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
6603   [(set_attr "type" "neon_compare")]
6604 )
6605
6606 ;; cmtst
6607
6608 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
6609 ;; we don't have any insns using ne, and aarch64_vcond outputs
6610 ;; not (neg (eq (and x y) 0))
6611 ;; which is rewritten by simplify_rtx as
6612 ;; plus (eq (and x y) 0) -1.
6613
6614 (define_insn "aarch64_cmtst<mode>"
6615   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6616         (plus:<V_INT_EQUIV>
6617           (eq:<V_INT_EQUIV>
6618             (and:VDQ_I
6619               (match_operand:VDQ_I 1 "register_operand" "w")
6620               (match_operand:VDQ_I 2 "register_operand" "w"))
6621             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
6622           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
6623   ]
6624   "TARGET_SIMD"
6625   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6626   [(set_attr "type" "neon_tst<q>")]
6627 )
6628
6629 ;; One can also get a cmtsts by having to combine a
6630 ;; not (neq (eq x 0)) in which case you rewrite it to
6631 ;; a comparison against itself
6632
6633 (define_insn "*aarch64_cmtst_same_<mode>"
6634   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6635         (plus:<V_INT_EQUIV>
6636           (eq:<V_INT_EQUIV>
6637             (match_operand:VDQ_I 1 "register_operand" "w")
6638             (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
6639           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
6640   ]
6641   "TARGET_SIMD"
6642   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
6643   [(set_attr "type" "neon_tst<q>")]
6644 )
6645
6646 (define_insn_and_split "aarch64_cmtstdi"
6647   [(set (match_operand:DI 0 "register_operand" "=w,r")
6648         (neg:DI
6649           (ne:DI
6650             (and:DI
6651               (match_operand:DI 1 "register_operand" "w,r")
6652               (match_operand:DI 2 "register_operand" "w,r"))
6653             (const_int 0))))
6654     (clobber (reg:CC CC_REGNUM))]
6655   "TARGET_SIMD"
6656   "#"
6657   "&& reload_completed"
6658   [(set (match_operand:DI 0 "register_operand")
6659         (neg:DI
6660           (ne:DI
6661             (and:DI
6662               (match_operand:DI 1 "register_operand")
6663               (match_operand:DI 2 "register_operand"))
6664             (const_int 0))))]
6665   {
6666     /* If we are in the general purpose register file,
6667        we split to a sequence of comparison and store.  */
6668     if (GP_REGNUM_P (REGNO (operands[0]))
6669         && GP_REGNUM_P (REGNO (operands[1])))
6670       {
6671         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
6672         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
6673         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
6674         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
6675         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6676         DONE;
6677       }
6678     /* Otherwise, we expand to a similar pattern which does not
6679        clobber CC_REGNUM.  */
6680   }
6681   [(set_attr "type" "neon_tst,multiple")]
6682 )
6683
6684 (define_insn "*aarch64_cmtstdi"
6685   [(set (match_operand:DI 0 "register_operand" "=w")
6686         (neg:DI
6687           (ne:DI
6688             (and:DI
6689               (match_operand:DI 1 "register_operand" "w")
6690               (match_operand:DI 2 "register_operand" "w"))
6691             (const_int 0))))]
6692   "TARGET_SIMD"
6693   "cmtst\t%d0, %d1, %d2"
6694   [(set_attr "type" "neon_tst")]
6695 )
6696
6697 ;; fcm(eq|ge|gt|le|lt)
6698
6699 (define_insn "aarch64_cm<optab><mode>"
6700   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6701         (neg:<V_INT_EQUIV>
6702           (COMPARISONS:<V_INT_EQUIV>
6703             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
6704             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
6705           )))]
6706   "TARGET_SIMD"
6707   "@
6708   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6709   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
6710   [(set_attr "type" "neon_fp_compare_<stype><q>")]
6711 )
6712
6713 ;; fac(ge|gt)
6714 ;; Note we can also handle what would be fac(le|lt) by
6715 ;; generating fac(ge|gt).
6716
6717 (define_insn "aarch64_fac<optab><mode>"
6718   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6719         (neg:<V_INT_EQUIV>
6720           (FAC_COMPARISONS:<V_INT_EQUIV>
6721             (abs:VHSDF_HSDF
6722               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
6723             (abs:VHSDF_HSDF
6724               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
6725   )))]
6726   "TARGET_SIMD"
6727   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6728   [(set_attr "type" "neon_fp_compare_<stype><q>")]
6729 )
6730
6731 ;; addp
6732
6733 (define_insn "aarch64_addp<mode>"
6734   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
6735         (unspec:VDQ_I
6736           [(match_operand:VDQ_I 1 "register_operand" "w")
6737            (match_operand:VDQ_I 2 "register_operand" "w")]
6738           UNSPEC_ADDP))]
6739   "TARGET_SIMD"
6740   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6741   [(set_attr "type" "neon_reduc_add<q>")]
6742 )
6743
6744 (define_insn "aarch64_addpdi"
6745   [(set (match_operand:DI 0 "register_operand" "=w")
6746         (unspec:DI
6747           [(match_operand:V2DI 1 "register_operand" "w")]
6748           UNSPEC_ADDP))]
6749   "TARGET_SIMD"
6750   "addp\t%d0, %1.2d"
6751   [(set_attr "type" "neon_reduc_add")]
6752 )
6753
6754 ;; sqrt
6755
6756 (define_expand "sqrt<mode>2"
6757   [(set (match_operand:VHSDF 0 "register_operand")
6758         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
6759   "TARGET_SIMD"
6760 {
6761   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
6762     DONE;
6763 })
6764
6765 (define_insn "*sqrt<mode>2"
6766   [(set (match_operand:VHSDF 0 "register_operand" "=w")
6767         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
6768   "TARGET_SIMD"
6769   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
6770   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
6771 )
6772
6773 ;; Patterns for vector struct loads and stores.
6774
6775 (define_insn "aarch64_simd_ld2<vstruct_elt>"
6776   [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
6777         (unspec:VSTRUCT_2Q [
6778           (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
6779           UNSPEC_LD2))]
6780   "TARGET_SIMD"
6781   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6782   [(set_attr "type" "neon_load2_2reg<q>")]
6783 )
6784
6785 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
6786   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6787         (unspec:VSTRUCT_2QD [
6788           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6789           UNSPEC_LD2_DUP))]
6790   "TARGET_SIMD"
6791   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6792   [(set_attr "type" "neon_load2_all_lanes<q>")]
6793 )
6794
6795 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6796   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6797         (unspec:VSTRUCT_2QD [
6798                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6799                 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
6800                 (match_operand:SI 3 "immediate_operand" "i")]
6801                 UNSPEC_LD2_LANE))]
6802   "TARGET_SIMD"
6803   {
6804     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6805                                            INTVAL (operands[3]));
6806     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
6807   }
6808   [(set_attr "type" "neon_load2_one_lane")]
6809 )
6810
6811 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6812   [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
6813         (unspec:VSTRUCT_2Q [
6814                 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
6815                 UNSPEC_LD2))]
6816   "TARGET_SIMD"
6817 {
6818   if (BYTES_BIG_ENDIAN)
6819     {
6820       rtx tmp = gen_reg_rtx (<MODE>mode);
6821       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6822                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6823       emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
6824       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6825     }
6826   else
6827     emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
6828   DONE;
6829 })
6830
6831 (define_insn "aarch64_simd_st2<vstruct_elt>"
6832   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
6833         (unspec:VSTRUCT_2Q [
6834                 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
6835                 UNSPEC_ST2))]
6836   "TARGET_SIMD"
6837   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6838   [(set_attr "type" "neon_store2_2reg<q>")]
6839 )
6840
6841 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6842 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6843   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6844         (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
6845                      (match_operand:SI 2 "immediate_operand" "i")]
6846                      UNSPEC_ST2_LANE))]
6847   "TARGET_SIMD"
6848   {
6849     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6850                                            INTVAL (operands[2]));
6851     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
6852   }
6853   [(set_attr "type" "neon_store2_one_lane<q>")]
6854 )
6855
6856 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6857   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
6858         (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
6859                    UNSPEC_ST2))]
6860   "TARGET_SIMD"
6861 {
6862   if (BYTES_BIG_ENDIAN)
6863     {
6864       rtx tmp = gen_reg_rtx (<MODE>mode);
6865       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6866                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6867       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6868       emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
6869     }
6870   else
6871     emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
6872   DONE;
6873 })
6874
6875 (define_insn "aarch64_simd_ld3<vstruct_elt>"
6876   [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
6877         (unspec:VSTRUCT_3Q [
6878           (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
6879           UNSPEC_LD3))]
6880   "TARGET_SIMD"
6881   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6882   [(set_attr "type" "neon_load3_3reg<q>")]
6883 )
6884
6885 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
6886   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6887         (unspec:VSTRUCT_3QD [
6888           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6889           UNSPEC_LD3_DUP))]
6890   "TARGET_SIMD"
6891   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6892   [(set_attr "type" "neon_load3_all_lanes<q>")]
6893 )
6894
6895 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6896   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6897         (unspec:VSTRUCT_3QD [
6898                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6899                 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
6900                 (match_operand:SI 3 "immediate_operand" "i")]
6901                 UNSPEC_LD3_LANE))]
6902   "TARGET_SIMD"
6903 {
6904     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6905                                            INTVAL (operands[3]));
6906     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
6907 }
6908   [(set_attr "type" "neon_load3_one_lane")]
6909 )
6910
6911 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6912   [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
6913         (unspec:VSTRUCT_3Q [
6914                 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
6915                 UNSPEC_LD3))]
6916   "TARGET_SIMD"
6917 {
6918   if (BYTES_BIG_ENDIAN)
6919     {
6920       rtx tmp = gen_reg_rtx (<MODE>mode);
6921       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6922                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6923       emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
6924       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6925     }
6926   else
6927     emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
6928   DONE;
6929 })
6930
6931 (define_insn "aarch64_simd_st3<vstruct_elt>"
6932   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
6933         (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
6934                    UNSPEC_ST3))]
6935   "TARGET_SIMD"
6936   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6937   [(set_attr "type" "neon_store3_3reg<q>")]
6938 )
6939
6940 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6941 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6942   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6943         (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
6944                      (match_operand:SI 2 "immediate_operand" "i")]
6945                      UNSPEC_ST3_LANE))]
6946   "TARGET_SIMD"
6947   {
6948     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6949                                            INTVAL (operands[2]));
6950     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
6951   }
6952   [(set_attr "type" "neon_store3_one_lane<q>")]
6953 )
6954
6955 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6956   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
6957         (unspec:VSTRUCT_3Q [
6958                 (match_operand:VSTRUCT_3Q 1 "register_operand")]
6959                 UNSPEC_ST3))]
6960   "TARGET_SIMD"
6961 {
6962   if (BYTES_BIG_ENDIAN)
6963     {
6964       rtx tmp = gen_reg_rtx (<MODE>mode);
6965       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6966                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6967       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6968       emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
6969     }
6970   else
6971     emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
6972   DONE;
6973 })
6974
6975 (define_insn "aarch64_simd_ld4<vstruct_elt>"
6976   [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
6977         (unspec:VSTRUCT_4Q [
6978           (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
6979           UNSPEC_LD4))]
6980   "TARGET_SIMD"
6981   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6982   [(set_attr "type" "neon_load4_4reg<q>")]
6983 )
6984
6985 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
6986   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6987         (unspec:VSTRUCT_4QD [
6988           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6989           UNSPEC_LD4_DUP))]
6990   "TARGET_SIMD"
6991   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6992   [(set_attr "type" "neon_load4_all_lanes<q>")]
6993 )
6994
6995 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6996   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6997         (unspec:VSTRUCT_4QD [
6998                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6999                 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7000                 (match_operand:SI 3 "immediate_operand" "i")]
7001                 UNSPEC_LD4_LANE))]
7002   "TARGET_SIMD"
7003 {
7004     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7005                                            INTVAL (operands[3]));
7006     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7007 }
7008   [(set_attr "type" "neon_load4_one_lane")]
7009 )
7010
7011 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7012   [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7013         (unspec:VSTRUCT_4Q [
7014                 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7015                 UNSPEC_LD4))]
7016   "TARGET_SIMD"
7017 {
7018   if (BYTES_BIG_ENDIAN)
7019     {
7020       rtx tmp = gen_reg_rtx (<MODE>mode);
7021       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7022                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7023       emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7024       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7025     }
7026   else
7027     emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7028   DONE;
7029 })
7030
7031 (define_insn "aarch64_simd_st4<vstruct_elt>"
7032   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7033         (unspec:VSTRUCT_4Q [
7034                 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7035                 UNSPEC_ST4))]
7036   "TARGET_SIMD"
7037   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7038   [(set_attr "type" "neon_store4_4reg<q>")]
7039 )
7040
7041 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7042 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7043   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7044         (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7045                      (match_operand:SI 2 "immediate_operand" "i")]
7046                      UNSPEC_ST4_LANE))]
7047   "TARGET_SIMD"
7048   {
7049     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7050                                            INTVAL (operands[2]));
7051     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7052   }
7053   [(set_attr "type" "neon_store4_one_lane<q>")]
7054 )
7055
7056 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7057   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7058         (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7059                    UNSPEC_ST4))]
7060   "TARGET_SIMD"
7061 {
7062   if (BYTES_BIG_ENDIAN)
7063     {
7064       rtx tmp = gen_reg_rtx (<MODE>mode);
7065       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7066                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7067       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7068       emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7069     }
7070   else
7071     emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7072   DONE;
7073 })
7074
7075 (define_insn_and_split "aarch64_rev_reglist<mode>"
7076 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7077         (unspec:VSTRUCT_QD
7078                    [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7079                     (match_operand:V16QI 2 "register_operand" "w")]
7080                    UNSPEC_REV_REGLIST))]
7081   "TARGET_SIMD"
7082   "#"
7083   "&& reload_completed"
7084   [(const_int 0)]
7085 {
7086   int i;
7087   int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7088   for (i = 0; i < nregs; i++)
7089     {
7090       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7091       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7092       emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7093     }
7094   DONE;
7095 }
7096   [(set_attr "type" "neon_tbl1_q")
7097    (set_attr "length" "<insn_count>")]
7098 )
7099
7100 ;; Reload patterns for AdvSIMD register list operands.
7101
7102 (define_expand "mov<mode>"
7103   [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7104         (match_operand:VSTRUCT_QD 1 "general_operand"))]
7105   "TARGET_SIMD"
7106 {
7107   if (can_create_pseudo_p ())
7108     {
7109       if (GET_CODE (operands[0]) != REG)
7110         operands[1] = force_reg (<MODE>mode, operands[1]);
7111     }
7112 })
7113
7114 (define_expand "mov<mode>"
7115   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7116         (match_operand:VSTRUCT 1 "general_operand"))]
7117   "TARGET_SIMD"
7118 {
7119   if (can_create_pseudo_p ())
7120     {
7121       if (GET_CODE (operands[0]) != REG)
7122         operands[1] = force_reg (<MODE>mode, operands[1]);
7123     }
7124 })
7125
7126 (define_expand "movv8di"
7127   [(set (match_operand:V8DI 0 "nonimmediate_operand")
7128         (match_operand:V8DI 1 "general_operand"))]
7129   "TARGET_SIMD"
7130 {
7131   if (can_create_pseudo_p () && MEM_P (operands[0]))
7132     operands[1] = force_reg (V8DImode, operands[1]);
7133 })
7134
7135 (define_expand "aarch64_ld1x3<vstruct_elt>"
7136   [(match_operand:VSTRUCT_3QD 0 "register_operand")
7137    (match_operand:DI 1 "register_operand")]
7138   "TARGET_SIMD"
7139 {
7140   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7141   emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7142   DONE;
7143 })
7144
7145 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7146   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7147         (unspec:VSTRUCT_3QD
7148           [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7149           UNSPEC_LD1))]
7150   "TARGET_SIMD"
7151   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7152   [(set_attr "type" "neon_load1_3reg<q>")]
7153 )
7154
7155 (define_expand "aarch64_ld1x4<vstruct_elt>"
7156   [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7157    (match_operand:DI 1 "register_operand" "r")]
7158   "TARGET_SIMD"
7159 {
7160   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7161   emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7162   DONE;
7163 })
7164
7165 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7166   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7167         (unspec:VSTRUCT_4QD
7168           [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7169         UNSPEC_LD1))]
7170   "TARGET_SIMD"
7171   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7172   [(set_attr "type" "neon_load1_4reg<q>")]
7173 )
7174
7175 (define_expand "aarch64_st1x2<vstruct_elt>"
7176   [(match_operand:DI 0 "register_operand")
7177    (match_operand:VSTRUCT_2QD 1 "register_operand")]
7178   "TARGET_SIMD"
7179 {
7180   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7181   emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7182   DONE;
7183 })
7184
7185 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7186   [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7187         (unspec:VSTRUCT_2QD
7188                 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7189                 UNSPEC_ST1))]
7190   "TARGET_SIMD"
7191   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7192   [(set_attr "type" "neon_store1_2reg<q>")]
7193 )
7194
7195 (define_expand "aarch64_st1x3<vstruct_elt>"
7196   [(match_operand:DI 0 "register_operand")
7197    (match_operand:VSTRUCT_3QD 1 "register_operand")]
7198   "TARGET_SIMD"
7199 {
7200   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7201   emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7202   DONE;
7203 })
7204
7205 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7206   [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7207         (unspec:VSTRUCT_3QD
7208                 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7209                 UNSPEC_ST1))]
7210   "TARGET_SIMD"
7211   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7212   [(set_attr "type" "neon_store1_3reg<q>")]
7213 )
7214
7215 (define_expand "aarch64_st1x4<vstruct_elt>"
7216   [(match_operand:DI 0 "register_operand" "")
7217    (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
7218   "TARGET_SIMD"
7219 {
7220   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7221   emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7222   DONE;
7223 })
7224
7225 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7226   [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7227         (unspec:VSTRUCT_4QD
7228                 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7229                 UNSPEC_ST1))]
7230   "TARGET_SIMD"
7231   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7232   [(set_attr "type" "neon_store1_4reg<q>")]
7233 )
7234
7235 (define_insn "*aarch64_mov<mode>"
7236   [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7237         (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7238   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7239    && (register_operand (operands[0], <MODE>mode)
7240        || register_operand (operands[1], <MODE>mode))"
7241   "@
7242    #
7243    st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7244    ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7245   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7246                      neon_load<nregs>_<nregs>reg_q")
7247    (set_attr "length" "<insn_count>,4,4")]
7248 )
7249
7250 (define_insn "*aarch64_mov<mode>"
7251   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7252         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7253   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7254    && (register_operand (operands[0], <MODE>mode)
7255        || register_operand (operands[1], <MODE>mode))"
7256   "@
7257    #
7258    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7259    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7260   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7261                      neon_load<nregs>_<nregs>reg_q")
7262    (set_attr "length" "<insn_count>,4,4")]
7263 )
7264
7265 (define_insn "*aarch64_movv8di"
7266   [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7267         (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7268   "!BYTES_BIG_ENDIAN
7269    && (register_operand (operands[0], V8DImode)
7270        || register_operand (operands[1], V8DImode))"
7271   "#"
7272   [(set_attr "type" "multiple,multiple,multiple")
7273    (set_attr "length" "32,16,16")]
7274 )
7275
7276 (define_insn "aarch64_be_ld1<mode>"
7277   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
7278         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7279                              "aarch64_simd_struct_operand" "Utv")]
7280         UNSPEC_LD1))]
7281   "TARGET_SIMD"
7282   "ld1\\t{%0<Vmtype>}, %1"
7283   [(set_attr "type" "neon_load1_1reg<q>")]
7284 )
7285
7286 (define_insn "aarch64_be_st1<mode>"
7287   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7288         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7289         UNSPEC_ST1))]
7290   "TARGET_SIMD"
7291   "st1\\t{%1<Vmtype>}, %0"
7292   [(set_attr "type" "neon_store1_1reg<q>")]
7293 )
7294
7295 (define_insn "*aarch64_be_mov<mode>"
7296   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7297         (match_operand:VSTRUCT_2D 1 "general_operand"      " w,w,m"))]
7298   "TARGET_SIMD && BYTES_BIG_ENDIAN
7299    && (register_operand (operands[0], <MODE>mode)
7300        || register_operand (operands[1], <MODE>mode))"
7301   "@
7302    #
7303    stp\\t%d1, %R1, %0
7304    ldp\\t%d0, %R0, %1"
7305   [(set_attr "type" "multiple,neon_stp,neon_ldp")
7306    (set_attr "length" "8,4,4")]
7307 )
7308
7309 (define_insn "*aarch64_be_mov<mode>"
7310   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7311         (match_operand:VSTRUCT_2Q 1 "general_operand"      " w,w,m"))]
7312   "TARGET_SIMD && BYTES_BIG_ENDIAN
7313    && (register_operand (operands[0], <MODE>mode)
7314        || register_operand (operands[1], <MODE>mode))"
7315   "@
7316    #
7317    stp\\t%q1, %R1, %0
7318    ldp\\t%q0, %R0, %1"
7319   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7320    (set_attr "length" "8,4,4")]
7321 )
7322
7323 (define_insn "*aarch64_be_movoi"
7324   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7325         (match_operand:OI 1 "general_operand"      " w,w,m"))]
7326   "TARGET_SIMD && BYTES_BIG_ENDIAN
7327    && (register_operand (operands[0], OImode)
7328        || register_operand (operands[1], OImode))"
7329   "@
7330    #
7331    stp\\t%q1, %R1, %0
7332    ldp\\t%q0, %R0, %1"
7333   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7334    (set_attr "length" "8,4,4")]
7335 )
7336
7337 (define_insn "*aarch64_be_mov<mode>"
7338   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7339         (match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
7340   "TARGET_SIMD && BYTES_BIG_ENDIAN
7341    && (register_operand (operands[0], <MODE>mode)
7342        || register_operand (operands[1], <MODE>mode))"
7343   "#"
7344   [(set_attr "type" "multiple")
7345    (set_attr "length" "12,8,8")]
7346 )
7347
7348 (define_insn "*aarch64_be_movci"
7349   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
7350         (match_operand:CI 1 "general_operand"      " w,w,o"))]
7351   "TARGET_SIMD && BYTES_BIG_ENDIAN
7352    && (register_operand (operands[0], CImode)
7353        || register_operand (operands[1], CImode))"
7354   "#"
7355   [(set_attr "type" "multiple")
7356    (set_attr "length" "12,4,4")]
7357 )
7358
7359 (define_insn "*aarch64_be_mov<mode>"
7360   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
7361         (match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
7362   "TARGET_SIMD && BYTES_BIG_ENDIAN
7363    && (register_operand (operands[0], <MODE>mode)
7364        || register_operand (operands[1], <MODE>mode))"
7365   "#"
7366   [(set_attr "type" "multiple")
7367    (set_attr "length" "16,8,8")]
7368 )
7369
7370 (define_insn "*aarch64_be_movxi"
7371   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
7372         (match_operand:XI 1 "general_operand"      " w,w,o"))]
7373   "TARGET_SIMD && BYTES_BIG_ENDIAN
7374    && (register_operand (operands[0], XImode)
7375        || register_operand (operands[1], XImode))"
7376   "#"
7377   [(set_attr "type" "multiple")
7378    (set_attr "length" "16,4,4")]
7379 )
7380
7381 (define_split
7382   [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
7383         (match_operand:VSTRUCT_2QD 1 "register_operand"))]
7384   "TARGET_SIMD && reload_completed"
7385   [(const_int 0)]
7386 {
7387   aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
7388   DONE;
7389 })
7390
7391 (define_split
7392   [(set (match_operand:OI 0 "register_operand")
7393         (match_operand:OI 1 "register_operand"))]
7394   "TARGET_SIMD && reload_completed"
7395   [(const_int 0)]
7396 {
7397   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
7398   DONE;
7399 })
7400
7401 (define_split
7402   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
7403         (match_operand:VSTRUCT_3QD 1 "general_operand"))]
7404   "TARGET_SIMD && reload_completed"
7405   [(const_int 0)]
7406 {
7407   if (register_operand (operands[0], <MODE>mode)
7408       && register_operand (operands[1], <MODE>mode))
7409     {
7410       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
7411       DONE;
7412     }
7413   else if (BYTES_BIG_ENDIAN)
7414     {
7415       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7416       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7417       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7418                                            <MODE>mode, 0),
7419                       simplify_gen_subreg (pair_mode, operands[1],
7420                                            <MODE>mode, 0));
7421       emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
7422                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
7423                                                         operands[0],
7424                                                         <MODE>mode,
7425                                                         2 * elt_size)),
7426                       gen_lowpart (<VSTRUCT_ELT>mode,
7427                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
7428                                                         operands[1],
7429                                                         <MODE>mode,
7430                                                         2 * elt_size)));
7431       DONE;
7432     }
7433   else
7434     FAIL;
7435 })
7436
7437 (define_split
7438   [(set (match_operand:CI 0 "nonimmediate_operand")
7439         (match_operand:CI 1 "general_operand"))]
7440   "TARGET_SIMD && reload_completed"
7441   [(const_int 0)]
7442 {
7443   if (register_operand (operands[0], CImode)
7444       && register_operand (operands[1], CImode))
7445     {
7446       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
7447       DONE;
7448     }
7449   else if (BYTES_BIG_ENDIAN)
7450     {
7451       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
7452                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
7453       emit_move_insn (gen_lowpart (V16QImode,
7454                                    simplify_gen_subreg (TImode, operands[0],
7455                                                         CImode, 32)),
7456                       gen_lowpart (V16QImode,
7457                                    simplify_gen_subreg (TImode, operands[1],
7458                                                         CImode, 32)));
7459       DONE;
7460     }
7461   else
7462     FAIL;
7463 })
7464
7465 (define_split
7466   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
7467         (match_operand:VSTRUCT_4QD 1 "general_operand"))]
7468   "TARGET_SIMD && reload_completed"
7469   [(const_int 0)]
7470 {
7471   if (register_operand (operands[0], <MODE>mode)
7472       && register_operand (operands[1], <MODE>mode))
7473     {
7474       aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
7475       DONE;
7476     }
7477   else if (BYTES_BIG_ENDIAN)
7478     {
7479       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7480       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7481       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7482                                            <MODE>mode, 0),
7483                       simplify_gen_subreg (pair_mode, operands[1],
7484                                            <MODE>mode, 0));
7485       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7486                                            <MODE>mode, 2 * elt_size),
7487                       simplify_gen_subreg (pair_mode, operands[1],
7488                                            <MODE>mode, 2 * elt_size));
7489       DONE;
7490     }
7491   else
7492     FAIL;
7493 })
7494
7495 (define_split
7496   [(set (match_operand:XI 0 "nonimmediate_operand")
7497         (match_operand:XI 1 "general_operand"))]
7498   "TARGET_SIMD && reload_completed"
7499   [(const_int 0)]
7500 {
7501   if (register_operand (operands[0], XImode)
7502       && register_operand (operands[1], XImode))
7503     {
7504       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
7505       DONE;
7506     }
7507   else if (BYTES_BIG_ENDIAN)
7508     {
7509       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
7510                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
7511       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
7512                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
7513       DONE;
7514     }
7515   else
7516     FAIL;
7517 })
7518
7519 (define_split
7520   [(set (match_operand:V8DI 0 "nonimmediate_operand")
7521         (match_operand:V8DI 1 "general_operand"))]
7522   "TARGET_SIMD && reload_completed"
7523   [(const_int 0)]
7524 {
7525   if (register_operand (operands[0], V8DImode)
7526       && register_operand (operands[1], V8DImode))
7527     {
7528       aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
7529       DONE;
7530     }
7531   else if ((register_operand (operands[0], V8DImode)
7532             && memory_operand (operands[1], V8DImode))
7533            || (memory_operand (operands[0], V8DImode)
7534             && register_operand (operands[1], V8DImode)))
7535     {
7536       for (int offset = 0; offset < 64; offset += 16)
7537         emit_move_insn (simplify_gen_subreg (TImode, operands[0],
7538                                              V8DImode, offset),
7539                         simplify_gen_subreg (TImode, operands[1],
7540                                              V8DImode, offset));
7541       DONE;
7542     }
7543   else
7544     FAIL;
7545 })
7546
7547 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
7548   [(match_operand:VSTRUCT_QD 0 "register_operand")
7549    (match_operand:DI 1 "register_operand")]
7550   "TARGET_SIMD"
7551 {
7552   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7553   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7554
7555   emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
7556   DONE;
7557 })
7558
7559 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7560   [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
7561         (unspec:VSTRUCT_2DNX [
7562           (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
7563           UNSPEC_LD2_DREG))]
7564   "TARGET_SIMD"
7565   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7566   [(set_attr "type" "neon_load2_2reg<q>")]
7567 )
7568
7569 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7570   [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
7571         (unspec:VSTRUCT_2DX [
7572           (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
7573           UNSPEC_LD2_DREG))]
7574   "TARGET_SIMD"
7575   "ld1\\t{%S0.1d - %T0.1d}, %1"
7576   [(set_attr "type" "neon_load1_2reg<q>")]
7577 )
7578
7579 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7580   [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
7581         (unspec:VSTRUCT_3DNX [
7582           (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
7583           UNSPEC_LD3_DREG))]
7584   "TARGET_SIMD"
7585   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7586   [(set_attr "type" "neon_load3_3reg<q>")]
7587 )
7588
7589 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7590   [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
7591         (unspec:VSTRUCT_3DX [
7592           (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
7593           UNSPEC_LD3_DREG))]
7594   "TARGET_SIMD"
7595   "ld1\\t{%S0.1d - %U0.1d}, %1"
7596   [(set_attr "type" "neon_load1_3reg<q>")]
7597 )
7598
7599 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7600   [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
7601         (unspec:VSTRUCT_4DNX [
7602           (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
7603           UNSPEC_LD4_DREG))]
7604   "TARGET_SIMD"
7605   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7606   [(set_attr "type" "neon_load4_4reg<q>")]
7607 )
7608
7609 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7610   [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
7611         (unspec:VSTRUCT_4DX [
7612           (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
7613           UNSPEC_LD4_DREG))]
7614   "TARGET_SIMD"
7615   "ld1\\t{%S0.1d - %V0.1d}, %1"
7616   [(set_attr "type" "neon_load1_4reg<q>")]
7617 )
7618
7619 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7620  [(match_operand:VSTRUCT_D 0 "register_operand")
7621   (match_operand:DI 1 "register_operand")]
7622   "TARGET_SIMD"
7623 {
7624   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7625   emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
7626   DONE;
7627 })
7628
7629 (define_expand "aarch64_ld1<VALL_F16:mode>"
7630  [(match_operand:VALL_F16 0 "register_operand")
7631   (match_operand:DI 1 "register_operand")]
7632   "TARGET_SIMD"
7633 {
7634   machine_mode mode = <VALL_F16:MODE>mode;
7635   rtx mem = gen_rtx_MEM (mode, operands[1]);
7636
7637   if (BYTES_BIG_ENDIAN)
7638     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
7639   else
7640     emit_move_insn (operands[0], mem);
7641   DONE;
7642 })
7643
7644 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7645  [(match_operand:VSTRUCT_Q 0 "register_operand")
7646   (match_operand:DI 1 "register_operand")]
7647   "TARGET_SIMD"
7648 {
7649   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7650   emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
7651   DONE;
7652 })
7653
7654 (define_expand "aarch64_ld1x2<vstruct_elt>"
7655  [(match_operand:VSTRUCT_2QD 0 "register_operand")
7656   (match_operand:DI 1 "register_operand")]
7657   "TARGET_SIMD"
7658 {
7659   machine_mode mode = <MODE>mode;
7660   rtx mem = gen_rtx_MEM (mode, operands[1]);
7661
7662   emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
7663   DONE;
7664 })
7665
7666 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
7667   [(match_operand:VSTRUCT_QD 0 "register_operand")
7668         (match_operand:DI 1 "register_operand")
7669         (match_operand:VSTRUCT_QD 2 "register_operand")
7670         (match_operand:SI 3 "immediate_operand")]
7671   "TARGET_SIMD"
7672 {
7673   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7674   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7675
7676   aarch64_simd_lane_bounds (operands[3], 0,
7677                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7678   emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
7679                                 mem, operands[2], operands[3]));
7680   DONE;
7681 })
7682
7683 ;; Permuted-store expanders for neon intrinsics.
7684
7685 ;; Permute instructions
7686
7687 ;; vec_perm support
7688
7689 (define_expand "vec_perm<mode>"
7690   [(match_operand:VB 0 "register_operand")
7691    (match_operand:VB 1 "register_operand")
7692    (match_operand:VB 2 "register_operand")
7693    (match_operand:VB 3 "register_operand")]
7694   "TARGET_SIMD"
7695 {
7696   aarch64_expand_vec_perm (operands[0], operands[1],
7697                            operands[2], operands[3], <nunits>);
7698   DONE;
7699 })
7700
7701 (define_insn "aarch64_qtbl1<mode>"
7702   [(set (match_operand:VB 0 "register_operand" "=w")
7703         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
7704                     (match_operand:VB 2 "register_operand" "w")]
7705                    UNSPEC_TBL))]
7706   "TARGET_SIMD"
7707   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
7708   [(set_attr "type" "neon_tbl1<q>")]
7709 )
7710
7711 (define_insn "aarch64_qtbx1<mode>"
7712   [(set (match_operand:VB 0 "register_operand" "=w")
7713         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7714                     (match_operand:V16QI 2 "register_operand" "w")
7715                     (match_operand:VB 3 "register_operand" "w")]
7716                    UNSPEC_TBX))]
7717   "TARGET_SIMD"
7718   "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
7719   [(set_attr "type" "neon_tbl1<q>")]
7720 )
7721
7722 ;; Two source registers.
7723
7724 (define_insn "aarch64_qtbl2<mode>"
7725   [(set (match_operand:VB 0 "register_operand" "=w")
7726         (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
7727                       (match_operand:VB 2 "register_operand" "w")]
7728                       UNSPEC_TBL))]
7729   "TARGET_SIMD"
7730   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
7731   [(set_attr "type" "neon_tbl2")]
7732 )
7733
7734 (define_insn "aarch64_qtbx2<mode>"
7735   [(set (match_operand:VB 0 "register_operand" "=w")
7736         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7737                       (match_operand:V2x16QI 2 "register_operand" "w")
7738                       (match_operand:VB 3 "register_operand" "w")]
7739                       UNSPEC_TBX))]
7740   "TARGET_SIMD"
7741   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
7742   [(set_attr "type" "neon_tbl2")]
7743 )
7744
7745 ;; Three source registers.
7746
7747 (define_insn "aarch64_qtbl3<mode>"
7748   [(set (match_operand:VB 0 "register_operand" "=w")
7749         (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
7750                       (match_operand:VB 2 "register_operand" "w")]
7751                       UNSPEC_TBL))]
7752   "TARGET_SIMD"
7753   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
7754   [(set_attr "type" "neon_tbl3")]
7755 )
7756
7757 (define_insn "aarch64_qtbx3<mode>"
7758   [(set (match_operand:VB 0 "register_operand" "=w")
7759         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7760                       (match_operand:V3x16QI 2 "register_operand" "w")
7761                       (match_operand:VB 3 "register_operand" "w")]
7762                       UNSPEC_TBX))]
7763   "TARGET_SIMD"
7764   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
7765   [(set_attr "type" "neon_tbl3")]
7766 )
7767
7768 ;; Four source registers.
7769
7770 (define_insn "aarch64_qtbl4<mode>"
7771   [(set (match_operand:VB 0 "register_operand" "=w")
7772         (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
7773                       (match_operand:VB 2 "register_operand" "w")]
7774                       UNSPEC_TBL))]
7775   "TARGET_SIMD"
7776   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
7777   [(set_attr "type" "neon_tbl4")]
7778 )
7779
7780 (define_insn "aarch64_qtbx4<mode>"
7781   [(set (match_operand:VB 0 "register_operand" "=w")
7782         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7783                       (match_operand:V4x16QI 2 "register_operand" "w")
7784                       (match_operand:VB 3 "register_operand" "w")]
7785                       UNSPEC_TBX))]
7786   "TARGET_SIMD"
7787   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
7788   [(set_attr "type" "neon_tbl4")]
7789 )
7790
7791 (define_insn_and_split "aarch64_combinev16qi"
7792   [(set (match_operand:V2x16QI 0 "register_operand" "=w")
7793         (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
7794                          (match_operand:V16QI 2 "register_operand" "w")]
7795                         UNSPEC_CONCAT))]
7796   "TARGET_SIMD"
7797   "#"
7798   "&& reload_completed"
7799   [(const_int 0)]
7800 {
7801   aarch64_split_combinev16qi (operands);
7802   DONE;
7803 }
7804 [(set_attr "type" "multiple")]
7805 )
7806
7807 ;; This instruction's pattern is generated directly by
7808 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7809 ;; need corresponding changes there.
7810 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
7811   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7812         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7813                           (match_operand:VALL_F16 2 "register_operand" "w")]
7814          PERMUTE))]
7815   "TARGET_SIMD"
7816   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7817   [(set_attr "type" "neon_permute<q>")]
7818 )
7819
7820 ;; This instruction's pattern is generated directly by
7821 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7822 ;; need corresponding changes there.  Note that the immediate (third)
7823 ;; operand is a lane index not a byte index.
7824 (define_insn "aarch64_ext<mode>"
7825   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7826         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7827                           (match_operand:VALL_F16 2 "register_operand" "w")
7828                           (match_operand:SI 3 "immediate_operand" "i")]
7829          UNSPEC_EXT))]
7830   "TARGET_SIMD"
7831 {
7832   operands[3] = GEN_INT (INTVAL (operands[3])
7833       * GET_MODE_UNIT_SIZE (<MODE>mode));
7834   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
7835 }
7836   [(set_attr "type" "neon_ext<q>")]
7837 )
7838
7839 ;; This instruction's pattern is generated directly by
7840 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7841 ;; need corresponding changes there.
7842 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
7843   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7844         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
7845                     REVERSE))]
7846   "TARGET_SIMD"
7847   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
7848   [(set_attr "type" "neon_rev<q>")]
7849 )
7850
7851 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7852   [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
7853         (unspec:VSTRUCT_2DNX [
7854                 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
7855                 UNSPEC_ST2))]
7856   "TARGET_SIMD"
7857   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7858   [(set_attr "type" "neon_store2_2reg")]
7859 )
7860
7861 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7862   [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
7863         (unspec:VSTRUCT_2DX [
7864                 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
7865                 UNSPEC_ST2))]
7866   "TARGET_SIMD"
7867   "st1\\t{%S1.1d - %T1.1d}, %0"
7868   [(set_attr "type" "neon_store1_2reg")]
7869 )
7870
7871 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7872   [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
7873         (unspec:VSTRUCT_3DNX [
7874                 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
7875                 UNSPEC_ST3))]
7876   "TARGET_SIMD"
7877   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7878   [(set_attr "type" "neon_store3_3reg")]
7879 )
7880
7881 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7882   [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
7883         (unspec:VSTRUCT_3DX [
7884                 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
7885                 UNSPEC_ST3))]
7886   "TARGET_SIMD"
7887   "st1\\t{%S1.1d - %U1.1d}, %0"
7888   [(set_attr "type" "neon_store1_3reg")]
7889 )
7890
7891 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7892   [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
7893         (unspec:VSTRUCT_4DNX [
7894                 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
7895                 UNSPEC_ST4))]
7896   "TARGET_SIMD"
7897   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7898   [(set_attr "type" "neon_store4_4reg")]
7899 )
7900
7901 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7902   [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
7903         (unspec:VSTRUCT_4DX [
7904                 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
7905                 UNSPEC_ST4))]
7906   "TARGET_SIMD"
7907   "st1\\t{%S1.1d - %V1.1d}, %0"
7908   [(set_attr "type" "neon_store1_4reg")]
7909 )
7910
7911 (define_expand "aarch64_st<nregs><vstruct_elt>"
7912  [(match_operand:DI 0 "register_operand")
7913   (match_operand:VSTRUCT_D 1 "register_operand")]
7914   "TARGET_SIMD"
7915 {
7916   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7917   emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
7918   DONE;
7919 })
7920
7921 (define_expand "aarch64_st<nregs><vstruct_elt>"
7922  [(match_operand:DI 0 "register_operand")
7923   (match_operand:VSTRUCT_Q 1 "register_operand")]
7924   "TARGET_SIMD"
7925 {
7926   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7927   emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
7928   DONE;
7929 })
7930
7931 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
7932  [(match_operand:DI 0 "register_operand")
7933   (match_operand:VSTRUCT_QD 1 "register_operand")
7934   (match_operand:SI 2 "immediate_operand")]
7935   "TARGET_SIMD"
7936 {
7937   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7938   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7939
7940   aarch64_simd_lane_bounds (operands[2], 0,
7941                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7942   emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
7943                                         operands[1], operands[2]));
7944   DONE;
7945 })
7946
7947 (define_expand "aarch64_st1<VALL_F16:mode>"
7948  [(match_operand:DI 0 "register_operand")
7949   (match_operand:VALL_F16 1 "register_operand")]
7950   "TARGET_SIMD"
7951 {
7952   machine_mode mode = <VALL_F16:MODE>mode;
7953   rtx mem = gen_rtx_MEM (mode, operands[0]);
7954
7955   if (BYTES_BIG_ENDIAN)
7956     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
7957   else
7958     emit_move_insn (mem, operands[1]);
7959   DONE;
7960 })
7961
7962 ;; Standard pattern name vec_init<mode><Vel>.
7963
7964 (define_expand "vec_init<mode><Vel>"
7965   [(match_operand:VALL_F16 0 "register_operand")
7966    (match_operand 1 "" "")]
7967   "TARGET_SIMD"
7968 {
7969   aarch64_expand_vector_init (operands[0], operands[1]);
7970   DONE;
7971 })
7972
7973 (define_expand "vec_init<mode><Vhalf>"
7974   [(match_operand:VQ_NO2E 0 "register_operand")
7975    (match_operand 1 "" "")]
7976   "TARGET_SIMD"
7977 {
7978   aarch64_expand_vector_init (operands[0], operands[1]);
7979   DONE;
7980 })
7981
7982 (define_insn "*aarch64_simd_ld1r<mode>"
7983   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7984         (vec_duplicate:VALL_F16
7985           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
7986   "TARGET_SIMD"
7987   "ld1r\\t{%0.<Vtype>}, %1"
7988   [(set_attr "type" "neon_load1_all_lanes")]
7989 )
7990
7991 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
7992   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7993         (unspec:VSTRUCT_2QD [
7994             (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
7995             UNSPEC_LD1))]
7996   "TARGET_SIMD"
7997   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7998   [(set_attr "type" "neon_load1_2reg<q>")]
7999 )
8000
8001
8002 (define_insn "@aarch64_frecpe<mode>"
8003   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8004         (unspec:VHSDF_HSDF
8005          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8006          UNSPEC_FRECPE))]
8007   "TARGET_SIMD"
8008   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8009   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8010 )
8011
8012 (define_insn "aarch64_frecpx<mode>"
8013   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8014         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8015          UNSPEC_FRECPX))]
8016   "TARGET_SIMD"
8017   "frecpx\t%<s>0, %<s>1"
8018   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8019 )
8020
8021 (define_insn "@aarch64_frecps<mode>"
8022   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8023         (unspec:VHSDF_HSDF
8024           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8025           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8026           UNSPEC_FRECPS))]
8027   "TARGET_SIMD"
8028   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8029   [(set_attr "type" "neon_fp_recps_<stype><q>")]
8030 )
8031
8032 (define_insn "aarch64_urecpe<mode>"
8033   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8034         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8035                 UNSPEC_URECPE))]
8036  "TARGET_SIMD"
8037  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8038   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8039
8040 ;; Standard pattern name vec_extract<mode><Vel>.
8041
8042 (define_expand "vec_extract<mode><Vel>"
8043   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8044    (match_operand:VALL_F16 1 "register_operand")
8045    (match_operand:SI 2 "immediate_operand")]
8046   "TARGET_SIMD"
8047 {
8048     emit_insn
8049       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8050     DONE;
8051 })
8052
8053 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8054 (define_expand "vec_extract<mode><Vhalf>"
8055   [(match_operand:<VHALF> 0 "register_operand")
8056    (match_operand:VQMOV_NO2E 1 "register_operand")
8057    (match_operand 2 "immediate_operand")]
8058   "TARGET_SIMD"
8059 {
8060   int start = INTVAL (operands[2]);
8061   if (start != 0 && start != <nunits> / 2)
8062     FAIL;
8063   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8064   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8065   DONE;
8066 })
8067
8068 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8069 (define_expand "vec_extractv2dfv1df"
8070   [(match_operand:V1DF 0 "register_operand")
8071    (match_operand:V2DF 1 "register_operand")
8072    (match_operand 2 "immediate_operand")]
8073   "TARGET_SIMD"
8074 {
8075   /* V1DF is rarely used by other patterns, so it should be better to hide
8076      it in a subreg destination of a normal DF op.  */
8077   rtx scalar0 = gen_lowpart (DFmode, operands[0]);
8078   emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
8079   DONE;
8080 })
8081
8082 ;; aes
8083
8084 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8085   [(set (match_operand:V16QI 0 "register_operand" "=w")
8086         (unspec:V16QI
8087                 [(xor:V16QI
8088                  (match_operand:V16QI 1 "register_operand" "%0")
8089                  (match_operand:V16QI 2 "register_operand" "w"))]
8090          CRYPTO_AES))]
8091   "TARGET_SIMD && TARGET_AES"
8092   "aes<aes_op>\\t%0.16b, %2.16b"
8093   [(set_attr "type" "crypto_aese")]
8094 )
8095
8096 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8097   [(set (match_operand:V16QI 0 "register_operand" "=w")
8098         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8099          CRYPTO_AESMC))]
8100   "TARGET_SIMD && TARGET_AES"
8101   "aes<aesmc_op>\\t%0.16b, %1.16b"
8102   [(set_attr "type" "crypto_aesmc")]
8103 )
8104
8105 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8106 ;; and enforce the register dependency without scheduling or register
8107 ;; allocation messing up the order or introducing moves inbetween.
8108 ;;  Mash the two together during combine.
8109
8110 (define_insn "*aarch64_crypto_aese_fused"
8111   [(set (match_operand:V16QI 0 "register_operand" "=w")
8112         (unspec:V16QI
8113           [(unspec:V16QI
8114            [(xor:V16QI
8115                 (match_operand:V16QI 1 "register_operand" "%0")
8116                 (match_operand:V16QI 2 "register_operand" "w"))]
8117              UNSPEC_AESE)]
8118         UNSPEC_AESMC))]
8119   "TARGET_SIMD && TARGET_AES
8120    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8121   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8122   [(set_attr "type" "crypto_aese")
8123    (set_attr "length" "8")]
8124 )
8125
8126 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8127 ;; and enforce the register dependency without scheduling or register
8128 ;; allocation messing up the order or introducing moves inbetween.
8129 ;;  Mash the two together during combine.
8130
8131 (define_insn "*aarch64_crypto_aesd_fused"
8132   [(set (match_operand:V16QI 0 "register_operand" "=w")
8133         (unspec:V16QI
8134           [(unspec:V16QI
8135                     [(xor:V16QI
8136                         (match_operand:V16QI 1 "register_operand" "%0")
8137                         (match_operand:V16QI 2 "register_operand" "w"))]
8138                 UNSPEC_AESD)]
8139           UNSPEC_AESIMC))]
8140   "TARGET_SIMD && TARGET_AES
8141    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8142   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8143   [(set_attr "type" "crypto_aese")
8144    (set_attr "length" "8")]
8145 )
8146
8147 ;; sha1
8148
8149 (define_insn "aarch64_crypto_sha1hsi"
8150   [(set (match_operand:SI 0 "register_operand" "=w")
8151         (unspec:SI [(match_operand:SI 1
8152                        "register_operand" "w")]
8153          UNSPEC_SHA1H))]
8154   "TARGET_SIMD && TARGET_SHA2"
8155   "sha1h\\t%s0, %s1"
8156   [(set_attr "type" "crypto_sha1_fast")]
8157 )
8158
8159 (define_insn "aarch64_crypto_sha1hv4si"
8160   [(set (match_operand:SI 0 "register_operand" "=w")
8161         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8162                      (parallel [(const_int 0)]))]
8163          UNSPEC_SHA1H))]
8164   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8165   "sha1h\\t%s0, %s1"
8166   [(set_attr "type" "crypto_sha1_fast")]
8167 )
8168
8169 (define_insn "aarch64_be_crypto_sha1hv4si"
8170   [(set (match_operand:SI 0 "register_operand" "=w")
8171         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8172                      (parallel [(const_int 3)]))]
8173          UNSPEC_SHA1H))]
8174   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
8175   "sha1h\\t%s0, %s1"
8176   [(set_attr "type" "crypto_sha1_fast")]
8177 )
8178
8179 (define_insn "aarch64_crypto_sha1su1v4si"
8180   [(set (match_operand:V4SI 0 "register_operand" "=w")
8181         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8182                       (match_operand:V4SI 2 "register_operand" "w")]
8183          UNSPEC_SHA1SU1))]
8184   "TARGET_SIMD && TARGET_SHA2"
8185   "sha1su1\\t%0.4s, %2.4s"
8186   [(set_attr "type" "crypto_sha1_fast")]
8187 )
8188
8189 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8190   [(set (match_operand:V4SI 0 "register_operand" "=w")
8191         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8192                       (match_operand:SI 2 "register_operand" "w")
8193                       (match_operand:V4SI 3 "register_operand" "w")]
8194          CRYPTO_SHA1))]
8195   "TARGET_SIMD && TARGET_SHA2"
8196   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8197   [(set_attr "type" "crypto_sha1_slow")]
8198 )
8199
8200 (define_insn "aarch64_crypto_sha1su0v4si"
8201   [(set (match_operand:V4SI 0 "register_operand" "=w")
8202         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8203                       (match_operand:V4SI 2 "register_operand" "w")
8204                       (match_operand:V4SI 3 "register_operand" "w")]
8205          UNSPEC_SHA1SU0))]
8206   "TARGET_SIMD && TARGET_SHA2"
8207   "sha1su0\\t%0.4s, %2.4s, %3.4s"
8208   [(set_attr "type" "crypto_sha1_xor")]
8209 )
8210
8211 ;; sha256
8212
8213 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8214   [(set (match_operand:V4SI 0 "register_operand" "=w")
8215         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8216                       (match_operand:V4SI 2 "register_operand" "w")
8217                       (match_operand:V4SI 3 "register_operand" "w")]
8218          CRYPTO_SHA256))]
8219   "TARGET_SIMD && TARGET_SHA2"
8220   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8221   [(set_attr "type" "crypto_sha256_slow")]
8222 )
8223
8224 (define_insn "aarch64_crypto_sha256su0v4si"
8225   [(set (match_operand:V4SI 0 "register_operand" "=w")
8226         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8227                       (match_operand:V4SI 2 "register_operand" "w")]
8228          UNSPEC_SHA256SU0))]
8229   "TARGET_SIMD && TARGET_SHA2"
8230   "sha256su0\\t%0.4s, %2.4s"
8231   [(set_attr "type" "crypto_sha256_fast")]
8232 )
8233
8234 (define_insn "aarch64_crypto_sha256su1v4si"
8235   [(set (match_operand:V4SI 0 "register_operand" "=w")
8236         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8237                       (match_operand:V4SI 2 "register_operand" "w")
8238                       (match_operand:V4SI 3 "register_operand" "w")]
8239          UNSPEC_SHA256SU1))]
8240   "TARGET_SIMD && TARGET_SHA2"
8241   "sha256su1\\t%0.4s, %2.4s, %3.4s"
8242   [(set_attr "type" "crypto_sha256_slow")]
8243 )
8244
8245 ;; sha512
8246
8247 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8248   [(set (match_operand:V2DI 0 "register_operand" "=w")
8249         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8250                       (match_operand:V2DI 2 "register_operand" "w")
8251                       (match_operand:V2DI 3 "register_operand" "w")]
8252          CRYPTO_SHA512))]
8253   "TARGET_SIMD && TARGET_SHA3"
8254   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8255   [(set_attr "type" "crypto_sha512")]
8256 )
8257
8258 (define_insn "aarch64_crypto_sha512su0qv2di"
8259   [(set (match_operand:V2DI 0 "register_operand" "=w")
8260         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8261                       (match_operand:V2DI 2 "register_operand" "w")]
8262          UNSPEC_SHA512SU0))]
8263   "TARGET_SIMD && TARGET_SHA3"
8264   "sha512su0\\t%0.2d, %2.2d"
8265   [(set_attr "type" "crypto_sha512")]
8266 )
8267
8268 (define_insn "aarch64_crypto_sha512su1qv2di"
8269   [(set (match_operand:V2DI 0 "register_operand" "=w")
8270         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8271                       (match_operand:V2DI 2 "register_operand" "w")
8272                       (match_operand:V2DI 3 "register_operand" "w")]
8273          UNSPEC_SHA512SU1))]
8274   "TARGET_SIMD && TARGET_SHA3"
8275   "sha512su1\\t%0.2d, %2.2d, %3.2d"
8276   [(set_attr "type" "crypto_sha512")]
8277 )
8278
8279 ;; sha3
8280
8281 (define_insn "eor3q<mode>4"
8282   [(set (match_operand:VQ_I 0 "register_operand" "=w")
8283         (xor:VQ_I
8284          (xor:VQ_I
8285           (match_operand:VQ_I 2 "register_operand" "w")
8286           (match_operand:VQ_I 3 "register_operand" "w"))
8287          (match_operand:VQ_I 1 "register_operand" "w")))]
8288   "TARGET_SIMD && TARGET_SHA3"
8289   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8290   [(set_attr "type" "crypto_sha3")]
8291 )
8292
8293 (define_insn "aarch64_rax1qv2di"
8294   [(set (match_operand:V2DI 0 "register_operand" "=w")
8295         (xor:V2DI
8296          (rotate:V2DI
8297           (match_operand:V2DI 2 "register_operand" "w")
8298           (const_int 1))
8299          (match_operand:V2DI 1 "register_operand" "w")))]
8300   "TARGET_SIMD && TARGET_SHA3"
8301   "rax1\\t%0.2d, %1.2d, %2.2d"
8302   [(set_attr "type" "crypto_sha3")]
8303 )
8304
8305 (define_insn "aarch64_xarqv2di"
8306   [(set (match_operand:V2DI 0 "register_operand" "=w")
8307         (rotatert:V2DI
8308          (xor:V2DI
8309           (match_operand:V2DI 1 "register_operand" "%w")
8310           (match_operand:V2DI 2 "register_operand" "w"))
8311          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8312   "TARGET_SIMD && TARGET_SHA3"
8313   "xar\\t%0.2d, %1.2d, %2.2d, %3"
8314   [(set_attr "type" "crypto_sha3")]
8315 )
8316
8317 (define_insn "bcaxq<mode>4"
8318   [(set (match_operand:VQ_I 0 "register_operand" "=w")
8319         (xor:VQ_I
8320          (and:VQ_I
8321           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8322           (match_operand:VQ_I 2 "register_operand" "w"))
8323          (match_operand:VQ_I 1 "register_operand" "w")))]
8324   "TARGET_SIMD && TARGET_SHA3"
8325   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8326   [(set_attr "type" "crypto_sha3")]
8327 )
8328
8329 ;; SM3
8330
8331 (define_insn "aarch64_sm3ss1qv4si"
8332   [(set (match_operand:V4SI 0 "register_operand" "=w")
8333         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8334                       (match_operand:V4SI 2 "register_operand" "w")
8335                       (match_operand:V4SI 3 "register_operand" "w")]
8336          UNSPEC_SM3SS1))]
8337   "TARGET_SIMD && TARGET_SM4"
8338   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8339   [(set_attr "type" "crypto_sm3")]
8340 )
8341
8342
8343 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
8344   [(set (match_operand:V4SI 0 "register_operand" "=w")
8345         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8346                       (match_operand:V4SI 2 "register_operand" "w")
8347                       (match_operand:V4SI 3 "register_operand" "w")
8348                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
8349          CRYPTO_SM3TT))]
8350   "TARGET_SIMD && TARGET_SM4"
8351   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
8352   [(set_attr "type" "crypto_sm3")]
8353 )
8354
8355 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
8356   [(set (match_operand:V4SI 0 "register_operand" "=w")
8357         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8358                       (match_operand:V4SI 2 "register_operand" "w")
8359                       (match_operand:V4SI 3 "register_operand" "w")]
8360          CRYPTO_SM3PART))]
8361   "TARGET_SIMD && TARGET_SM4"
8362   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
8363   [(set_attr "type" "crypto_sm3")]
8364 )
8365
8366 ;; SM4
8367
8368 (define_insn "aarch64_sm4eqv4si"
8369   [(set (match_operand:V4SI 0 "register_operand" "=w")
8370         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8371                       (match_operand:V4SI 2 "register_operand" "w")]
8372          UNSPEC_SM4E))]
8373   "TARGET_SIMD && TARGET_SM4"
8374   "sm4e\\t%0.4s, %2.4s"
8375   [(set_attr "type" "crypto_sm4")]
8376 )
8377
8378 (define_insn "aarch64_sm4ekeyqv4si"
8379   [(set (match_operand:V4SI 0 "register_operand" "=w")
8380         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8381                       (match_operand:V4SI 2 "register_operand" "w")]
8382          UNSPEC_SM4EKEY))]
8383   "TARGET_SIMD && TARGET_SM4"
8384   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
8385   [(set_attr "type" "crypto_sm4")]
8386 )
8387
8388 ;; fp16fml
8389
8390 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
8391   [(set (match_operand:VDQSF 0 "register_operand")
8392         (unspec:VDQSF
8393          [(match_operand:VDQSF 1 "register_operand")
8394           (match_operand:<VFMLA_W> 2 "register_operand")
8395           (match_operand:<VFMLA_W> 3 "register_operand")]
8396          VFMLA16_LOW))]
8397   "TARGET_F16FML"
8398 {
8399   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8400                                             <nunits> * 2, false);
8401   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8402                                             <nunits> * 2, false);
8403
8404   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
8405                                                                 operands[1],
8406                                                                 operands[2],
8407                                                                 operands[3],
8408                                                                 p1, p2));
8409   DONE;
8410
8411 })
8412
8413 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
8414   [(set (match_operand:VDQSF 0 "register_operand")
8415         (unspec:VDQSF
8416          [(match_operand:VDQSF 1 "register_operand")
8417           (match_operand:<VFMLA_W> 2 "register_operand")
8418           (match_operand:<VFMLA_W> 3 "register_operand")]
8419          VFMLA16_HIGH))]
8420   "TARGET_F16FML"
8421 {
8422   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8423   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8424
8425   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
8426                                                                  operands[1],
8427                                                                  operands[2],
8428                                                                  operands[3],
8429                                                                  p1, p2));
8430   DONE;
8431 })
8432
8433 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
8434   [(set (match_operand:VDQSF 0 "register_operand" "=w")
8435         (fma:VDQSF
8436          (float_extend:VDQSF
8437           (vec_select:<VFMLA_SEL_W>
8438            (match_operand:<VFMLA_W> 2 "register_operand" "w")
8439            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
8440          (float_extend:VDQSF
8441           (vec_select:<VFMLA_SEL_W>
8442            (match_operand:<VFMLA_W> 3 "register_operand" "w")
8443            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8444          (match_operand:VDQSF 1 "register_operand" "0")))]
8445   "TARGET_F16FML"
8446   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8447   [(set_attr "type" "neon_fp_mul_s")]
8448 )
8449
8450 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
8451   [(set (match_operand:VDQSF 0 "register_operand" "=w")
8452         (fma:VDQSF
8453          (float_extend:VDQSF
8454           (neg:<VFMLA_SEL_W>
8455            (vec_select:<VFMLA_SEL_W>
8456             (match_operand:<VFMLA_W> 2 "register_operand" "w")
8457             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
8458          (float_extend:VDQSF
8459           (vec_select:<VFMLA_SEL_W>
8460            (match_operand:<VFMLA_W> 3 "register_operand" "w")
8461            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8462          (match_operand:VDQSF 1 "register_operand" "0")))]
8463   "TARGET_F16FML"
8464   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8465   [(set_attr "type" "neon_fp_mul_s")]
8466 )
8467
8468 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
8469   [(set (match_operand:VDQSF 0 "register_operand" "=w")
8470         (fma:VDQSF
8471          (float_extend:VDQSF
8472           (vec_select:<VFMLA_SEL_W>
8473            (match_operand:<VFMLA_W> 2 "register_operand" "w")
8474            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
8475          (float_extend:VDQSF
8476           (vec_select:<VFMLA_SEL_W>
8477            (match_operand:<VFMLA_W> 3 "register_operand" "w")
8478            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8479          (match_operand:VDQSF 1 "register_operand" "0")))]
8480   "TARGET_F16FML"
8481   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8482   [(set_attr "type" "neon_fp_mul_s")]
8483 )
8484
8485 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
8486   [(set (match_operand:VDQSF 0 "register_operand" "=w")
8487         (fma:VDQSF
8488          (float_extend:VDQSF
8489           (neg:<VFMLA_SEL_W>
8490            (vec_select:<VFMLA_SEL_W>
8491             (match_operand:<VFMLA_W> 2 "register_operand" "w")
8492             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
8493          (float_extend:VDQSF
8494           (vec_select:<VFMLA_SEL_W>
8495            (match_operand:<VFMLA_W> 3 "register_operand" "w")
8496            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8497          (match_operand:VDQSF 1 "register_operand" "0")))]
8498   "TARGET_F16FML"
8499   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8500   [(set_attr "type" "neon_fp_mul_s")]
8501 )
8502
8503 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
8504   [(set (match_operand:V2SF 0 "register_operand")
8505         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8506                            (match_operand:V4HF 2 "register_operand")
8507                            (match_operand:V4HF 3 "register_operand")
8508                            (match_operand:SI 4 "aarch64_imm2")]
8509          VFMLA16_LOW))]
8510   "TARGET_F16FML"
8511 {
8512     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
8513     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8514
8515     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
8516                                                             operands[1],
8517                                                             operands[2],
8518                                                             operands[3],
8519                                                             p1, lane));
8520     DONE;
8521 }
8522 )
8523
8524 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
8525   [(set (match_operand:V2SF 0 "register_operand")
8526         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8527                            (match_operand:V4HF 2 "register_operand")
8528                            (match_operand:V4HF 3 "register_operand")
8529                            (match_operand:SI 4 "aarch64_imm2")]
8530          VFMLA16_HIGH))]
8531   "TARGET_F16FML"
8532 {
8533     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8534     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8535
8536     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
8537                                                              operands[1],
8538                                                              operands[2],
8539                                                              operands[3],
8540                                                              p1, lane));
8541     DONE;
8542 })
8543
8544 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
8545   [(set (match_operand:V2SF 0 "register_operand" "=w")
8546         (fma:V2SF
8547          (float_extend:V2SF
8548            (vec_select:V2HF
8549             (match_operand:V4HF 2 "register_operand" "w")
8550             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8551          (float_extend:V2SF
8552            (vec_duplicate:V2HF
8553             (vec_select:HF
8554              (match_operand:V4HF 3 "register_operand" "x")
8555              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8556          (match_operand:V2SF 1 "register_operand" "0")))]
8557   "TARGET_F16FML"
8558   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8559   [(set_attr "type" "neon_fp_mul_s")]
8560 )
8561
8562 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
8563   [(set (match_operand:V2SF 0 "register_operand" "=w")
8564         (fma:V2SF
8565          (float_extend:V2SF
8566           (neg:V2HF
8567            (vec_select:V2HF
8568             (match_operand:V4HF 2 "register_operand" "w")
8569             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8570          (float_extend:V2SF
8571           (vec_duplicate:V2HF
8572            (vec_select:HF
8573             (match_operand:V4HF 3 "register_operand" "x")
8574             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8575          (match_operand:V2SF 1 "register_operand" "0")))]
8576   "TARGET_F16FML"
8577   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8578   [(set_attr "type" "neon_fp_mul_s")]
8579 )
8580
8581 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
8582   [(set (match_operand:V2SF 0 "register_operand" "=w")
8583         (fma:V2SF
8584          (float_extend:V2SF
8585            (vec_select:V2HF
8586             (match_operand:V4HF 2 "register_operand" "w")
8587             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8588          (float_extend:V2SF
8589            (vec_duplicate:V2HF
8590             (vec_select:HF
8591              (match_operand:V4HF 3 "register_operand" "x")
8592              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8593          (match_operand:V2SF 1 "register_operand" "0")))]
8594   "TARGET_F16FML"
8595   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8596   [(set_attr "type" "neon_fp_mul_s")]
8597 )
8598
8599 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
8600   [(set (match_operand:V2SF 0 "register_operand" "=w")
8601         (fma:V2SF
8602          (float_extend:V2SF
8603            (neg:V2HF
8604             (vec_select:V2HF
8605              (match_operand:V4HF 2 "register_operand" "w")
8606              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8607          (float_extend:V2SF
8608            (vec_duplicate:V2HF
8609             (vec_select:HF
8610              (match_operand:V4HF 3 "register_operand" "x")
8611              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8612          (match_operand:V2SF 1 "register_operand" "0")))]
8613   "TARGET_F16FML"
8614   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8615   [(set_attr "type" "neon_fp_mul_s")]
8616 )
8617
8618 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
8619   [(set (match_operand:V4SF 0 "register_operand")
8620         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8621                            (match_operand:V8HF 2 "register_operand")
8622                            (match_operand:V8HF 3 "register_operand")
8623                            (match_operand:SI 4 "aarch64_lane_imm3")]
8624          VFMLA16_LOW))]
8625   "TARGET_F16FML"
8626 {
8627     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
8628     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8629
8630     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
8631                                                               operands[1],
8632                                                               operands[2],
8633                                                               operands[3],
8634                                                               p1, lane));
8635     DONE;
8636 })
8637
8638 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
8639   [(set (match_operand:V4SF 0 "register_operand")
8640         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8641                            (match_operand:V8HF 2 "register_operand")
8642                            (match_operand:V8HF 3 "register_operand")
8643                            (match_operand:SI 4 "aarch64_lane_imm3")]
8644          VFMLA16_HIGH))]
8645   "TARGET_F16FML"
8646 {
8647     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8648     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8649
8650     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
8651                                                                operands[1],
8652                                                                operands[2],
8653                                                                operands[3],
8654                                                                p1, lane));
8655     DONE;
8656 })
8657
8658 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
8659   [(set (match_operand:V4SF 0 "register_operand" "=w")
8660         (fma:V4SF
8661          (float_extend:V4SF
8662           (vec_select:V4HF
8663             (match_operand:V8HF 2 "register_operand" "w")
8664             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8665          (float_extend:V4SF
8666           (vec_duplicate:V4HF
8667            (vec_select:HF
8668             (match_operand:V8HF 3 "register_operand" "x")
8669             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8670          (match_operand:V4SF 1 "register_operand" "0")))]
8671   "TARGET_F16FML"
8672   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8673   [(set_attr "type" "neon_fp_mul_s")]
8674 )
8675
8676 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
8677   [(set (match_operand:V4SF 0 "register_operand" "=w")
8678         (fma:V4SF
8679           (float_extend:V4SF
8680            (neg:V4HF
8681             (vec_select:V4HF
8682              (match_operand:V8HF 2 "register_operand" "w")
8683              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8684          (float_extend:V4SF
8685           (vec_duplicate:V4HF
8686            (vec_select:HF
8687             (match_operand:V8HF 3 "register_operand" "x")
8688             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8689          (match_operand:V4SF 1 "register_operand" "0")))]
8690   "TARGET_F16FML"
8691   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8692   [(set_attr "type" "neon_fp_mul_s")]
8693 )
8694
8695 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
8696   [(set (match_operand:V4SF 0 "register_operand" "=w")
8697         (fma:V4SF
8698          (float_extend:V4SF
8699           (vec_select:V4HF
8700             (match_operand:V8HF 2 "register_operand" "w")
8701             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8702          (float_extend:V4SF
8703           (vec_duplicate:V4HF
8704            (vec_select:HF
8705             (match_operand:V8HF 3 "register_operand" "x")
8706             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8707          (match_operand:V4SF 1 "register_operand" "0")))]
8708   "TARGET_F16FML"
8709   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8710   [(set_attr "type" "neon_fp_mul_s")]
8711 )
8712
8713 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
8714   [(set (match_operand:V4SF 0 "register_operand" "=w")
8715         (fma:V4SF
8716          (float_extend:V4SF
8717           (neg:V4HF
8718            (vec_select:V4HF
8719             (match_operand:V8HF 2 "register_operand" "w")
8720             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8721          (float_extend:V4SF
8722           (vec_duplicate:V4HF
8723            (vec_select:HF
8724             (match_operand:V8HF 3 "register_operand" "x")
8725             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8726          (match_operand:V4SF 1 "register_operand" "0")))]
8727   "TARGET_F16FML"
8728   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8729   [(set_attr "type" "neon_fp_mul_s")]
8730 )
8731
8732 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
8733   [(set (match_operand:V2SF 0 "register_operand")
8734         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8735                       (match_operand:V4HF 2 "register_operand")
8736                       (match_operand:V8HF 3 "register_operand")
8737                       (match_operand:SI 4 "aarch64_lane_imm3")]
8738          VFMLA16_LOW))]
8739   "TARGET_F16FML"
8740 {
8741     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
8742     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8743
8744     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
8745                                                              operands[1],
8746                                                              operands[2],
8747                                                              operands[3],
8748                                                              p1, lane));
8749     DONE;
8750
8751 })
8752
8753 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
8754   [(set (match_operand:V2SF 0 "register_operand")
8755         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8756                       (match_operand:V4HF 2 "register_operand")
8757                       (match_operand:V8HF 3 "register_operand")
8758                       (match_operand:SI 4 "aarch64_lane_imm3")]
8759          VFMLA16_HIGH))]
8760   "TARGET_F16FML"
8761 {
8762     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8763     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8764
8765     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
8766                                                               operands[1],
8767                                                               operands[2],
8768                                                               operands[3],
8769                                                               p1, lane));
8770     DONE;
8771
8772 })
8773
8774 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
8775   [(set (match_operand:V2SF 0 "register_operand" "=w")
8776         (fma:V2SF
8777          (float_extend:V2SF
8778            (vec_select:V2HF
8779             (match_operand:V4HF 2 "register_operand" "w")
8780             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8781          (float_extend:V2SF
8782           (vec_duplicate:V2HF
8783            (vec_select:HF
8784             (match_operand:V8HF 3 "register_operand" "x")
8785             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8786          (match_operand:V2SF 1 "register_operand" "0")))]
8787   "TARGET_F16FML"
8788   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8789   [(set_attr "type" "neon_fp_mul_s")]
8790 )
8791
8792 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
8793   [(set (match_operand:V2SF 0 "register_operand" "=w")
8794         (fma:V2SF
8795          (float_extend:V2SF
8796           (neg:V2HF
8797            (vec_select:V2HF
8798             (match_operand:V4HF 2 "register_operand" "w")
8799             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8800          (float_extend:V2SF
8801           (vec_duplicate:V2HF
8802            (vec_select:HF
8803             (match_operand:V8HF 3 "register_operand" "x")
8804             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8805          (match_operand:V2SF 1 "register_operand" "0")))]
8806   "TARGET_F16FML"
8807   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8808   [(set_attr "type" "neon_fp_mul_s")]
8809 )
8810
8811 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
8812   [(set (match_operand:V2SF 0 "register_operand" "=w")
8813         (fma:V2SF
8814          (float_extend:V2SF
8815            (vec_select:V2HF
8816             (match_operand:V4HF 2 "register_operand" "w")
8817             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8818          (float_extend:V2SF
8819           (vec_duplicate:V2HF
8820            (vec_select:HF
8821             (match_operand:V8HF 3 "register_operand" "x")
8822             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8823          (match_operand:V2SF 1 "register_operand" "0")))]
8824   "TARGET_F16FML"
8825   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8826   [(set_attr "type" "neon_fp_mul_s")]
8827 )
8828
8829 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
8830   [(set (match_operand:V2SF 0 "register_operand" "=w")
8831         (fma:V2SF
8832          (float_extend:V2SF
8833           (neg:V2HF
8834            (vec_select:V2HF
8835             (match_operand:V4HF 2 "register_operand" "w")
8836             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8837          (float_extend:V2SF
8838           (vec_duplicate:V2HF
8839            (vec_select:HF
8840             (match_operand:V8HF 3 "register_operand" "x")
8841             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8842          (match_operand:V2SF 1 "register_operand" "0")))]
8843   "TARGET_F16FML"
8844   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8845   [(set_attr "type" "neon_fp_mul_s")]
8846 )
8847
8848 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
8849   [(set (match_operand:V4SF 0 "register_operand")
8850         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8851                       (match_operand:V8HF 2 "register_operand")
8852                       (match_operand:V4HF 3 "register_operand")
8853                       (match_operand:SI 4 "aarch64_imm2")]
8854          VFMLA16_LOW))]
8855   "TARGET_F16FML"
8856 {
8857     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
8858     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8859
8860     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
8861                                                              operands[1],
8862                                                              operands[2],
8863                                                              operands[3],
8864                                                              p1, lane));
8865     DONE;
8866 })
8867
8868 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
8869   [(set (match_operand:V4SF 0 "register_operand")
8870         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8871                       (match_operand:V8HF 2 "register_operand")
8872                       (match_operand:V4HF 3 "register_operand")
8873                       (match_operand:SI 4 "aarch64_imm2")]
8874          VFMLA16_HIGH))]
8875   "TARGET_F16FML"
8876 {
8877     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8878     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8879
8880     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
8881                                                               operands[1],
8882                                                               operands[2],
8883                                                               operands[3],
8884                                                               p1, lane));
8885     DONE;
8886 })
8887
8888 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
8889   [(set (match_operand:V4SF 0 "register_operand" "=w")
8890         (fma:V4SF
8891          (float_extend:V4SF
8892           (vec_select:V4HF
8893            (match_operand:V8HF 2 "register_operand" "w")
8894            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8895          (float_extend:V4SF
8896           (vec_duplicate:V4HF
8897            (vec_select:HF
8898             (match_operand:V4HF 3 "register_operand" "x")
8899             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8900          (match_operand:V4SF 1 "register_operand" "0")))]
8901   "TARGET_F16FML"
8902   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8903   [(set_attr "type" "neon_fp_mul_s")]
8904 )
8905
8906 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
8907   [(set (match_operand:V4SF 0 "register_operand" "=w")
8908         (fma:V4SF
8909          (float_extend:V4SF
8910           (neg:V4HF
8911            (vec_select:V4HF
8912             (match_operand:V8HF 2 "register_operand" "w")
8913             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8914          (float_extend:V4SF
8915           (vec_duplicate:V4HF
8916            (vec_select:HF
8917             (match_operand:V4HF 3 "register_operand" "x")
8918             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8919          (match_operand:V4SF 1 "register_operand" "0")))]
8920   "TARGET_F16FML"
8921   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8922   [(set_attr "type" "neon_fp_mul_s")]
8923 )
8924
8925 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
8926   [(set (match_operand:V4SF 0 "register_operand" "=w")
8927         (fma:V4SF
8928          (float_extend:V4SF
8929           (vec_select:V4HF
8930            (match_operand:V8HF 2 "register_operand" "w")
8931            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8932          (float_extend:V4SF
8933           (vec_duplicate:V4HF
8934            (vec_select:HF
8935             (match_operand:V4HF 3 "register_operand" "x")
8936             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8937          (match_operand:V4SF 1 "register_operand" "0")))]
8938   "TARGET_F16FML"
8939   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8940   [(set_attr "type" "neon_fp_mul_s")]
8941 )
8942
8943 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
8944   [(set (match_operand:V4SF 0 "register_operand" "=w")
8945         (fma:V4SF
8946          (float_extend:V4SF
8947           (neg:V4HF
8948            (vec_select:V4HF
8949             (match_operand:V8HF 2 "register_operand" "w")
8950             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8951          (float_extend:V4SF
8952           (vec_duplicate:V4HF
8953            (vec_select:HF
8954             (match_operand:V4HF 3 "register_operand" "x")
8955             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8956          (match_operand:V4SF 1 "register_operand" "0")))]
8957   "TARGET_F16FML"
8958   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8959   [(set_attr "type" "neon_fp_mul_s")]
8960 )
8961
8962 ;; pmull
8963
8964 (define_insn "aarch64_crypto_pmulldi"
8965   [(set (match_operand:TI 0 "register_operand" "=w")
8966         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
8967                      (match_operand:DI 2 "register_operand" "w")]
8968                     UNSPEC_PMULL))]
8969  "TARGET_SIMD && TARGET_AES"
8970  "pmull\\t%0.1q, %1.1d, %2.1d"
8971   [(set_attr "type" "crypto_pmull")]
8972 )
8973
8974 (define_insn "aarch64_crypto_pmullv2di"
8975  [(set (match_operand:TI 0 "register_operand" "=w")
8976        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
8977                    (match_operand:V2DI 2 "register_operand" "w")]
8978                   UNSPEC_PMULL2))]
8979   "TARGET_SIMD && TARGET_AES"
8980   "pmull2\\t%0.1q, %1.2d, %2.2d"
8981   [(set_attr "type" "crypto_pmull")]
8982 )
8983
8984 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
8985 (define_insn "<optab><Vnarrowq><mode>2"
8986   [(set (match_operand:VQN 0 "register_operand" "=w")
8987         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
8988   "TARGET_SIMD"
8989   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
8990   [(set_attr "type" "neon_shift_imm_long")]
8991 )
8992
8993 (define_expand "aarch64_<su>xtl<mode>"
8994   [(set (match_operand:VQN 0 "register_operand" "=w")
8995         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
8996   "TARGET_SIMD"
8997   ""
8998 )
8999
9000 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9001 (define_insn "trunc<mode><Vnarrowq>2"
9002   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9003         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9004   "TARGET_SIMD"
9005   "xtn\t%0.<Vntype>, %1.<Vtype>"
9006   [(set_attr "type" "neon_move_narrow_q")]
9007 )
9008
9009 (define_insn "aarch64_bfdot<mode>"
9010   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9011         (plus:VDQSF
9012           (unspec:VDQSF
9013            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9014             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9015             UNSPEC_BFDOT)
9016           (match_operand:VDQSF 1 "register_operand" "0")))]
9017   "TARGET_BF16_SIMD"
9018   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9019   [(set_attr "type" "neon_dot<q>")]
9020 )
9021
9022 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9023   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9024         (plus:VDQSF
9025           (unspec:VDQSF
9026            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9027             (match_operand:VBF 3 "register_operand" "w")
9028             (match_operand:SI 4 "const_int_operand" "n")]
9029             UNSPEC_BFDOT)
9030           (match_operand:VDQSF 1 "register_operand" "0")))]
9031   "TARGET_BF16_SIMD"
9032 {
9033   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9034   int lane = INTVAL (operands[4]);
9035   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9036   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9037 }
9038   [(set_attr "type" "neon_dot<VDQSF:q>")]
9039 )
9040
9041 ;; vget_low/high_bf16
9042 (define_expand "aarch64_vget_lo_halfv8bf"
9043   [(match_operand:V4BF 0 "register_operand")
9044    (match_operand:V8BF 1 "register_operand")]
9045   "TARGET_BF16_SIMD"
9046 {
9047   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9048   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9049   DONE;
9050 })
9051
9052 (define_expand "aarch64_vget_hi_halfv8bf"
9053   [(match_operand:V4BF 0 "register_operand")
9054    (match_operand:V8BF 1 "register_operand")]
9055   "TARGET_BF16_SIMD"
9056 {
9057   rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9058   emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
9059   DONE;
9060 })
9061
9062 ;; bfmmla
9063 (define_insn "aarch64_bfmmlaqv4sf"
9064   [(set (match_operand:V4SF 0 "register_operand" "=w")
9065         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9066                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9067                                  (match_operand:V8BF 3 "register_operand" "w")]
9068                     UNSPEC_BFMMLA)))]
9069   "TARGET_BF16_SIMD"
9070   "bfmmla\\t%0.4s, %2.8h, %3.8h"
9071   [(set_attr "type" "neon_fp_mla_s_q")]
9072 )
9073
9074 ;; bfmlal<bt>
9075 (define_insn "aarch64_bfmlal<bt>v4sf"
9076   [(set (match_operand:V4SF 0 "register_operand" "=w")
9077         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9078                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9079                                   (match_operand:V8BF 3 "register_operand" "w")]
9080                      BF_MLA)))]
9081   "TARGET_BF16_SIMD"
9082   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9083   [(set_attr "type" "neon_fp_mla_s_q")]
9084 )
9085
9086 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9087   [(set (match_operand:V4SF 0 "register_operand" "=w")
9088         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9089                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9090                                   (match_operand:VBF 3 "register_operand" "w")
9091                                   (match_operand:SI 4 "const_int_operand" "n")]
9092                      BF_MLA)))]
9093   "TARGET_BF16_SIMD"
9094 {
9095   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9096   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9097 }
9098   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9099 )
9100
9101 ;; 8-bit integer matrix multiply-accumulate
9102 (define_insn "aarch64_simd_<sur>mmlav16qi"
9103   [(set (match_operand:V4SI 0 "register_operand" "=w")
9104         (plus:V4SI
9105          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9106                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9107          (match_operand:V4SI 1 "register_operand" "0")))]
9108   "TARGET_I8MM"
9109   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9110   [(set_attr "type" "neon_mla_s_q")]
9111 )
9112
9113 ;; bfcvtn
9114 (define_insn "aarch64_bfcvtn<q><mode>"
9115   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9116         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9117                             UNSPEC_BFCVTN))]
9118   "TARGET_BF16_SIMD"
9119   "bfcvtn\\t%0.4h, %1.4s"
9120   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9121 )
9122
9123 (define_insn "aarch64_bfcvtn2v8bf"
9124   [(set (match_operand:V8BF 0 "register_operand" "=w")
9125         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9126                       (match_operand:V4SF 2 "register_operand" "w")]
9127                       UNSPEC_BFCVTN2))]
9128   "TARGET_BF16_SIMD"
9129   "bfcvtn2\\t%0.8h, %2.4s"
9130   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9131 )
9132
9133 (define_insn "aarch64_bfcvtbf"
9134   [(set (match_operand:BF 0 "register_operand" "=w")
9135         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9136                     UNSPEC_BFCVT))]
9137   "TARGET_BF16_FP"
9138   "bfcvt\\t%h0, %s1"
9139   [(set_attr "type" "f_cvt")]
9140 )
9141
9142 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
9143 (define_insn "aarch64_vbfcvt<mode>"
9144   [(set (match_operand:V4SF 0 "register_operand" "=w")
9145         (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9146                       UNSPEC_BFCVTN))]
9147   "TARGET_BF16_SIMD"
9148   "shll\\t%0.4s, %1.4h, #16"
9149   [(set_attr "type" "neon_shift_imm_long")]
9150 )
9151
9152 (define_insn "aarch64_vbfcvt_highv8bf"
9153   [(set (match_operand:V4SF 0 "register_operand" "=w")
9154         (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9155                       UNSPEC_BFCVTN2))]
9156   "TARGET_BF16_SIMD"
9157   "shll2\\t%0.4s, %1.8h, #16"
9158   [(set_attr "type" "neon_shift_imm_long")]
9159 )
9160
9161 (define_insn "aarch64_bfcvtsf"
9162   [(set (match_operand:SF 0 "register_operand" "=w")
9163         (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9164                     UNSPEC_BFCVT))]
9165   "TARGET_BF16_FP"
9166   "shl\\t%d0, %d1, #16"
9167   [(set_attr "type" "neon_shift_imm")]
9168 )