gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 423 ;; fact that their usage need to guarantee that the source vectors are
 424 ;; contiguous.  It would be wrong to describe the operation without being able
 425 ;; to describe the permute that is also required, but even if that is done
 426 ;; the permute would have been created as a LOAD_LANES which means the values
 427 ;; in the registers are in the wrong order.
 428 (define_insn "aarch64_fcadd<rot><mode>"
 429   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 430         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 431                        (match_operand:VHSDF 2 "register_operand" "w")]
 432                        FCADD))]
 433   "TARGET_COMPLEX"
 434   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 435   [(set_attr "type" "neon_fcadd")]
 436 )
 437
 438 (define_insn "aarch64_fcmla<rot><mode>"
 439   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 440         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 441                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 442                                    (match_operand:VHSDF 3 "register_operand" "w")]
 443                                    FCMLA)))]
 444   "TARGET_COMPLEX"
 445   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 446   [(set_attr "type" "neon_fcmla")]
 447 )
 448
 449
 450 (define_insn "aarch64_fcmla_lane<rot><mode>"
 451   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 452         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 453                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 454                                    (match_operand:VHSDF 3 "register_operand" "w")
 455                                    (match_operand:SI 4 "const_int_operand" "n")]
 456                                    FCMLA)))]
 457   "TARGET_COMPLEX"
 458 {
 459   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 460   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 461 }
 462   [(set_attr "type" "neon_fcmla")]
 463 )
 464
 465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 466   [(set (match_operand:V4HF 0 "register_operand" "=w")
 467         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 468                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 469                                  (match_operand:V8HF 3 "register_operand" "w")
 470                                  (match_operand:SI 4 "const_int_operand" "n")]
 471                                  FCMLA)))]
 472   "TARGET_COMPLEX"
 473 {
 474   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 475   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 476 }
 477   [(set_attr "type" "neon_fcmla")]
 478 )
 479
 480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 481   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 482         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 483                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 484                                      (match_operand:<VHALF> 3 "register_operand" "w")
 485                                      (match_operand:SI 4 "const_int_operand" "n")]
 486                                      FCMLA)))]
 487   "TARGET_COMPLEX"
 488 {
 489   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 490   operands[4]
 491     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 492   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 493 }
 494   [(set_attr "type" "neon_fcmla")]
 495 )
 496
 497 ;; These instructions map to the __builtins for the Dot Product operations.
 498 (define_insn "aarch64_<sur>dot<vsi2qi>"
 499   [(set (match_operand:VS 0 "register_operand" "=w")
 500         (plus:VS (match_operand:VS 1 "register_operand" "0")
 501                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 502                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 503                 DOTPROD)))]
 504   "TARGET_DOTPROD"
 505   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 506   [(set_attr "type" "neon_dot<q>")]
 507 )
 508
 509 ;; These expands map to the Dot Product optab the vectorizer checks for.
 510 ;; The auto-vectorizer expects a dot product builtin that also does an
 511 ;; accumulation into the provided register.
 512 ;; Given the following pattern
 513 ;;
 514 ;; for (i=0; i<len; i++) {
 515 ;;     c = a[i] * b[i];
 516 ;;     r += c;
 517 ;; }
 518 ;; return result;
 519 ;;
 520 ;; This can be auto-vectorized to
 521 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 522 ;;
 523 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 526 ;; ...
 527 ;;
 528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 529 (define_expand "<sur>dot_prod<vsi2qi>"
 530   [(set (match_operand:VS 0 "register_operand")
 531         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 532                             (match_operand:<VSI2QI> 2 "register_operand")]
 533                  DOTPROD)
 534                 (match_operand:VS 3 "register_operand")))]
 535   "TARGET_DOTPROD"
 536 {
 537   emit_insn (
 538     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 539                                     operands[2]));
 540   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 541   DONE;
 542 })
 543
 544 ;; These instructions map to the __builtins for the Dot Product
 545 ;; indexed operations.
 546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 547   [(set (match_operand:VS 0 "register_operand" "=w")
 548         (plus:VS (match_operand:VS 1 "register_operand" "0")
 549                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 550                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 551                             (match_operand:SI 4 "immediate_operand" "i")]
 552                 DOTPROD)))]
 553   "TARGET_DOTPROD"
 554   {
 555     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 556     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 557   }
 558   [(set_attr "type" "neon_dot<q>")]
 559 )
 560
 561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 562   [(set (match_operand:VS 0 "register_operand" "=w")
 563         (plus:VS (match_operand:VS 1 "register_operand" "0")
 564                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 565                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 566                             (match_operand:SI 4 "immediate_operand" "i")]
 567                 DOTPROD)))]
 568   "TARGET_DOTPROD"
 569   {
 570     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 571     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 572   }
 573   [(set_attr "type" "neon_dot<q>")]
 574 )
 575
 576 (define_expand "copysign<mode>3"
 577   [(match_operand:VHSDF 0 "register_operand")
 578    (match_operand:VHSDF 1 "register_operand")
 579    (match_operand:VHSDF 2 "register_operand")]
 580   "TARGET_FLOAT && TARGET_SIMD"
 581 {
 582   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 583   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 584
 585   emit_move_insn (v_bitmask,
 586                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 587                                                      HOST_WIDE_INT_M1U << bits));
 588   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 589                                          operands[2], operands[1]));
 590   DONE;
 591 }
 592 )
 593
 594 (define_insn "*aarch64_mul3_elt<mode>"
 595  [(set (match_operand:VMUL 0 "register_operand" "=w")
 596     (mult:VMUL
 597       (vec_duplicate:VMUL
 598           (vec_select:<VEL>
 599             (match_operand:VMUL 1 "register_operand" "<h_con>")
 600             (parallel [(match_operand:SI 2 "immediate_operand")])))
 601       (match_operand:VMUL 3 "register_operand" "w")))]
 602   "TARGET_SIMD"
 603   {
 604     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 605     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 606   }
 607   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 608 )
 609
 610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 611   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 612      (mult:VMUL_CHANGE_NLANES
 613        (vec_duplicate:VMUL_CHANGE_NLANES
 614           (vec_select:<VEL>
 615             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 616             (parallel [(match_operand:SI 2 "immediate_operand")])))
 617       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 618   "TARGET_SIMD"
 619   {
 620     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 621     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 622   }
 623   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 624 )
 625
 626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 627  [(set (match_operand:VMUL 0 "register_operand" "=w")
 628     (mult:VMUL
 629       (vec_duplicate:VMUL
 630             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 631       (match_operand:VMUL 2 "register_operand" "w")))]
 632   "TARGET_SIMD"
 633   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 634   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 635 )
 636
 637 (define_insn "@aarch64_rsqrte<mode>"
 638   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 639         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 640                      UNSPEC_RSQRTE))]
 641   "TARGET_SIMD"
 642   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 643   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 644
 645 (define_insn "@aarch64_rsqrts<mode>"
 646   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 647         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 648                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 649          UNSPEC_RSQRTS))]
 650   "TARGET_SIMD"
 651   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 652   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 653
 654 (define_expand "rsqrt<mode>2"
 655   [(set (match_operand:VALLF 0 "register_operand" "=w")
 656         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 657                      UNSPEC_RSQRT))]
 658   "TARGET_SIMD"
 659 {
 660   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 661   DONE;
 662 })
 663
 664 (define_insn "*aarch64_mul3_elt_to_64v2df"
 665   [(set (match_operand:DF 0 "register_operand" "=w")
 666      (mult:DF
 667        (vec_select:DF
 668          (match_operand:V2DF 1 "register_operand" "w")
 669          (parallel [(match_operand:SI 2 "immediate_operand")]))
 670        (match_operand:DF 3 "register_operand" "w")))]
 671   "TARGET_SIMD"
 672   {
 673     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 674     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 675   }
 676   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 677 )
 678
 679 (define_insn "neg<mode>2"
 680   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 681         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 682   "TARGET_SIMD"
 683   "neg\t%0.<Vtype>, %1.<Vtype>"
 684   [(set_attr "type" "neon_neg<q>")]
 685 )
 686
 687 (define_insn "abs<mode>2"
 688   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 689         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 690   "TARGET_SIMD"
 691   "abs\t%0.<Vtype>, %1.<Vtype>"
 692   [(set_attr "type" "neon_abs<q>")]
 693 )
 694
 695 ;; The intrinsic version of integer ABS must not be allowed to
 696 ;; combine with any operation with an integerated ABS step, such
 697 ;; as SABD.
 698 (define_insn "aarch64_abs<mode>"
 699   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 700           (unspec:VSDQ_I_DI
 701             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 702            UNSPEC_ABS))]
 703   "TARGET_SIMD"
 704   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 705   [(set_attr "type" "neon_abs<q>")]
 706 )
 707
 708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 709 ;; This isn't accurate as ABS treats always its input as a signed value.
 710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 713 (define_insn "*aarch64_<su>abd<mode>_3"
 714   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 715         (minus:VDQ_BHSI
 716           (USMAX:VDQ_BHSI
 717             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 718             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 719           (<max_opp>:VDQ_BHSI
 720             (match_dup 1)
 721             (match_dup 2))))]
 722   "TARGET_SIMD"
 723   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 724   [(set_attr "type" "neon_abd<q>")]
 725 )
 726
 727 (define_insn "aarch64_<sur>abdl2<mode>_3"
 728   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 729         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 730                           (match_operand:VDQV_S 2 "register_operand" "w")]
 731         ABDL2))]
 732   "TARGET_SIMD"
 733   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 734   [(set_attr "type" "neon_abd<q>")]
 735 )
 736
 737 (define_insn "aarch64_<sur>abal<mode>_4"
 738   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 739         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 740                           (match_operand:VDQV_S 2 "register_operand" "w")
 741                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 742         ABAL))]
 743   "TARGET_SIMD"
 744   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 745   [(set_attr "type" "neon_arith_acc<q>")]
 746 )
 747
 748 (define_insn "aarch64_<sur>adalp<mode>_3"
 749   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 750         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 751                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 752         ADALP))]
 753   "TARGET_SIMD"
 754   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 755   [(set_attr "type" "neon_reduc_add<q>")]
 756 )
 757
 758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 759 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 760 ;; reduction of the difference into a V4SI vector and accumulate that into
 761 ;; operand 3 before copying that into the result operand 0.
 762 ;; Perform that with a sequence of:
 763 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 764 ;; UABAL        tmp.8h, op1.16b, op2.16b
 765 ;; UADALP       op3.4s, tmp.8h
 766 ;; MOV          op0, op3 // should be eliminated in later passes.
 767 ;; The signed version just uses the signed variants of the above instructions.
 768
 769 (define_expand "<sur>sadv16qi"
 770   [(use (match_operand:V4SI 0 "register_operand"))
 771    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 772                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 773    (use (match_operand:V4SI 3 "register_operand"))]
 774   "TARGET_SIMD"
 775   {
 776     rtx reduc = gen_reg_rtx (V8HImode);
 777     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 778                                                operands[2]));
 779     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 780                                               operands[2], reduc));
 781     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 782                                               operands[3]));
 783     emit_move_insn (operands[0], operands[3]);
 784     DONE;
 785   }
 786 )
 787
 788 (define_insn "aba<mode>_3"
 789   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 790         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 791                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 792                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 793                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 794   "TARGET_SIMD"
 795   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 796   [(set_attr "type" "neon_arith_acc<q>")]
 797 )
 798
 799 (define_insn "fabd<mode>3"
 800   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 801         (abs:VHSDF_HSDF
 802           (minus:VHSDF_HSDF
 803             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 804             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 805   "TARGET_SIMD"
 806   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 807   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 808 )
 809
 810 ;; For AND (vector, register) and BIC (vector, immediate)
 811 (define_insn "and<mode>3"
 812   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 813         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 814                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 815   "TARGET_SIMD"
 816   {
 817     switch (which_alternative)
 818       {
 819       case 0:
 820         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 821       case 1:
 822         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 823                                                   AARCH64_CHECK_BIC);
 824       default:
 825         gcc_unreachable ();
 826       }
 827   }
 828   [(set_attr "type" "neon_logic<q>")]
 829 )
 830
 831 ;; For ORR (vector, register) and ORR (vector, immediate)
 832 (define_insn "ior<mode>3"
 833   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 834         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 835                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 836   "TARGET_SIMD"
 837   {
 838     switch (which_alternative)
 839       {
 840       case 0:
 841         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 842       case 1:
 843         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 844                                                   AARCH64_CHECK_ORR);
 845       default:
 846         gcc_unreachable ();
 847       }
 848   }
 849   [(set_attr "type" "neon_logic<q>")]
 850 )
 851
 852 (define_insn "xor<mode>3"
 853   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 854         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 855                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 856   "TARGET_SIMD"
 857   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 858   [(set_attr "type" "neon_logic<q>")]
 859 )
 860
 861 (define_insn "one_cmpl<mode>2"
 862   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 863         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 864   "TARGET_SIMD"
 865   "not\t%0.<Vbtype>, %1.<Vbtype>"
 866   [(set_attr "type" "neon_logic<q>")]
 867 )
 868
 869 (define_insn "aarch64_simd_vec_set<mode>"
 870   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 871         (vec_merge:VALL_F16
 872             (vec_duplicate:VALL_F16
 873                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 874             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 875             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 876   "TARGET_SIMD"
 877   {
 878    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 879    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 880    switch (which_alternative)
 881      {
 882      case 0:
 883         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 884      case 1:
 885         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 886      case 2:
 887         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 888      default:
 889         gcc_unreachable ();
 890      }
 891   }
 892   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 893 )
 894
 895 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 896   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 897         (vec_merge:VALL_F16
 898             (vec_duplicate:VALL_F16
 899               (vec_select:<VEL>
 900                 (match_operand:VALL_F16 3 "register_operand" "w")
 901                 (parallel
 902                   [(match_operand:SI 4 "immediate_operand" "i")])))
 903             (match_operand:VALL_F16 1 "register_operand" "0")
 904             (match_operand:SI 2 "immediate_operand" "i")))]
 905   "TARGET_SIMD"
 906   {
 907     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 908     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 909     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 910
 911     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 912   }
 913   [(set_attr "type" "neon_ins<q>")]
 914 )
 915
 916 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 917   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 918         (vec_merge:VALL_F16_NO_V2Q
 919             (vec_duplicate:VALL_F16_NO_V2Q
 920               (vec_select:<VEL>
 921                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 922                 (parallel
 923                   [(match_operand:SI 4 "immediate_operand" "i")])))
 924             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 925             (match_operand:SI 2 "immediate_operand" "i")))]
 926   "TARGET_SIMD"
 927   {
 928     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 929     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 930     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 931                                            INTVAL (operands[4]));
 932
 933     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 934   }
 935   [(set_attr "type" "neon_ins<q>")]
 936 )
 937
 938 (define_insn "aarch64_simd_lshr<mode>"
 939  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 940        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 941                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 942  "TARGET_SIMD"
 943  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 944   [(set_attr "type" "neon_shift_imm<q>")]
 945 )
 946
 947 (define_insn "aarch64_simd_ashr<mode>"
 948  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 949        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 950                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 951  "TARGET_SIMD"
 952  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 953   [(set_attr "type" "neon_shift_imm<q>")]
 954 )
 955
 956 (define_insn "aarch64_simd_imm_shl<mode>"
 957  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 958        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 959                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 960  "TARGET_SIMD"
 961   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 962   [(set_attr "type" "neon_shift_imm<q>")]
 963 )
 964
 965 (define_insn "aarch64_simd_reg_sshl<mode>"
 966  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 967        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 968                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 969  "TARGET_SIMD"
 970  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 971   [(set_attr "type" "neon_shift_reg<q>")]
 972 )
 973
 974 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 975  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 976        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 977                     (match_operand:VDQ_I 2 "register_operand" "w")]
 978                    UNSPEC_ASHIFT_UNSIGNED))]
 979  "TARGET_SIMD"
 980  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 981   [(set_attr "type" "neon_shift_reg<q>")]
 982 )
 983
 984 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 985  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 986        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 987                     (match_operand:VDQ_I 2 "register_operand" "w")]
 988                    UNSPEC_ASHIFT_SIGNED))]
 989  "TARGET_SIMD"
 990  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 991   [(set_attr "type" "neon_shift_reg<q>")]
 992 )
 993
 994 (define_expand "ashl<mode>3"
 995   [(match_operand:VDQ_I 0 "register_operand" "")
 996    (match_operand:VDQ_I 1 "register_operand" "")
 997    (match_operand:SI  2 "general_operand" "")]
 998  "TARGET_SIMD"
 999 {
1000   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1001   int shift_amount;
1002
1003   if (CONST_INT_P (operands[2]))
1004     {
1005       shift_amount = INTVAL (operands[2]);
1006       if (shift_amount >= 0 && shift_amount < bit_width)
1007         {
1008           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1009                                                        shift_amount);
1010           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1011                                                      operands[1],
1012                                                      tmp));
1013           DONE;
1014         }
1015       else
1016         {
1017           operands[2] = force_reg (SImode, operands[2]);
1018         }
1019     }
1020   else if (MEM_P (operands[2]))
1021     {
1022       operands[2] = force_reg (SImode, operands[2]);
1023     }
1024
1025   if (REG_P (operands[2]))
1026     {
1027       rtx tmp = gen_reg_rtx (<MODE>mode);
1028       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1029                                              convert_to_mode (<VEL>mode,
1030                                                               operands[2],
1031                                                               0)));
1032       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1033                                                   tmp));
1034       DONE;
1035     }
1036   else
1037     FAIL;
1038 }
1039 )
1040
1041 (define_expand "lshr<mode>3"
1042   [(match_operand:VDQ_I 0 "register_operand" "")
1043    (match_operand:VDQ_I 1 "register_operand" "")
1044    (match_operand:SI  2 "general_operand" "")]
1045  "TARGET_SIMD"
1046 {
1047   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1048   int shift_amount;
1049
1050   if (CONST_INT_P (operands[2]))
1051     {
1052       shift_amount = INTVAL (operands[2]);
1053       if (shift_amount > 0 && shift_amount <= bit_width)
1054         {
1055           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1056                                                        shift_amount);
1057           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1058                                                   operands[1],
1059                                                   tmp));
1060           DONE;
1061         }
1062       else
1063         operands[2] = force_reg (SImode, operands[2]);
1064     }
1065   else if (MEM_P (operands[2]))
1066     {
1067       operands[2] = force_reg (SImode, operands[2]);
1068     }
1069
1070   if (REG_P (operands[2]))
1071     {
1072       rtx tmp = gen_reg_rtx (SImode);
1073       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1074       emit_insn (gen_negsi2 (tmp, operands[2]));
1075       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1076                                              convert_to_mode (<VEL>mode,
1077                                                               tmp, 0)));
1078       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1079                                                           operands[1],
1080                                                           tmp1));
1081       DONE;
1082     }
1083   else
1084     FAIL;
1085 }
1086 )
1087
1088 (define_expand "ashr<mode>3"
1089   [(match_operand:VDQ_I 0 "register_operand" "")
1090    (match_operand:VDQ_I 1 "register_operand" "")
1091    (match_operand:SI  2 "general_operand" "")]
1092  "TARGET_SIMD"
1093 {
1094   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1095   int shift_amount;
1096
1097   if (CONST_INT_P (operands[2]))
1098     {
1099       shift_amount = INTVAL (operands[2]);
1100       if (shift_amount > 0 && shift_amount <= bit_width)
1101         {
1102           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1103                                                        shift_amount);
1104           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1105                                                   operands[1],
1106                                                   tmp));
1107           DONE;
1108         }
1109       else
1110         operands[2] = force_reg (SImode, operands[2]);
1111     }
1112   else if (MEM_P (operands[2]))
1113     {
1114       operands[2] = force_reg (SImode, operands[2]);
1115     }
1116
1117   if (REG_P (operands[2]))
1118     {
1119       rtx tmp = gen_reg_rtx (SImode);
1120       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1121       emit_insn (gen_negsi2 (tmp, operands[2]));
1122       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1123                                              convert_to_mode (<VEL>mode,
1124                                                               tmp, 0)));
1125       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1126                                                         operands[1],
1127                                                         tmp1));
1128       DONE;
1129     }
1130   else
1131     FAIL;
1132 }
1133 )
1134
1135 (define_expand "vashl<mode>3"
1136  [(match_operand:VDQ_I 0 "register_operand" "")
1137   (match_operand:VDQ_I 1 "register_operand" "")
1138   (match_operand:VDQ_I 2 "register_operand" "")]
1139  "TARGET_SIMD"
1140 {
1141   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1142                                               operands[2]));
1143   DONE;
1144 })
1145
1146 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1147 ;; Negating individual lanes most certainly offsets the
1148 ;; gain from vectorization.
1149 (define_expand "vashr<mode>3"
1150  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1151   (match_operand:VDQ_BHSI 1 "register_operand" "")
1152   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1153  "TARGET_SIMD"
1154 {
1155   rtx neg = gen_reg_rtx (<MODE>mode);
1156   emit (gen_neg<mode>2 (neg, operands[2]));
1157   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1158                                                     neg));
1159   DONE;
1160 })
1161
1162 ;; DI vector shift
1163 (define_expand "aarch64_ashr_simddi"
1164   [(match_operand:DI 0 "register_operand" "=w")
1165    (match_operand:DI 1 "register_operand" "w")
1166    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1167   "TARGET_SIMD"
1168   {
1169     /* An arithmetic shift right by 64 fills the result with copies of the sign
1170        bit, just like asr by 63 - however the standard pattern does not handle
1171        a shift by 64.  */
1172     if (INTVAL (operands[2]) == 64)
1173       operands[2] = GEN_INT (63);
1174     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1175     DONE;
1176   }
1177 )
1178
1179 (define_expand "vlshr<mode>3"
1180  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1181   (match_operand:VDQ_BHSI 1 "register_operand" "")
1182   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1183  "TARGET_SIMD"
1184 {
1185   rtx neg = gen_reg_rtx (<MODE>mode);
1186   emit (gen_neg<mode>2 (neg, operands[2]));
1187   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1188                                                       neg));
1189   DONE;
1190 })
1191
1192 (define_expand "aarch64_lshr_simddi"
1193   [(match_operand:DI 0 "register_operand" "=w")
1194    (match_operand:DI 1 "register_operand" "w")
1195    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1196   "TARGET_SIMD"
1197   {
1198     if (INTVAL (operands[2]) == 64)
1199       emit_move_insn (operands[0], const0_rtx);
1200     else
1201       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1202     DONE;
1203   }
1204 )
1205
1206 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1207 (define_insn "vec_shr_<mode>"
1208   [(set (match_operand:VD 0 "register_operand" "=w")
1209         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1210                     (match_operand:SI 2 "immediate_operand" "i")]
1211                    UNSPEC_VEC_SHR))]
1212   "TARGET_SIMD"
1213   {
1214     if (BYTES_BIG_ENDIAN)
1215       return "shl %d0, %d1, %2";
1216     else
1217       return "ushr %d0, %d1, %2";
1218   }
1219   [(set_attr "type" "neon_shift_imm")]
1220 )
1221
1222 (define_expand "vec_set<mode>"
1223   [(match_operand:VALL_F16 0 "register_operand" "+w")
1224    (match_operand:<VEL> 1 "register_operand" "w")
1225    (match_operand:SI 2 "immediate_operand" "")]
1226   "TARGET_SIMD"
1227   {
1228     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1229     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1230                                           GEN_INT (elem), operands[0]));
1231     DONE;
1232   }
1233 )
1234
1235
1236 (define_insn "aarch64_mla<mode>"
1237  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1238        (plus:VDQ_BHSI (mult:VDQ_BHSI
1239                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1240                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1241                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1242  "TARGET_SIMD"
1243  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1244   [(set_attr "type" "neon_mla_<Vetype><q>")]
1245 )
1246
1247 (define_insn "*aarch64_mla_elt<mode>"
1248  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1249        (plus:VDQHS
1250          (mult:VDQHS
1251            (vec_duplicate:VDQHS
1252               (vec_select:<VEL>
1253                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1254                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1255            (match_operand:VDQHS 3 "register_operand" "w"))
1256          (match_operand:VDQHS 4 "register_operand" "0")))]
1257  "TARGET_SIMD"
1258   {
1259     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1260     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1261   }
1262   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1263 )
1264
1265 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1266  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1267        (plus:VDQHS
1268          (mult:VDQHS
1269            (vec_duplicate:VDQHS
1270               (vec_select:<VEL>
1271                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1272                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1273            (match_operand:VDQHS 3 "register_operand" "w"))
1274          (match_operand:VDQHS 4 "register_operand" "0")))]
1275  "TARGET_SIMD"
1276   {
1277     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1278     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1279   }
1280   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1281 )
1282
1283 (define_insn "*aarch64_mla_elt_merge<mode>"
1284   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1285         (plus:VDQHS
1286           (mult:VDQHS (vec_duplicate:VDQHS
1287                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1288                 (match_operand:VDQHS 2 "register_operand" "w"))
1289           (match_operand:VDQHS 3 "register_operand" "0")))]
1290  "TARGET_SIMD"
1291  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1292   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1293 )
1294
1295 (define_insn "aarch64_mls<mode>"
1296  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1297        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1298                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1299                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1300  "TARGET_SIMD"
1301  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1302   [(set_attr "type" "neon_mla_<Vetype><q>")]
1303 )
1304
1305 (define_insn "*aarch64_mls_elt<mode>"
1306  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1307        (minus:VDQHS
1308          (match_operand:VDQHS 4 "register_operand" "0")
1309          (mult:VDQHS
1310            (vec_duplicate:VDQHS
1311               (vec_select:<VEL>
1312                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1313                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1314            (match_operand:VDQHS 3 "register_operand" "w"))))]
1315  "TARGET_SIMD"
1316   {
1317     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1318     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1319   }
1320   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1321 )
1322
1323 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1324  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1325        (minus:VDQHS
1326          (match_operand:VDQHS 4 "register_operand" "0")
1327          (mult:VDQHS
1328            (vec_duplicate:VDQHS
1329               (vec_select:<VEL>
1330                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1331                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1332            (match_operand:VDQHS 3 "register_operand" "w"))))]
1333  "TARGET_SIMD"
1334   {
1335     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1336     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1337   }
1338   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1339 )
1340
1341 (define_insn "*aarch64_mls_elt_merge<mode>"
1342   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1343         (minus:VDQHS
1344           (match_operand:VDQHS 1 "register_operand" "0")
1345           (mult:VDQHS (vec_duplicate:VDQHS
1346                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1347                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1348   "TARGET_SIMD"
1349   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1350   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1351 )
1352
1353 ;; Max/Min operations.
1354 (define_insn "<su><maxmin><mode>3"
1355  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1356        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1357                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1358  "TARGET_SIMD"
1359  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1360   [(set_attr "type" "neon_minmax<q>")]
1361 )
1362
1363 (define_expand "<su><maxmin>v2di3"
1364  [(set (match_operand:V2DI 0 "register_operand" "")
1365        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1366                     (match_operand:V2DI 2 "register_operand" "")))]
1367  "TARGET_SIMD"
1368 {
1369   enum rtx_code cmp_operator;
1370   rtx cmp_fmt;
1371
1372   switch (<CODE>)
1373     {
1374     case UMIN:
1375       cmp_operator = LTU;
1376       break;
1377     case SMIN:
1378       cmp_operator = LT;
1379       break;
1380     case UMAX:
1381       cmp_operator = GTU;
1382       break;
1383     case SMAX:
1384       cmp_operator = GT;
1385       break;
1386     default:
1387       gcc_unreachable ();
1388     }
1389
1390   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1391   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1392               operands[2], cmp_fmt, operands[1], operands[2]));
1393   DONE;
1394 })
1395
1396 ;; Pairwise Integer Max/Min operations.
1397 (define_insn "aarch64_<maxmin_uns>p<mode>"
1398  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1399        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1400                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1401                         MAXMINV))]
1402  "TARGET_SIMD"
1403  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1404   [(set_attr "type" "neon_minmax<q>")]
1405 )
1406
1407 ;; Pairwise FP Max/Min operations.
1408 (define_insn "aarch64_<maxmin_uns>p<mode>"
1409  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1410        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1411                       (match_operand:VHSDF 2 "register_operand" "w")]
1412                       FMAXMINV))]
1413  "TARGET_SIMD"
1414  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1415   [(set_attr "type" "neon_minmax<q>")]
1416 )
1417
1418 ;; vec_concat gives a new vector with the low elements from operand 1, and
1419 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1420 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1421 ;; What that means, is that the RTL descriptions of the below patterns
1422 ;; need to change depending on endianness.
1423
1424 ;; Move to the low architectural bits of the register.
1425 ;; On little-endian this is { operand, zeroes }
1426 ;; On big-endian this is { zeroes, operand }
1427
1428 (define_insn "move_lo_quad_internal_<mode>"
1429   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1430         (vec_concat:VQ_NO2E
1431           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1432           (vec_duplicate:<VHALF> (const_int 0))))]
1433   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1434   "@
1435    dup\\t%d0, %1.d[0]
1436    fmov\\t%d0, %1
1437    dup\\t%d0, %1"
1438   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1439    (set_attr "length" "4")
1440    (set_attr "arch" "simd,fp,simd")]
1441 )
1442
1443 (define_insn "move_lo_quad_internal_<mode>"
1444   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1445         (vec_concat:VQ_2E
1446           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1447           (const_int 0)))]
1448   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1449   "@
1450    dup\\t%d0, %1.d[0]
1451    fmov\\t%d0, %1
1452    dup\\t%d0, %1"
1453   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1454    (set_attr "length" "4")
1455    (set_attr "arch" "simd,fp,simd")]
1456 )
1457
1458 (define_insn "move_lo_quad_internal_be_<mode>"
1459   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1460         (vec_concat:VQ_NO2E
1461           (vec_duplicate:<VHALF> (const_int 0))
1462           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1463   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1464   "@
1465    dup\\t%d0, %1.d[0]
1466    fmov\\t%d0, %1
1467    dup\\t%d0, %1"
1468   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1469    (set_attr "length" "4")
1470    (set_attr "arch" "simd,fp,simd")]
1471 )
1472
1473 (define_insn "move_lo_quad_internal_be_<mode>"
1474   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1475         (vec_concat:VQ_2E
1476           (const_int 0)
1477           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1478   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1479   "@
1480    dup\\t%d0, %1.d[0]
1481    fmov\\t%d0, %1
1482    dup\\t%d0, %1"
1483   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484    (set_attr "length" "4")
1485    (set_attr "arch" "simd,fp,simd")]
1486 )
1487
1488 (define_expand "move_lo_quad_<mode>"
1489   [(match_operand:VQ 0 "register_operand")
1490    (match_operand:VQ 1 "register_operand")]
1491   "TARGET_SIMD"
1492 {
1493   if (BYTES_BIG_ENDIAN)
1494     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1495   else
1496     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1497   DONE;
1498 }
1499 )
1500
1501 ;; Move operand1 to the high architectural bits of the register, keeping
1502 ;; the low architectural bits of operand2.
1503 ;; For little-endian this is { operand2, operand1 }
1504 ;; For big-endian this is { operand1, operand2 }
1505
1506 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1507   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1508         (vec_concat:VQ
1509           (vec_select:<VHALF>
1510                 (match_dup 0)
1511                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1512           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1513   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1514   "@
1515    ins\\t%0.d[1], %1.d[0]
1516    ins\\t%0.d[1], %1"
1517   [(set_attr "type" "neon_ins")]
1518 )
1519
1520 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1521   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1522         (vec_concat:VQ
1523           (match_operand:<VHALF> 1 "register_operand" "w,r")
1524           (vec_select:<VHALF>
1525                 (match_dup 0)
1526                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1527   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1528   "@
1529    ins\\t%0.d[1], %1.d[0]
1530    ins\\t%0.d[1], %1"
1531   [(set_attr "type" "neon_ins")]
1532 )
1533
1534 (define_expand "move_hi_quad_<mode>"
1535  [(match_operand:VQ 0 "register_operand" "")
1536   (match_operand:<VHALF> 1 "register_operand" "")]
1537  "TARGET_SIMD"
1538 {
1539   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1540   if (BYTES_BIG_ENDIAN)
1541     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1542                     operands[1], p));
1543   else
1544     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1545                     operands[1], p));
1546   DONE;
1547 })
1548
1549 ;; Narrowing operations.
1550
1551 ;; For doubles.
1552 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1553  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1554        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1555  "TARGET_SIMD"
1556  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1557   [(set_attr "type" "neon_shift_imm_narrow_q")]
1558 )
1559
1560 (define_expand "vec_pack_trunc_<mode>"
1561  [(match_operand:<VNARROWD> 0 "register_operand" "")
1562   (match_operand:VDN 1 "register_operand" "")
1563   (match_operand:VDN 2 "register_operand" "")]
1564  "TARGET_SIMD"
1565 {
1566   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1567   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1568   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1569
1570   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1571   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1572   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1573   DONE;
1574 })
1575
1576 ;; For quads.
1577
1578 (define_insn "vec_pack_trunc_<mode>"
1579  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1580        (vec_concat:<VNARROWQ2>
1581          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1582          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1583  "TARGET_SIMD"
1584  {
1585    if (BYTES_BIG_ENDIAN)
1586      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1587    else
1588      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1589  }
1590   [(set_attr "type" "multiple")
1591    (set_attr "length" "8")]
1592 )
1593
1594 ;; Widening operations.
1595
1596 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1597   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599                                (match_operand:VQW 1 "register_operand" "w")
1600                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1601                             )))]
1602   "TARGET_SIMD"
1603   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1604   [(set_attr "type" "neon_shift_imm_long")]
1605 )
1606
1607 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1608   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1610                                (match_operand:VQW 1 "register_operand" "w")
1611                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1612                             )))]
1613   "TARGET_SIMD"
1614   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1615   [(set_attr "type" "neon_shift_imm_long")]
1616 )
1617
1618 (define_expand "vec_unpack<su>_hi_<mode>"
1619   [(match_operand:<VWIDE> 0 "register_operand" "")
1620    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1621   "TARGET_SIMD"
1622   {
1623     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1624     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1625                                                           operands[1], p));
1626     DONE;
1627   }
1628 )
1629
1630 (define_expand "vec_unpack<su>_lo_<mode>"
1631   [(match_operand:<VWIDE> 0 "register_operand" "")
1632    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1633   "TARGET_SIMD"
1634   {
1635     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1636     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1637                                                           operands[1], p));
1638     DONE;
1639   }
1640 )
1641
1642 ;; Widening arithmetic.
1643
1644 (define_insn "*aarch64_<su>mlal_lo<mode>"
1645   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1646         (plus:<VWIDE>
1647           (mult:<VWIDE>
1648               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1649                  (match_operand:VQW 2 "register_operand" "w")
1650                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1651               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1652                  (match_operand:VQW 4 "register_operand" "w")
1653                  (match_dup 3))))
1654           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1655   "TARGET_SIMD"
1656   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1657   [(set_attr "type" "neon_mla_<Vetype>_long")]
1658 )
1659
1660 (define_insn "*aarch64_<su>mlal_hi<mode>"
1661   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1662         (plus:<VWIDE>
1663           (mult:<VWIDE>
1664               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665                  (match_operand:VQW 2 "register_operand" "w")
1666                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668                  (match_operand:VQW 4 "register_operand" "w")
1669                  (match_dup 3))))
1670           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1671   "TARGET_SIMD"
1672   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1673   [(set_attr "type" "neon_mla_<Vetype>_long")]
1674 )
1675
1676 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1677   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1678         (minus:<VWIDE>
1679           (match_operand:<VWIDE> 1 "register_operand" "0")
1680           (mult:<VWIDE>
1681               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682                  (match_operand:VQW 2 "register_operand" "w")
1683                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685                  (match_operand:VQW 4 "register_operand" "w")
1686                  (match_dup 3))))))]
1687   "TARGET_SIMD"
1688   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1689   [(set_attr "type" "neon_mla_<Vetype>_long")]
1690 )
1691
1692 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1693   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1694         (minus:<VWIDE>
1695           (match_operand:<VWIDE> 1 "register_operand" "0")
1696           (mult:<VWIDE>
1697               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698                  (match_operand:VQW 2 "register_operand" "w")
1699                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701                  (match_operand:VQW 4 "register_operand" "w")
1702                  (match_dup 3))))))]
1703   "TARGET_SIMD"
1704   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1705   [(set_attr "type" "neon_mla_<Vetype>_long")]
1706 )
1707
1708 (define_insn "*aarch64_<su>mlal<mode>"
1709   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1710         (plus:<VWIDE>
1711           (mult:<VWIDE>
1712             (ANY_EXTEND:<VWIDE>
1713               (match_operand:VD_BHSI 1 "register_operand" "w"))
1714             (ANY_EXTEND:<VWIDE>
1715               (match_operand:VD_BHSI 2 "register_operand" "w")))
1716           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1717   "TARGET_SIMD"
1718   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1719   [(set_attr "type" "neon_mla_<Vetype>_long")]
1720 )
1721
1722 (define_insn "*aarch64_<su>mlsl<mode>"
1723   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1724         (minus:<VWIDE>
1725           (match_operand:<VWIDE> 1 "register_operand" "0")
1726           (mult:<VWIDE>
1727             (ANY_EXTEND:<VWIDE>
1728               (match_operand:VD_BHSI 2 "register_operand" "w"))
1729             (ANY_EXTEND:<VWIDE>
1730               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1731   "TARGET_SIMD"
1732   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1733   [(set_attr "type" "neon_mla_<Vetype>_long")]
1734 )
1735
1736 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1737  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1738        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1739                            (match_operand:VQW 1 "register_operand" "w")
1740                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1741                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742                            (match_operand:VQW 2 "register_operand" "w")
1743                            (match_dup 3)))))]
1744   "TARGET_SIMD"
1745   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1746   [(set_attr "type" "neon_mul_<Vetype>_long")]
1747 )
1748
1749 (define_expand "vec_widen_<su>mult_lo_<mode>"
1750   [(match_operand:<VWIDE> 0 "register_operand" "")
1751    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1752    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1753  "TARGET_SIMD"
1754  {
1755    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1756    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1757                                                        operands[1],
1758                                                        operands[2], p));
1759    DONE;
1760  }
1761 )
1762
1763 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1764  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1765       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1766                             (match_operand:VQW 1 "register_operand" "w")
1767                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1768                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769                             (match_operand:VQW 2 "register_operand" "w")
1770                             (match_dup 3)))))]
1771   "TARGET_SIMD"
1772   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1773   [(set_attr "type" "neon_mul_<Vetype>_long")]
1774 )
1775
1776 (define_expand "vec_widen_<su>mult_hi_<mode>"
1777   [(match_operand:<VWIDE> 0 "register_operand" "")
1778    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1779    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1780  "TARGET_SIMD"
1781  {
1782    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1783    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1784                                                        operands[1],
1785                                                        operands[2], p));
1786    DONE;
1787
1788  }
1789 )
1790
1791 ;; FP vector operations.
1792 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1793 ;; double-precision (64-bit) floating-point data types and arithmetic as
1794 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1795 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1796 ;;
1797 ;; Floating-point operations can raise an exception.  Vectorizing such
1798 ;; operations are safe because of reasons explained below.
1799 ;;
1800 ;; ARMv8 permits an extension to enable trapped floating-point
1801 ;; exception handling, however this is an optional feature.  In the
1802 ;; event of a floating-point exception being raised by vectorised
1803 ;; code then:
1804 ;; 1.  If trapped floating-point exceptions are available, then a trap
1805 ;;     will be taken when any lane raises an enabled exception.  A trap
1806 ;;     handler may determine which lane raised the exception.
1807 ;; 2.  Alternatively a sticky exception flag is set in the
1808 ;;     floating-point status register (FPSR).  Software may explicitly
1809 ;;     test the exception flags, in which case the tests will either
1810 ;;     prevent vectorisation, allowing precise identification of the
1811 ;;     failing operation, or if tested outside of vectorisable regions
1812 ;;     then the specific operation and lane are not of interest.
1813
1814 ;; FP arithmetic operations.
1815
1816 (define_insn "add<mode>3"
1817  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1818        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1819                    (match_operand:VHSDF 2 "register_operand" "w")))]
1820  "TARGET_SIMD"
1821  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1822   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1823 )
1824
1825 (define_insn "sub<mode>3"
1826  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1827        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1828                     (match_operand:VHSDF 2 "register_operand" "w")))]
1829  "TARGET_SIMD"
1830  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1831   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1832 )
1833
1834 (define_insn "mul<mode>3"
1835  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1836        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1837                    (match_operand:VHSDF 2 "register_operand" "w")))]
1838  "TARGET_SIMD"
1839  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1840   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1841 )
1842
1843 (define_expand "div<mode>3"
1844  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1845        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1846                   (match_operand:VHSDF 2 "register_operand" "w")))]
1847  "TARGET_SIMD"
1848 {
1849   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1850     DONE;
1851
1852   operands[1] = force_reg (<MODE>mode, operands[1]);
1853 })
1854
1855 (define_insn "*div<mode>3"
1856  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1857        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1858                  (match_operand:VHSDF 2 "register_operand" "w")))]
1859  "TARGET_SIMD"
1860  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1861   [(set_attr "type" "neon_fp_div_<stype><q>")]
1862 )
1863
1864 (define_insn "neg<mode>2"
1865  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1866        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1867  "TARGET_SIMD"
1868  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1869   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1870 )
1871
1872 (define_insn "abs<mode>2"
1873  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1874        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1875  "TARGET_SIMD"
1876  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1877   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1878 )
1879
1880 (define_insn "fma<mode>4"
1881   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1882        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1883                   (match_operand:VHSDF 2 "register_operand" "w")
1884                   (match_operand:VHSDF 3 "register_operand" "0")))]
1885   "TARGET_SIMD"
1886  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1887   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1888 )
1889
1890 (define_insn "*aarch64_fma4_elt<mode>"
1891   [(set (match_operand:VDQF 0 "register_operand" "=w")
1892     (fma:VDQF
1893       (vec_duplicate:VDQF
1894         (vec_select:<VEL>
1895           (match_operand:VDQF 1 "register_operand" "<h_con>")
1896           (parallel [(match_operand:SI 2 "immediate_operand")])))
1897       (match_operand:VDQF 3 "register_operand" "w")
1898       (match_operand:VDQF 4 "register_operand" "0")))]
1899   "TARGET_SIMD"
1900   {
1901     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1902     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1903   }
1904   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1905 )
1906
1907 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1908   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1909     (fma:VDQSF
1910       (vec_duplicate:VDQSF
1911         (vec_select:<VEL>
1912           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1913           (parallel [(match_operand:SI 2 "immediate_operand")])))
1914       (match_operand:VDQSF 3 "register_operand" "w")
1915       (match_operand:VDQSF 4 "register_operand" "0")))]
1916   "TARGET_SIMD"
1917   {
1918     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1919     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1920   }
1921   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1922 )
1923
1924 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1925   [(set (match_operand:VMUL 0 "register_operand" "=w")
1926     (fma:VMUL
1927       (vec_duplicate:VMUL
1928           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1929       (match_operand:VMUL 2 "register_operand" "w")
1930       (match_operand:VMUL 3 "register_operand" "0")))]
1931   "TARGET_SIMD"
1932   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1933   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1934 )
1935
1936 (define_insn "*aarch64_fma4_elt_to_64v2df"
1937   [(set (match_operand:DF 0 "register_operand" "=w")
1938     (fma:DF
1939         (vec_select:DF
1940           (match_operand:V2DF 1 "register_operand" "w")
1941           (parallel [(match_operand:SI 2 "immediate_operand")]))
1942       (match_operand:DF 3 "register_operand" "w")
1943       (match_operand:DF 4 "register_operand" "0")))]
1944   "TARGET_SIMD"
1945   {
1946     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1947     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1948   }
1949   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1950 )
1951
1952 (define_insn "fnma<mode>4"
1953   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1954         (fma:VHSDF
1955           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1956           (match_operand:VHSDF 2 "register_operand" "w")
1957           (match_operand:VHSDF 3 "register_operand" "0")))]
1958   "TARGET_SIMD"
1959   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1960   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1961 )
1962
1963 (define_insn "*aarch64_fnma4_elt<mode>"
1964   [(set (match_operand:VDQF 0 "register_operand" "=w")
1965     (fma:VDQF
1966       (neg:VDQF
1967         (match_operand:VDQF 3 "register_operand" "w"))
1968       (vec_duplicate:VDQF
1969         (vec_select:<VEL>
1970           (match_operand:VDQF 1 "register_operand" "<h_con>")
1971           (parallel [(match_operand:SI 2 "immediate_operand")])))
1972       (match_operand:VDQF 4 "register_operand" "0")))]
1973   "TARGET_SIMD"
1974   {
1975     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1976     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1977   }
1978   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1979 )
1980
1981 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1982   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1983     (fma:VDQSF
1984       (neg:VDQSF
1985         (match_operand:VDQSF 3 "register_operand" "w"))
1986       (vec_duplicate:VDQSF
1987         (vec_select:<VEL>
1988           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1989           (parallel [(match_operand:SI 2 "immediate_operand")])))
1990       (match_operand:VDQSF 4 "register_operand" "0")))]
1991   "TARGET_SIMD"
1992   {
1993     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1994     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1995   }
1996   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1997 )
1998
1999 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2000   [(set (match_operand:VMUL 0 "register_operand" "=w")
2001     (fma:VMUL
2002       (neg:VMUL
2003         (match_operand:VMUL 2 "register_operand" "w"))
2004       (vec_duplicate:VMUL
2005         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2006       (match_operand:VMUL 3 "register_operand" "0")))]
2007   "TARGET_SIMD"
2008   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2009   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2010 )
2011
2012 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2013   [(set (match_operand:DF 0 "register_operand" "=w")
2014     (fma:DF
2015       (vec_select:DF
2016         (match_operand:V2DF 1 "register_operand" "w")
2017         (parallel [(match_operand:SI 2 "immediate_operand")]))
2018       (neg:DF
2019         (match_operand:DF 3 "register_operand" "w"))
2020       (match_operand:DF 4 "register_operand" "0")))]
2021   "TARGET_SIMD"
2022   {
2023     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2024     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2025   }
2026   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2027 )
2028
2029 ;; Vector versions of the floating-point frint patterns.
2030 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2031 (define_insn "<frint_pattern><mode>2"
2032   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2033         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2034                        FRINT))]
2035   "TARGET_SIMD"
2036   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2037   [(set_attr "type" "neon_fp_round_<stype><q>")]
2038 )
2039
2040 ;; Vector versions of the fcvt standard patterns.
2041 ;; Expands to lbtrunc, lround, lceil, lfloor
2042 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2043   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2044         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2045                                [(match_operand:VHSDF 1 "register_operand" "w")]
2046                                FCVT)))]
2047   "TARGET_SIMD"
2048   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2049   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2050 )
2051
2052 ;; HF Scalar variants of related SIMD instructions.
2053 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2054   [(set (match_operand:HI 0 "register_operand" "=w")
2055         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2056                       FCVT)))]
2057   "TARGET_SIMD_F16INST"
2058   "fcvt<frint_suffix><su>\t%h0, %h1"
2059   [(set_attr "type" "neon_fp_to_int_s")]
2060 )
2061
2062 (define_insn "<optab>_trunchfhi2"
2063   [(set (match_operand:HI 0 "register_operand" "=w")
2064         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2065   "TARGET_SIMD_F16INST"
2066   "fcvtz<su>\t%h0, %h1"
2067   [(set_attr "type" "neon_fp_to_int_s")]
2068 )
2069
2070 (define_insn "<optab>hihf2"
2071   [(set (match_operand:HF 0 "register_operand" "=w")
2072         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2073   "TARGET_SIMD_F16INST"
2074   "<su_optab>cvtf\t%h0, %h1"
2075   [(set_attr "type" "neon_int_to_fp_s")]
2076 )
2077
2078 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2079   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2080         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2081                                [(mult:VDQF
2082          (match_operand:VDQF 1 "register_operand" "w")
2083          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2084                                UNSPEC_FRINTZ)))]
2085   "TARGET_SIMD
2086    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2087                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2088   {
2089     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2090     char buf[64];
2091     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2092     output_asm_insn (buf, operands);
2093     return "";
2094   }
2095   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2096 )
2097
2098 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2099   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2100         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2101                                [(match_operand:VHSDF 1 "register_operand")]
2102                                 UNSPEC_FRINTZ)))]
2103   "TARGET_SIMD"
2104   {})
2105
2106 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2107   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2108         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2109                                [(match_operand:VHSDF 1 "register_operand")]
2110                                 UNSPEC_FRINTZ)))]
2111   "TARGET_SIMD"
2112   {})
2113
2114 (define_expand "ftrunc<VHSDF:mode>2"
2115   [(set (match_operand:VHSDF 0 "register_operand")
2116         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2117                        UNSPEC_FRINTZ))]
2118   "TARGET_SIMD"
2119   {})
2120
2121 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2122   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2123         (FLOATUORS:VHSDF
2124           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2125   "TARGET_SIMD"
2126   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2127   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2128 )
2129
2130 ;; Conversions between vectors of floats and doubles.
2131 ;; Contains a mix of patterns to match standard pattern names
2132 ;; and those for intrinsics.
2133
2134 ;; Float widening operations.
2135
2136 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2137   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2138         (float_extend:<VWIDE> (vec_select:<VHALF>
2139                                (match_operand:VQ_HSF 1 "register_operand" "w")
2140                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2141                             )))]
2142   "TARGET_SIMD"
2143   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2144   [(set_attr "type" "neon_fp_cvt_widen_s")]
2145 )
2146
2147 ;; Convert between fixed-point and floating-point (vector modes)
2148
2149 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2150   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2151         (unspec:<VHSDF:FCVT_TARGET>
2152           [(match_operand:VHSDF 1 "register_operand" "w")
2153            (match_operand:SI 2 "immediate_operand" "i")]
2154          FCVT_F2FIXED))]
2155   "TARGET_SIMD"
2156   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2157   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2158 )
2159
2160 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2161   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2162         (unspec:<VDQ_HSDI:FCVT_TARGET>
2163           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2164            (match_operand:SI 2 "immediate_operand" "i")]
2165          FCVT_FIXED2F))]
2166   "TARGET_SIMD"
2167   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2168   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2169 )
2170
2171 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2172 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2173 ;; the meaning of HI and LO changes depending on the target endianness.
2174 ;; While elsewhere we map the higher numbered elements of a vector to
2175 ;; the lower architectural lanes of the vector, for these patterns we want
2176 ;; to always treat "hi" as referring to the higher architectural lanes.
2177 ;; Consequently, while the patterns below look inconsistent with our
2178 ;; other big-endian patterns their behavior is as required.
2179
2180 (define_expand "vec_unpacks_lo_<mode>"
2181   [(match_operand:<VWIDE> 0 "register_operand" "")
2182    (match_operand:VQ_HSF 1 "register_operand" "")]
2183   "TARGET_SIMD"
2184   {
2185     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2186     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2187                                                        operands[1], p));
2188     DONE;
2189   }
2190 )
2191
2192 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2193   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194         (float_extend:<VWIDE> (vec_select:<VHALF>
2195                                (match_operand:VQ_HSF 1 "register_operand" "w")
2196                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2197                             )))]
2198   "TARGET_SIMD"
2199   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2200   [(set_attr "type" "neon_fp_cvt_widen_s")]
2201 )
2202
2203 (define_expand "vec_unpacks_hi_<mode>"
2204   [(match_operand:<VWIDE> 0 "register_operand" "")
2205    (match_operand:VQ_HSF 1 "register_operand" "")]
2206   "TARGET_SIMD"
2207   {
2208     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2209     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2210                                                        operands[1], p));
2211     DONE;
2212   }
2213 )
2214 (define_insn "aarch64_float_extend_lo_<Vwide>"
2215   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2216         (float_extend:<VWIDE>
2217           (match_operand:VDF 1 "register_operand" "w")))]
2218   "TARGET_SIMD"
2219   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2220   [(set_attr "type" "neon_fp_cvt_widen_s")]
2221 )
2222
2223 ;; Float narrowing operations.
2224
2225 (define_insn "aarch64_float_truncate_lo_<mode>"
2226   [(set (match_operand:VDF 0 "register_operand" "=w")
2227       (float_truncate:VDF
2228         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2229   "TARGET_SIMD"
2230   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2231   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2232 )
2233
2234 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2235   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2236     (vec_concat:<VDBL>
2237       (match_operand:VDF 1 "register_operand" "0")
2238       (float_truncate:VDF
2239         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2240   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2241   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2242   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2243 )
2244
2245 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2246   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2247     (vec_concat:<VDBL>
2248       (float_truncate:VDF
2249         (match_operand:<VWIDE> 2 "register_operand" "w"))
2250       (match_operand:VDF 1 "register_operand" "0")))]
2251   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2252   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2253   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2254 )
2255
2256 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2257   [(match_operand:<VDBL> 0 "register_operand" "=w")
2258    (match_operand:VDF 1 "register_operand" "0")
2259    (match_operand:<VWIDE> 2 "register_operand" "w")]
2260   "TARGET_SIMD"
2261 {
2262   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2263                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2264                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2265   emit_insn (gen (operands[0], operands[1], operands[2]));
2266   DONE;
2267 }
2268 )
2269
2270 (define_expand "vec_pack_trunc_v2df"
2271   [(set (match_operand:V4SF 0 "register_operand")
2272       (vec_concat:V4SF
2273         (float_truncate:V2SF
2274             (match_operand:V2DF 1 "register_operand"))
2275         (float_truncate:V2SF
2276             (match_operand:V2DF 2 "register_operand"))
2277           ))]
2278   "TARGET_SIMD"
2279   {
2280     rtx tmp = gen_reg_rtx (V2SFmode);
2281     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2282     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2283
2284     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2285     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2286                                                    tmp, operands[hi]));
2287     DONE;
2288   }
2289 )
2290
2291 (define_expand "vec_pack_trunc_df"
2292   [(set (match_operand:V2SF 0 "register_operand")
2293       (vec_concat:V2SF
2294         (float_truncate:SF
2295             (match_operand:DF 1 "register_operand"))
2296         (float_truncate:SF
2297             (match_operand:DF 2 "register_operand"))
2298           ))]
2299   "TARGET_SIMD"
2300   {
2301     rtx tmp = gen_reg_rtx (V2SFmode);
2302     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2303     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2304
2305     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2306     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2307     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2308     DONE;
2309   }
2310 )
2311
2312 ;; FP Max/Min
2313 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2314 ;; expression like:
2315 ;;      a = (b < c) ? b : c;
2316 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2317 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2318 ;; -ffast-math.
2319 ;;
2320 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2321 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2322 ;; operand will be returned when both operands are zero (i.e. they may not
2323 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2324 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2325 ;; NaNs.
2326
2327 (define_insn "<su><maxmin><mode>3"
2328   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2329         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2330                        (match_operand:VHSDF 2 "register_operand" "w")))]
2331   "TARGET_SIMD"
2332   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2333   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2334 )
2335
2336 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2337 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2338 ;; which implement the IEEE fmax ()/fmin () functions.
2339 (define_insn "<maxmin_uns><mode>3"
2340   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2341        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2342                       (match_operand:VHSDF 2 "register_operand" "w")]
2343                       FMAXMIN_UNS))]
2344   "TARGET_SIMD"
2345   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2346   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2347 )
2348
2349 ;; 'across lanes' add.
2350
2351 (define_expand "reduc_plus_scal_<mode>"
2352   [(match_operand:<VEL> 0 "register_operand" "=w")
2353    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2354                UNSPEC_ADDV)]
2355   "TARGET_SIMD"
2356   {
2357     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358     rtx scratch = gen_reg_rtx (<MODE>mode);
2359     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2360     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2361     DONE;
2362   }
2363 )
2364
2365 (define_insn "aarch64_faddp<mode>"
2366  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2367        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2368                       (match_operand:VHSDF 2 "register_operand" "w")]
2369         UNSPEC_FADDV))]
2370  "TARGET_SIMD"
2371  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2372   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2373 )
2374
2375 (define_insn "aarch64_reduc_plus_internal<mode>"
2376  [(set (match_operand:VDQV 0 "register_operand" "=w")
2377        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2378                     UNSPEC_ADDV))]
2379  "TARGET_SIMD"
2380  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2381   [(set_attr "type" "neon_reduc_add<q>")]
2382 )
2383
2384 (define_insn "aarch64_reduc_plus_internalv2si"
2385  [(set (match_operand:V2SI 0 "register_operand" "=w")
2386        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2387                     UNSPEC_ADDV))]
2388  "TARGET_SIMD"
2389  "addp\\t%0.2s, %1.2s, %1.2s"
2390   [(set_attr "type" "neon_reduc_add")]
2391 )
2392
2393 (define_insn "reduc_plus_scal_<mode>"
2394  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2395        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2396                    UNSPEC_FADDV))]
2397  "TARGET_SIMD"
2398  "faddp\\t%<Vetype>0, %1.<Vtype>"
2399   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2400 )
2401
2402 (define_expand "reduc_plus_scal_v4sf"
2403  [(set (match_operand:SF 0 "register_operand")
2404        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2405                     UNSPEC_FADDV))]
2406  "TARGET_SIMD"
2407 {
2408   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2409   rtx scratch = gen_reg_rtx (V4SFmode);
2410   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2411   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2412   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2413   DONE;
2414 })
2415
2416 (define_insn "clrsb<mode>2"
2417   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2418         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2419   "TARGET_SIMD"
2420   "cls\\t%0.<Vtype>, %1.<Vtype>"
2421   [(set_attr "type" "neon_cls<q>")]
2422 )
2423
2424 (define_insn "clz<mode>2"
2425  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2426        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2427  "TARGET_SIMD"
2428  "clz\\t%0.<Vtype>, %1.<Vtype>"
2429   [(set_attr "type" "neon_cls<q>")]
2430 )
2431
2432 (define_insn "popcount<mode>2"
2433   [(set (match_operand:VB 0 "register_operand" "=w")
2434         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2435   "TARGET_SIMD"
2436   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2437   [(set_attr "type" "neon_cnt<q>")]
2438 )
2439
2440 ;; 'across lanes' max and min ops.
2441
2442 ;; Template for outputting a scalar, so we can create __builtins which can be
2443 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2444 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2445   [(match_operand:<VEL> 0 "register_operand")
2446    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2447                   FMAXMINV)]
2448   "TARGET_SIMD"
2449   {
2450     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2451     rtx scratch = gen_reg_rtx (<MODE>mode);
2452     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2453                                                               operands[1]));
2454     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2455     DONE;
2456   }
2457 )
2458
2459 ;; Likewise for integer cases, signed and unsigned.
2460 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2461   [(match_operand:<VEL> 0 "register_operand")
2462    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2463                     MAXMINV)]
2464   "TARGET_SIMD"
2465   {
2466     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2467     rtx scratch = gen_reg_rtx (<MODE>mode);
2468     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2469                                                               operands[1]));
2470     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2471     DONE;
2472   }
2473 )
2474
2475 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2476  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2477        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2478                     MAXMINV))]
2479  "TARGET_SIMD"
2480  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2481   [(set_attr "type" "neon_reduc_minmax<q>")]
2482 )
2483
2484 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2485  [(set (match_operand:V2SI 0 "register_operand" "=w")
2486        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2487                     MAXMINV))]
2488  "TARGET_SIMD"
2489  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2490   [(set_attr "type" "neon_reduc_minmax")]
2491 )
2492
2493 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2494  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2495        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2496                       FMAXMINV))]
2497  "TARGET_SIMD"
2498  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2499   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2500 )
2501
2502 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2503 ;; allocation.
2504 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2505 ;; to select.
2506 ;;
2507 ;; Thus our BSL is of the form:
2508 ;;   op0 = bsl (mask, op2, op3)
2509 ;; We can use any of:
2510 ;;
2511 ;;   if (op0 = mask)
2512 ;;     bsl mask, op1, op2
2513 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2514 ;;     bit op0, op2, mask
2515 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2516 ;;     bif op0, op1, mask
2517 ;;
2518 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2519 ;; Some forms of straight-line code may generate the equivalent form
2520 ;; in *aarch64_simd_bsl<mode>_alt.
2521
2522 (define_insn "aarch64_simd_bsl<mode>_internal"
2523   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2524         (xor:VDQ_I
2525            (and:VDQ_I
2526              (xor:VDQ_I
2527                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2528                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2529              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2530           (match_dup:<V_INT_EQUIV> 3)
2531         ))]
2532   "TARGET_SIMD"
2533   "@
2534   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2535   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2536   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2537   [(set_attr "type" "neon_bsl<q>")]
2538 )
2539
2540 ;; We need this form in addition to the above pattern to match the case
2541 ;; when combine tries merging three insns such that the second operand of
2542 ;; the outer XOR matches the second operand of the inner XOR rather than
2543 ;; the first.  The two are equivalent but since recog doesn't try all
2544 ;; permutations of commutative operations, we have to have a separate pattern.
2545
2546 (define_insn "*aarch64_simd_bsl<mode>_alt"
2547   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2548         (xor:VDQ_I
2549            (and:VDQ_I
2550              (xor:VDQ_I
2551                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2552                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2553               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2554           (match_dup:<V_INT_EQUIV> 2)))]
2555   "TARGET_SIMD"
2556   "@
2557   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2558   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2559   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2560   [(set_attr "type" "neon_bsl<q>")]
2561 )
2562
2563 ;; DImode is special, we want to avoid computing operations which are
2564 ;; more naturally computed in general purpose registers in the vector
2565 ;; registers.  If we do that, we need to move all three operands from general
2566 ;; purpose registers to vector registers, then back again.  However, we
2567 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2568 ;; optimizations based on the component operations of a BSL.
2569 ;;
2570 ;; That means we need a splitter back to the individual operations, if they
2571 ;; would be better calculated on the integer side.
2572
2573 (define_insn_and_split "aarch64_simd_bsldi_internal"
2574   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2575         (xor:DI
2576            (and:DI
2577              (xor:DI
2578                (match_operand:DI 3 "register_operand" "w,0,w,r")
2579                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2580              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2581           (match_dup:DI 3)
2582         ))]
2583   "TARGET_SIMD"
2584   "@
2585   bsl\\t%0.8b, %2.8b, %3.8b
2586   bit\\t%0.8b, %2.8b, %1.8b
2587   bif\\t%0.8b, %3.8b, %1.8b
2588   #"
2589   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2590   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2591 {
2592   /* Split back to individual operations.  If we're before reload, and
2593      able to create a temporary register, do so.  If we're after reload,
2594      we've got an early-clobber destination register, so use that.
2595      Otherwise, we can't create pseudos and we can't yet guarantee that
2596      operands[0] is safe to write, so FAIL to split.  */
2597
2598   rtx scratch;
2599   if (reload_completed)
2600     scratch = operands[0];
2601   else if (can_create_pseudo_p ())
2602     scratch = gen_reg_rtx (DImode);
2603   else
2604     FAIL;
2605
2606   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2607   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2608   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2609   DONE;
2610 }
2611   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2612    (set_attr "length" "4,4,4,12")]
2613 )
2614
2615 (define_insn_and_split "aarch64_simd_bsldi_alt"
2616   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2617         (xor:DI
2618            (and:DI
2619              (xor:DI
2620                (match_operand:DI 3 "register_operand" "w,w,0,r")
2621                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2622              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2623           (match_dup:DI 2)
2624         ))]
2625   "TARGET_SIMD"
2626   "@
2627   bsl\\t%0.8b, %3.8b, %2.8b
2628   bit\\t%0.8b, %3.8b, %1.8b
2629   bif\\t%0.8b, %2.8b, %1.8b
2630   #"
2631   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2632   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2633 {
2634   /* Split back to individual operations.  If we're before reload, and
2635      able to create a temporary register, do so.  If we're after reload,
2636      we've got an early-clobber destination register, so use that.
2637      Otherwise, we can't create pseudos and we can't yet guarantee that
2638      operands[0] is safe to write, so FAIL to split.  */
2639
2640   rtx scratch;
2641   if (reload_completed)
2642     scratch = operands[0];
2643   else if (can_create_pseudo_p ())
2644     scratch = gen_reg_rtx (DImode);
2645   else
2646     FAIL;
2647
2648   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2649   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2650   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2651   DONE;
2652 }
2653   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2654    (set_attr "length" "4,4,4,12")]
2655 )
2656
2657 (define_expand "aarch64_simd_bsl<mode>"
2658   [(match_operand:VALLDIF 0 "register_operand")
2659    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2660    (match_operand:VALLDIF 2 "register_operand")
2661    (match_operand:VALLDIF 3 "register_operand")]
2662  "TARGET_SIMD"
2663 {
2664   /* We can't alias operands together if they have different modes.  */
2665   rtx tmp = operands[0];
2666   if (FLOAT_MODE_P (<MODE>mode))
2667     {
2668       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2669       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2670       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2671     }
2672   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2673   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2674                                                          operands[1],
2675                                                          operands[2],
2676                                                          operands[3]));
2677   if (tmp != operands[0])
2678     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2679
2680   DONE;
2681 })
2682
2683 (define_expand "vcond_mask_<mode><v_int_equiv>"
2684   [(match_operand:VALLDI 0 "register_operand")
2685    (match_operand:VALLDI 1 "nonmemory_operand")
2686    (match_operand:VALLDI 2 "nonmemory_operand")
2687    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2688   "TARGET_SIMD"
2689 {
2690   /* If we have (a = (P) ? -1 : 0);
2691      Then we can simply move the generated mask (result must be int).  */
2692   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2693       && operands[2] == CONST0_RTX (<MODE>mode))
2694     emit_move_insn (operands[0], operands[3]);
2695   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2696   else if (operands[1] == CONST0_RTX (<MODE>mode)
2697            && operands[2] == CONSTM1_RTX (<MODE>mode))
2698     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2699   else
2700     {
2701       if (!REG_P (operands[1]))
2702         operands[1] = force_reg (<MODE>mode, operands[1]);
2703       if (!REG_P (operands[2]))
2704         operands[2] = force_reg (<MODE>mode, operands[2]);
2705       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2706                                              operands[1], operands[2]));
2707     }
2708
2709   DONE;
2710 })
2711
2712 ;; Patterns comparing two vectors to produce a mask.
2713
2714 (define_expand "vec_cmp<mode><mode>"
2715   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2716           (match_operator 1 "comparison_operator"
2717             [(match_operand:VSDQ_I_DI 2 "register_operand")
2718              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2719   "TARGET_SIMD"
2720 {
2721   rtx mask = operands[0];
2722   enum rtx_code code = GET_CODE (operands[1]);
2723
2724   switch (code)
2725     {
2726     case NE:
2727     case LE:
2728     case LT:
2729     case GE:
2730     case GT:
2731     case EQ:
2732       if (operands[3] == CONST0_RTX (<MODE>mode))
2733         break;
2734
2735       /* Fall through.  */
2736     default:
2737       if (!REG_P (operands[3]))
2738         operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740       break;
2741     }
2742
2743   switch (code)
2744     {
2745     case LT:
2746       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2747       break;
2748
2749     case GE:
2750       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2751       break;
2752
2753     case LE:
2754       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2755       break;
2756
2757     case GT:
2758       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2759       break;
2760
2761     case LTU:
2762       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2763       break;
2764
2765     case GEU:
2766       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2767       break;
2768
2769     case LEU:
2770       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2771       break;
2772
2773     case GTU:
2774       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2775       break;
2776
2777     case NE:
2778       /* Handle NE as !EQ.  */
2779       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2780       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2781       break;
2782
2783     case EQ:
2784       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2785       break;
2786
2787     default:
2788       gcc_unreachable ();
2789     }
2790
2791   DONE;
2792 })
2793
2794 (define_expand "vec_cmp<mode><v_int_equiv>"
2795   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2796         (match_operator 1 "comparison_operator"
2797             [(match_operand:VDQF 2 "register_operand")
2798              (match_operand:VDQF 3 "nonmemory_operand")]))]
2799   "TARGET_SIMD"
2800 {
2801   int use_zero_form = 0;
2802   enum rtx_code code = GET_CODE (operands[1]);
2803   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2804
2805   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2806
2807   switch (code)
2808     {
2809     case LE:
2810     case LT:
2811     case GE:
2812     case GT:
2813     case EQ:
2814       if (operands[3] == CONST0_RTX (<MODE>mode))
2815         {
2816           use_zero_form = 1;
2817           break;
2818         }
2819       /* Fall through.  */
2820     default:
2821       if (!REG_P (operands[3]))
2822         operands[3] = force_reg (<MODE>mode, operands[3]);
2823
2824       break;
2825     }
2826
2827   switch (code)
2828     {
2829     case LT:
2830       if (use_zero_form)
2831         {
2832           comparison = gen_aarch64_cmlt<mode>;
2833           break;
2834         }
2835       /* Fall through.  */
2836     case UNLT:
2837       std::swap (operands[2], operands[3]);
2838       /* Fall through.  */
2839     case UNGT:
2840     case GT:
2841       comparison = gen_aarch64_cmgt<mode>;
2842       break;
2843     case LE:
2844       if (use_zero_form)
2845         {
2846           comparison = gen_aarch64_cmle<mode>;
2847           break;
2848         }
2849       /* Fall through.  */
2850     case UNLE:
2851       std::swap (operands[2], operands[3]);
2852       /* Fall through.  */
2853     case UNGE:
2854     case GE:
2855       comparison = gen_aarch64_cmge<mode>;
2856       break;
2857     case NE:
2858     case EQ:
2859       comparison = gen_aarch64_cmeq<mode>;
2860       break;
2861     case UNEQ:
2862     case ORDERED:
2863     case UNORDERED:
2864     case LTGT:
2865       break;
2866     default:
2867       gcc_unreachable ();
2868     }
2869
2870   switch (code)
2871     {
2872     case UNGE:
2873     case UNGT:
2874     case UNLE:
2875     case UNLT:
2876       {
2877         /* All of the above must not raise any FP exceptions.  Thus we first
2878            check each operand for NaNs and force any elements containing NaN to
2879            zero before using them in the compare.
2880            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2881                                      (cm<cc> (isnan (a) ? 0.0 : a,
2882                                               isnan (b) ? 0.0 : b))
2883            We use the following transformations for doing the comparisions:
2884            a UNGE b -> a GE b
2885            a UNGT b -> a GT b
2886            a UNLE b -> b GE a
2887            a UNLT b -> b GT a.  */
2888
2889         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2890         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2891         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2892         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2893         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2894         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2895         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2896                                           lowpart_subreg (<V_INT_EQUIV>mode,
2897                                                           operands[2],
2898                                                           <MODE>mode)));
2899         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2900                                           lowpart_subreg (<V_INT_EQUIV>mode,
2901                                                           operands[3],
2902                                                           <MODE>mode)));
2903         gcc_assert (comparison != NULL);
2904         emit_insn (comparison (operands[0],
2905                                lowpart_subreg (<MODE>mode,
2906                                                tmp0, <V_INT_EQUIV>mode),
2907                                lowpart_subreg (<MODE>mode,
2908                                                tmp1, <V_INT_EQUIV>mode)));
2909         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2910       }
2911       break;
2912
2913     case LT:
2914     case LE:
2915     case GT:
2916     case GE:
2917     case EQ:
2918     case NE:
2919       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2920          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2921          a GE b -> a GE b
2922          a GT b -> a GT b
2923          a LE b -> b GE a
2924          a LT b -> b GT a
2925          a EQ b -> a EQ b
2926          a NE b -> ~(a EQ b)  */
2927       gcc_assert (comparison != NULL);
2928       emit_insn (comparison (operands[0], operands[2], operands[3]));
2929       if (code == NE)
2930         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2931       break;
2932
2933     case LTGT:
2934       /* LTGT is not guranteed to not generate a FP exception.  So let's
2935          go the faster way : ((a > b) || (b > a)).  */
2936       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2937                                          operands[2], operands[3]));
2938       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2939       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2940       break;
2941
2942     case ORDERED:
2943     case UNORDERED:
2944     case UNEQ:
2945       /* cmeq (a, a) & cmeq (b, b).  */
2946       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2947                                          operands[2], operands[2]));
2948       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2949       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2950
2951       if (code == UNORDERED)
2952         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2953       else if (code == UNEQ)
2954         {
2955           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2956           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2957         }
2958       break;
2959
2960     default:
2961       gcc_unreachable ();
2962     }
2963
2964   DONE;
2965 })
2966
2967 (define_expand "vec_cmpu<mode><mode>"
2968   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2969           (match_operator 1 "comparison_operator"
2970             [(match_operand:VSDQ_I_DI 2 "register_operand")
2971              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2972   "TARGET_SIMD"
2973 {
2974   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2975                                       operands[2], operands[3]));
2976   DONE;
2977 })
2978
2979 (define_expand "vcond<mode><mode>"
2980   [(set (match_operand:VALLDI 0 "register_operand")
2981         (if_then_else:VALLDI
2982           (match_operator 3 "comparison_operator"
2983             [(match_operand:VALLDI 4 "register_operand")
2984              (match_operand:VALLDI 5 "nonmemory_operand")])
2985           (match_operand:VALLDI 1 "nonmemory_operand")
2986           (match_operand:VALLDI 2 "nonmemory_operand")))]
2987   "TARGET_SIMD"
2988 {
2989   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2990   enum rtx_code code = GET_CODE (operands[3]);
2991
2992   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2993      it as well as switch operands 1/2 in order to avoid the additional
2994      NOT instruction.  */
2995   if (code == NE)
2996     {
2997       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2998                                     operands[4], operands[5]);
2999       std::swap (operands[1], operands[2]);
3000     }
3001   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3002                                              operands[4], operands[5]));
3003   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3004                                                  operands[2], mask));
3005
3006   DONE;
3007 })
3008
3009 (define_expand "vcond<v_cmp_mixed><mode>"
3010   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3011         (if_then_else:<V_cmp_mixed>
3012           (match_operator 3 "comparison_operator"
3013             [(match_operand:VDQF_COND 4 "register_operand")
3014              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3015           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3016           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3017   "TARGET_SIMD"
3018 {
3019   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3020   enum rtx_code code = GET_CODE (operands[3]);
3021
3022   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3023      it as well as switch operands 1/2 in order to avoid the additional
3024      NOT instruction.  */
3025   if (code == NE)
3026     {
3027       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3028                                     operands[4], operands[5]);
3029       std::swap (operands[1], operands[2]);
3030     }
3031   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3032                                              operands[4], operands[5]));
3033   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3034                                                 operands[0], operands[1],
3035                                                 operands[2], mask));
3036
3037   DONE;
3038 })
3039
3040 (define_expand "vcondu<mode><mode>"
3041   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3042         (if_then_else:VSDQ_I_DI
3043           (match_operator 3 "comparison_operator"
3044             [(match_operand:VSDQ_I_DI 4 "register_operand")
3045              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3046           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3047           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3048   "TARGET_SIMD"
3049 {
3050   rtx mask = gen_reg_rtx (<MODE>mode);
3051   enum rtx_code code = GET_CODE (operands[3]);
3052
3053   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3054      it as well as switch operands 1/2 in order to avoid the additional
3055      NOT instruction.  */
3056   if (code == NE)
3057     {
3058       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3059                                     operands[4], operands[5]);
3060       std::swap (operands[1], operands[2]);
3061     }
3062   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3063                                       operands[4], operands[5]));
3064   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3065                                                  operands[2], mask));
3066   DONE;
3067 })
3068
3069 (define_expand "vcondu<mode><v_cmp_mixed>"
3070   [(set (match_operand:VDQF 0 "register_operand")
3071         (if_then_else:VDQF
3072           (match_operator 3 "comparison_operator"
3073             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3074              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3075           (match_operand:VDQF 1 "nonmemory_operand")
3076           (match_operand:VDQF 2 "nonmemory_operand")))]
3077   "TARGET_SIMD"
3078 {
3079   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3080   enum rtx_code code = GET_CODE (operands[3]);
3081
3082   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3083      it as well as switch operands 1/2 in order to avoid the additional
3084      NOT instruction.  */
3085   if (code == NE)
3086     {
3087       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3088                                     operands[4], operands[5]);
3089       std::swap (operands[1], operands[2]);
3090     }
3091   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3092                                                   mask, operands[3],
3093                                                   operands[4], operands[5]));
3094   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3095                                                  operands[2], mask));
3096   DONE;
3097 })
3098
3099 ;; Patterns for AArch64 SIMD Intrinsics.
3100
3101 ;; Lane extraction with sign extension to general purpose register.
3102 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3103   [(set (match_operand:GPI 0 "register_operand" "=r")
3104         (sign_extend:GPI
3105           (vec_select:<VEL>
3106             (match_operand:VDQQH 1 "register_operand" "w")
3107             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3108   "TARGET_SIMD"
3109   {
3110     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3111     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3112   }
3113   [(set_attr "type" "neon_to_gp<q>")]
3114 )
3115
3116 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3117   [(set (match_operand:GPI 0 "register_operand" "=r")
3118         (zero_extend:GPI
3119           (vec_select:<VEL>
3120             (match_operand:VDQQH 1 "register_operand" "w")
3121             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3122   "TARGET_SIMD"
3123   {
3124     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3125                                            INTVAL (operands[2]));
3126     return "umov\\t%w0, %1.<Vetype>[%2]";
3127   }
3128   [(set_attr "type" "neon_to_gp<q>")]
3129 )
3130
3131 ;; Lane extraction of a value, neither sign nor zero extension
3132 ;; is guaranteed so upper bits should be considered undefined.
3133 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3134 (define_insn "aarch64_get_lane<mode>"
3135   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3136         (vec_select:<VEL>
3137           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3138           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3139   "TARGET_SIMD"
3140   {
3141     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3142     switch (which_alternative)
3143       {
3144         case 0:
3145           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3146         case 1:
3147           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3148         case 2:
3149           return "st1\\t{%1.<Vetype>}[%2], %0";
3150         default:
3151           gcc_unreachable ();
3152       }
3153   }
3154   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3155 )
3156
3157 (define_insn "load_pair_lanes<mode>"
3158   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3159         (vec_concat:<VDBL>
3160            (match_operand:VDC 1 "memory_operand" "Utq")
3161            (match_operand:VDC 2 "memory_operand" "m")))]
3162   "TARGET_SIMD && !STRICT_ALIGNMENT
3163    && rtx_equal_p (XEXP (operands[2], 0),
3164                    plus_constant (Pmode,
3165                                   XEXP (operands[1], 0),
3166                                   GET_MODE_SIZE (<MODE>mode)))"
3167   "ldr\\t%q0, %1"
3168   [(set_attr "type" "neon_load1_1reg_q")]
3169 )
3170
3171 (define_insn "store_pair_lanes<mode>"
3172   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3173         (vec_concat:<VDBL>
3174            (match_operand:VDC 1 "register_operand" "w, r")
3175            (match_operand:VDC 2 "register_operand" "w, r")))]
3176   "TARGET_SIMD"
3177   "@
3178    stp\\t%d1, %d2, %y0
3179    stp\\t%x1, %x2, %y0"
3180   [(set_attr "type" "neon_stp, store_16")]
3181 )
3182
3183 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3184 ;; dest vector.
3185
3186 (define_insn "*aarch64_combinez<mode>"
3187   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3188         (vec_concat:<VDBL>
3189           (match_operand:VDC 1 "general_operand" "w,?r,m")
3190           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3191   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3192   "@
3193    mov\\t%0.8b, %1.8b
3194    fmov\t%d0, %1
3195    ldr\\t%d0, %1"
3196   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3197    (set_attr "arch" "simd,fp,simd")]
3198 )
3199
3200 (define_insn "*aarch64_combinez_be<mode>"
3201   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3202         (vec_concat:<VDBL>
3203           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3204           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3205   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3206   "@
3207    mov\\t%0.8b, %1.8b
3208    fmov\t%d0, %1
3209    ldr\\t%d0, %1"
3210   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3211    (set_attr "arch" "simd,fp,simd")]
3212 )
3213
3214 (define_expand "aarch64_combine<mode>"
3215   [(match_operand:<VDBL> 0 "register_operand")
3216    (match_operand:VDC 1 "register_operand")
3217    (match_operand:VDC 2 "register_operand")]
3218   "TARGET_SIMD"
3219 {
3220   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3221
3222   DONE;
3223 }
3224 )
3225
3226 (define_expand "@aarch64_simd_combine<mode>"
3227   [(match_operand:<VDBL> 0 "register_operand")
3228    (match_operand:VDC 1 "register_operand")
3229    (match_operand:VDC 2 "register_operand")]
3230   "TARGET_SIMD"
3231   {
3232     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3233     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3234     DONE;
3235   }
3236 [(set_attr "type" "multiple")]
3237 )
3238
3239 ;; <su><addsub>l<q>.
3240
3241 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3242  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3243        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3244                            (match_operand:VQW 1 "register_operand" "w")
3245                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3246                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3247                            (match_operand:VQW 2 "register_operand" "w")
3248                            (match_dup 3)))))]
3249   "TARGET_SIMD"
3250   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3251   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3252 )
3253
3254 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3255  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3256        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3257                            (match_operand:VQW 1 "register_operand" "w")
3258                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3259                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3260                            (match_operand:VQW 2 "register_operand" "w")
3261                            (match_dup 3)))))]
3262   "TARGET_SIMD"
3263   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3264   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3265 )
3266
3267
3268 (define_expand "aarch64_saddl2<mode>"
3269   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3270    (match_operand:VQW 1 "register_operand" "w")
3271    (match_operand:VQW 2 "register_operand" "w")]
3272   "TARGET_SIMD"
3273 {
3274   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3275   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3276                                                   operands[2], p));
3277   DONE;
3278 })
3279
3280 (define_expand "aarch64_uaddl2<mode>"
3281   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3282    (match_operand:VQW 1 "register_operand" "w")
3283    (match_operand:VQW 2 "register_operand" "w")]
3284   "TARGET_SIMD"
3285 {
3286   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3287   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3288                                                   operands[2], p));
3289   DONE;
3290 })
3291
3292 (define_expand "aarch64_ssubl2<mode>"
3293   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3294    (match_operand:VQW 1 "register_operand" "w")
3295    (match_operand:VQW 2 "register_operand" "w")]
3296   "TARGET_SIMD"
3297 {
3298   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3299   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3300                                                 operands[2], p));
3301   DONE;
3302 })
3303
3304 (define_expand "aarch64_usubl2<mode>"
3305   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3306    (match_operand:VQW 1 "register_operand" "w")
3307    (match_operand:VQW 2 "register_operand" "w")]
3308   "TARGET_SIMD"
3309 {
3310   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3311   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3312                                                 operands[2], p));
3313   DONE;
3314 })
3315
3316 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3317  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3319                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3320                        (ANY_EXTEND:<VWIDE>
3321                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3322   "TARGET_SIMD"
3323   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3324   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3325 )
3326
3327 ;; <su><addsub>w<q>.
3328
3329 (define_expand "widen_ssum<mode>3"
3330   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3331         (plus:<VDBLW> (sign_extend:<VDBLW>
3332                         (match_operand:VQW 1 "register_operand" ""))
3333                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3334   "TARGET_SIMD"
3335   {
3336     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3337     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3338
3339     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3340                                                 operands[1], p));
3341     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3342     DONE;
3343   }
3344 )
3345
3346 (define_expand "widen_ssum<mode>3"
3347   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3348         (plus:<VWIDE> (sign_extend:<VWIDE>
3349                         (match_operand:VD_BHSI 1 "register_operand" ""))
3350                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3351   "TARGET_SIMD"
3352 {
3353   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3354   DONE;
3355 })
3356
3357 (define_expand "widen_usum<mode>3"
3358   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3359         (plus:<VDBLW> (zero_extend:<VDBLW>
3360                         (match_operand:VQW 1 "register_operand" ""))
3361                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3362   "TARGET_SIMD"
3363   {
3364     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3365     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3366
3367     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3368                                                  operands[1], p));
3369     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3370     DONE;
3371   }
3372 )
3373
3374 (define_expand "widen_usum<mode>3"
3375   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3376         (plus:<VWIDE> (zero_extend:<VWIDE>
3377                         (match_operand:VD_BHSI 1 "register_operand" ""))
3378                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3379   "TARGET_SIMD"
3380 {
3381   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3382   DONE;
3383 })
3384
3385 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3386   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3387         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3388           (ANY_EXTEND:<VWIDE>
3389             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3390   "TARGET_SIMD"
3391   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3392   [(set_attr "type" "neon_sub_widen")]
3393 )
3394
3395 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3396   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3397         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3398           (ANY_EXTEND:<VWIDE>
3399             (vec_select:<VHALF>
3400               (match_operand:VQW 2 "register_operand" "w")
3401               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3402   "TARGET_SIMD"
3403   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3404   [(set_attr "type" "neon_sub_widen")]
3405 )
3406
3407 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3408   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3409         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3410           (ANY_EXTEND:<VWIDE>
3411             (vec_select:<VHALF>
3412               (match_operand:VQW 2 "register_operand" "w")
3413               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3414   "TARGET_SIMD"
3415   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3416   [(set_attr "type" "neon_sub_widen")]
3417 )
3418
3419 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3420   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3421         (plus:<VWIDE>
3422           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3423           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3424   "TARGET_SIMD"
3425   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426   [(set_attr "type" "neon_add_widen")]
3427 )
3428
3429 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3430   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431         (plus:<VWIDE>
3432           (ANY_EXTEND:<VWIDE>
3433             (vec_select:<VHALF>
3434               (match_operand:VQW 2 "register_operand" "w")
3435               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3436           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3437   "TARGET_SIMD"
3438   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3439   [(set_attr "type" "neon_add_widen")]
3440 )
3441
3442 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3443   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3444         (plus:<VWIDE>
3445           (ANY_EXTEND:<VWIDE>
3446             (vec_select:<VHALF>
3447               (match_operand:VQW 2 "register_operand" "w")
3448               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3449           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3450   "TARGET_SIMD"
3451   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3452   [(set_attr "type" "neon_add_widen")]
3453 )
3454
3455 (define_expand "aarch64_saddw2<mode>"
3456   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3457    (match_operand:<VWIDE> 1 "register_operand" "w")
3458    (match_operand:VQW 2 "register_operand" "w")]
3459   "TARGET_SIMD"
3460 {
3461   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3462   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3463                                                 operands[2], p));
3464   DONE;
3465 })
3466
3467 (define_expand "aarch64_uaddw2<mode>"
3468   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3469    (match_operand:<VWIDE> 1 "register_operand" "w")
3470    (match_operand:VQW 2 "register_operand" "w")]
3471   "TARGET_SIMD"
3472 {
3473   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3474   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3475                                                 operands[2], p));
3476   DONE;
3477 })
3478
3479
3480 (define_expand "aarch64_ssubw2<mode>"
3481   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3482    (match_operand:<VWIDE> 1 "register_operand" "w")
3483    (match_operand:VQW 2 "register_operand" "w")]
3484   "TARGET_SIMD"
3485 {
3486   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3487   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3488                                                 operands[2], p));
3489   DONE;
3490 })
3491
3492 (define_expand "aarch64_usubw2<mode>"
3493   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3494    (match_operand:<VWIDE> 1 "register_operand" "w")
3495    (match_operand:VQW 2 "register_operand" "w")]
3496   "TARGET_SIMD"
3497 {
3498   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3499   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3500                                                 operands[2], p));
3501   DONE;
3502 })
3503
3504 ;; <su><r>h<addsub>.
3505
3506 (define_expand "<u>avg<mode>3_floor"
3507   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3508         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3509                           (match_operand:VDQ_BHSI 2 "register_operand")]
3510                          HADD))]
3511   "TARGET_SIMD"
3512 )
3513
3514 (define_expand "<u>avg<mode>3_ceil"
3515   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3516         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3517                           (match_operand:VDQ_BHSI 2 "register_operand")]
3518                          RHADD))]
3519   "TARGET_SIMD"
3520 )
3521
3522 (define_insn "aarch64_<sur>h<addsub><mode>"
3523   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3524         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3525                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3526                      HADDSUB))]
3527   "TARGET_SIMD"
3528   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3529   [(set_attr "type" "neon_<addsub>_halve<q>")]
3530 )
3531
3532 ;; <r><addsub>hn<q>.
3533
3534 (define_insn "aarch64_<sur><addsub>hn<mode>"
3535   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3536         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3537                             (match_operand:VQN 2 "register_operand" "w")]
3538                            ADDSUBHN))]
3539   "TARGET_SIMD"
3540   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3541   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3542 )
3543
3544 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3545   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3546         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3547                              (match_operand:VQN 2 "register_operand" "w")
3548                              (match_operand:VQN 3 "register_operand" "w")]
3549                             ADDSUBHN2))]
3550   "TARGET_SIMD"
3551   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3552   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3553 )
3554
3555 ;; pmul.
3556
3557 (define_insn "aarch64_pmul<mode>"
3558   [(set (match_operand:VB 0 "register_operand" "=w")
3559         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3560                     (match_operand:VB 2 "register_operand" "w")]
3561                    UNSPEC_PMUL))]
3562  "TARGET_SIMD"
3563  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3564   [(set_attr "type" "neon_mul_<Vetype><q>")]
3565 )
3566
3567 ;; fmulx.
3568
3569 (define_insn "aarch64_fmulx<mode>"
3570   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3571         (unspec:VHSDF_HSDF
3572           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3573            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3574            UNSPEC_FMULX))]
3575  "TARGET_SIMD"
3576  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3577  [(set_attr "type" "neon_fp_mul_<stype>")]
3578 )
3579
3580 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3581
3582 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3583   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3584         (unspec:VDQSF
3585          [(match_operand:VDQSF 1 "register_operand" "w")
3586           (vec_duplicate:VDQSF
3587            (vec_select:<VEL>
3588             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3589             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3590          UNSPEC_FMULX))]
3591   "TARGET_SIMD"
3592   {
3593     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3594     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3595   }
3596   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3597 )
3598
3599 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3600
3601 (define_insn "*aarch64_mulx_elt<mode>"
3602   [(set (match_operand:VDQF 0 "register_operand" "=w")
3603         (unspec:VDQF
3604          [(match_operand:VDQF 1 "register_operand" "w")
3605           (vec_duplicate:VDQF
3606            (vec_select:<VEL>
3607             (match_operand:VDQF 2 "register_operand" "w")
3608             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3609          UNSPEC_FMULX))]
3610   "TARGET_SIMD"
3611   {
3612     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3613     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3614   }
3615   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3616 )
3617
3618 ;; vmulxq_lane
3619
3620 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3621   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3622         (unspec:VHSDF
3623          [(match_operand:VHSDF 1 "register_operand" "w")
3624           (vec_duplicate:VHSDF
3625             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3626          UNSPEC_FMULX))]
3627   "TARGET_SIMD"
3628   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3629   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3630 )
3631
3632 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3633 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3634 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3635
3636 (define_insn "*aarch64_vgetfmulx<mode>"
3637   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3638         (unspec:<VEL>
3639          [(match_operand:<VEL> 1 "register_operand" "w")
3640           (vec_select:<VEL>
3641            (match_operand:VDQF 2 "register_operand" "w")
3642             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3643          UNSPEC_FMULX))]
3644   "TARGET_SIMD"
3645   {
3646     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3647     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3648   }
3649   [(set_attr "type" "fmul<Vetype>")]
3650 )
3651 ;; <su>q<addsub>
3652
3653 (define_insn "aarch64_<su_optab><optab><mode>"
3654   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3655         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3656                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3657   "TARGET_SIMD"
3658   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3659   [(set_attr "type" "neon_<optab><q>")]
3660 )
3661
3662 ;; suqadd and usqadd
3663
3664 (define_insn "aarch64_<sur>qadd<mode>"
3665   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3666         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3667                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3668                        USSUQADD))]
3669   "TARGET_SIMD"
3670   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3671   [(set_attr "type" "neon_qadd<q>")]
3672 )
3673
3674 ;; sqmovun
3675
3676 (define_insn "aarch64_sqmovun<mode>"
3677   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3678         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3679                             UNSPEC_SQXTUN))]
3680    "TARGET_SIMD"
3681    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3682    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3683 )
3684
3685 ;; sqmovn and uqmovn
3686
3687 (define_insn "aarch64_<sur>qmovn<mode>"
3688   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3689         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3690                             SUQMOVN))]
3691   "TARGET_SIMD"
3692   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3693    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3694 )
3695
3696 ;; <su>q<absneg>
3697
3698 (define_insn "aarch64_s<optab><mode>"
3699   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3700         (UNQOPS:VSDQ_I
3701           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3702   "TARGET_SIMD"
3703   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3704   [(set_attr "type" "neon_<optab><q>")]
3705 )
3706
3707 ;; sq<r>dmulh.
3708
3709 (define_insn "aarch64_sq<r>dmulh<mode>"
3710   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3711         (unspec:VSDQ_HSI
3712           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3713            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3714          VQDMULH))]
3715   "TARGET_SIMD"
3716   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3717   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3718 )
3719
3720 ;; sq<r>dmulh_lane
3721
3722 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3723   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3724         (unspec:VDQHS
3725           [(match_operand:VDQHS 1 "register_operand" "w")
3726            (vec_select:<VEL>
3727              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3728              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3729          VQDMULH))]
3730   "TARGET_SIMD"
3731   "*
3732    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3733    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3734   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3735 )
3736
3737 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3738   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3739         (unspec:VDQHS
3740           [(match_operand:VDQHS 1 "register_operand" "w")
3741            (vec_select:<VEL>
3742              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3743              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3744          VQDMULH))]
3745   "TARGET_SIMD"
3746   "*
3747    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3748    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3749   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3750 )
3751
3752 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3753   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3754         (unspec:SD_HSI
3755           [(match_operand:SD_HSI 1 "register_operand" "w")
3756            (vec_select:<VEL>
3757              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3758              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3759          VQDMULH))]
3760   "TARGET_SIMD"
3761   "*
3762    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3763    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3764   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3765 )
3766
3767 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3768   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3769         (unspec:SD_HSI
3770           [(match_operand:SD_HSI 1 "register_operand" "w")
3771            (vec_select:<VEL>
3772              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3773              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3774          VQDMULH))]
3775   "TARGET_SIMD"
3776   "*
3777    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3778    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3779   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3780 )
3781
3782 ;; sqrdml[as]h.
3783
3784 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3785   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3786         (unspec:VSDQ_HSI
3787           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3788            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3789            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3790           SQRDMLH_AS))]
3791    "TARGET_SIMD_RDMA"
3792    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3793    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3794 )
3795
3796 ;; sqrdml[as]h_lane.
3797
3798 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3799   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3800         (unspec:VDQHS
3801           [(match_operand:VDQHS 1 "register_operand" "0")
3802            (match_operand:VDQHS 2 "register_operand" "w")
3803            (vec_select:<VEL>
3804              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3805              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3806           SQRDMLH_AS))]
3807    "TARGET_SIMD_RDMA"
3808    {
3809      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3810      return
3811       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3812    }
3813    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3814 )
3815
3816 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3817   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3818         (unspec:SD_HSI
3819           [(match_operand:SD_HSI 1 "register_operand" "0")
3820            (match_operand:SD_HSI 2 "register_operand" "w")
3821            (vec_select:<VEL>
3822              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3823              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3824           SQRDMLH_AS))]
3825    "TARGET_SIMD_RDMA"
3826    {
3827      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3828      return
3829       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3830    }
3831    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3832 )
3833
3834 ;; sqrdml[as]h_laneq.
3835
3836 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3837   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3838         (unspec:VDQHS
3839           [(match_operand:VDQHS 1 "register_operand" "0")
3840            (match_operand:VDQHS 2 "register_operand" "w")
3841            (vec_select:<VEL>
3842              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3843              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3844           SQRDMLH_AS))]
3845    "TARGET_SIMD_RDMA"
3846    {
3847      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3848      return
3849       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3850    }
3851    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3852 )
3853
3854 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3855   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3856         (unspec:SD_HSI
3857           [(match_operand:SD_HSI 1 "register_operand" "0")
3858            (match_operand:SD_HSI 2 "register_operand" "w")
3859            (vec_select:<VEL>
3860              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3861              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3862           SQRDMLH_AS))]
3863    "TARGET_SIMD_RDMA"
3864    {
3865      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3866      return
3867       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3868    }
3869    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3870 )
3871
3872 ;; vqdml[sa]l
3873
3874 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3875   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876         (SBINQOPS:<VWIDE>
3877           (match_operand:<VWIDE> 1 "register_operand" "0")
3878           (ss_ashift:<VWIDE>
3879               (mult:<VWIDE>
3880                 (sign_extend:<VWIDE>
3881                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3882                 (sign_extend:<VWIDE>
3883                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3884               (const_int 1))))]
3885   "TARGET_SIMD"
3886   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3887   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3888 )
3889
3890 ;; vqdml[sa]l_lane
3891
3892 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3893   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3894         (SBINQOPS:<VWIDE>
3895           (match_operand:<VWIDE> 1 "register_operand" "0")
3896           (ss_ashift:<VWIDE>
3897             (mult:<VWIDE>
3898               (sign_extend:<VWIDE>
3899                 (match_operand:VD_HSI 2 "register_operand" "w"))
3900               (sign_extend:<VWIDE>
3901                 (vec_duplicate:VD_HSI
3902                   (vec_select:<VEL>
3903                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3904                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3905               ))
3906             (const_int 1))))]
3907   "TARGET_SIMD"
3908   {
3909     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3910     return
3911       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3912   }
3913   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3914 )
3915
3916 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3917   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3918         (SBINQOPS:<VWIDE>
3919           (match_operand:<VWIDE> 1 "register_operand" "0")
3920           (ss_ashift:<VWIDE>
3921             (mult:<VWIDE>
3922               (sign_extend:<VWIDE>
3923                 (match_operand:VD_HSI 2 "register_operand" "w"))
3924               (sign_extend:<VWIDE>
3925                 (vec_duplicate:VD_HSI
3926                   (vec_select:<VEL>
3927                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3928                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3929               ))
3930             (const_int 1))))]
3931   "TARGET_SIMD"
3932   {
3933     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3934     return
3935       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3936   }
3937   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3938 )
3939
3940 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3941   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3942         (SBINQOPS:<VWIDE>
3943           (match_operand:<VWIDE> 1 "register_operand" "0")
3944           (ss_ashift:<VWIDE>
3945             (mult:<VWIDE>
3946               (sign_extend:<VWIDE>
3947                 (match_operand:SD_HSI 2 "register_operand" "w"))
3948               (sign_extend:<VWIDE>
3949                 (vec_select:<VEL>
3950                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3951                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3952               )
3953             (const_int 1))))]
3954   "TARGET_SIMD"
3955   {
3956     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3957     return
3958       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3959   }
3960   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3961 )
3962
3963 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3964   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3965         (SBINQOPS:<VWIDE>
3966           (match_operand:<VWIDE> 1 "register_operand" "0")
3967           (ss_ashift:<VWIDE>
3968             (mult:<VWIDE>
3969               (sign_extend:<VWIDE>
3970                 (match_operand:SD_HSI 2 "register_operand" "w"))
3971               (sign_extend:<VWIDE>
3972                 (vec_select:<VEL>
3973                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3975               )
3976             (const_int 1))))]
3977   "TARGET_SIMD"
3978   {
3979     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3980     return
3981       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3982   }
3983   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3984 )
3985
3986 ;; vqdml[sa]l_n
3987
3988 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3989   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3990         (SBINQOPS:<VWIDE>
3991           (match_operand:<VWIDE> 1 "register_operand" "0")
3992           (ss_ashift:<VWIDE>
3993               (mult:<VWIDE>
3994                 (sign_extend:<VWIDE>
3995                       (match_operand:VD_HSI 2 "register_operand" "w"))
3996                 (sign_extend:<VWIDE>
3997                   (vec_duplicate:VD_HSI
3998                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3999               (const_int 1))))]
4000   "TARGET_SIMD"
4001   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4002   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4003 )
4004
4005 ;; sqdml[as]l2
4006
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4008   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009         (SBINQOPS:<VWIDE>
4010          (match_operand:<VWIDE> 1 "register_operand" "0")
4011          (ss_ashift:<VWIDE>
4012              (mult:<VWIDE>
4013                (sign_extend:<VWIDE>
4014                  (vec_select:<VHALF>
4015                      (match_operand:VQ_HSI 2 "register_operand" "w")
4016                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017                (sign_extend:<VWIDE>
4018                  (vec_select:<VHALF>
4019                      (match_operand:VQ_HSI 3 "register_operand" "w")
4020                      (match_dup 4))))
4021              (const_int 1))))]
4022   "TARGET_SIMD"
4023   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4024   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4025 )
4026
4027 (define_expand "aarch64_sqdmlal2<mode>"
4028   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4029    (match_operand:<VWIDE> 1 "register_operand" "w")
4030    (match_operand:VQ_HSI 2 "register_operand" "w")
4031    (match_operand:VQ_HSI 3 "register_operand" "w")]
4032   "TARGET_SIMD"
4033 {
4034   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4035   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4036                                                   operands[2], operands[3], p));
4037   DONE;
4038 })
4039
4040 (define_expand "aarch64_sqdmlsl2<mode>"
4041   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042    (match_operand:<VWIDE> 1 "register_operand" "w")
4043    (match_operand:VQ_HSI 2 "register_operand" "w")
4044    (match_operand:VQ_HSI 3 "register_operand" "w")]
4045   "TARGET_SIMD"
4046 {
4047   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4049                                                   operands[2], operands[3], p));
4050   DONE;
4051 })
4052
4053 ;; vqdml[sa]l2_lane
4054
4055 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4056   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4057         (SBINQOPS:<VWIDE>
4058           (match_operand:<VWIDE> 1 "register_operand" "0")
4059           (ss_ashift:<VWIDE>
4060               (mult:<VWIDE>
4061                 (sign_extend:<VWIDE>
4062                   (vec_select:<VHALF>
4063                     (match_operand:VQ_HSI 2 "register_operand" "w")
4064                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4065                 (sign_extend:<VWIDE>
4066                   (vec_duplicate:<VHALF>
4067                     (vec_select:<VEL>
4068                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4069                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4070                     ))))
4071               (const_int 1))))]
4072   "TARGET_SIMD"
4073   {
4074     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4075     return
4076      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4077   }
4078   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4079 )
4080
4081 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4082   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4083         (SBINQOPS:<VWIDE>
4084           (match_operand:<VWIDE> 1 "register_operand" "0")
4085           (ss_ashift:<VWIDE>
4086               (mult:<VWIDE>
4087                 (sign_extend:<VWIDE>
4088                   (vec_select:<VHALF>
4089                     (match_operand:VQ_HSI 2 "register_operand" "w")
4090                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4091                 (sign_extend:<VWIDE>
4092                   (vec_duplicate:<VHALF>
4093                     (vec_select:<VEL>
4094                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4095                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4096                     ))))
4097               (const_int 1))))]
4098   "TARGET_SIMD"
4099   {
4100     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4101     return
4102      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4103   }
4104   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4105 )
4106
4107 (define_expand "aarch64_sqdmlal2_lane<mode>"
4108   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4109    (match_operand:<VWIDE> 1 "register_operand" "w")
4110    (match_operand:VQ_HSI 2 "register_operand" "w")
4111    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4112    (match_operand:SI 4 "immediate_operand" "i")]
4113   "TARGET_SIMD"
4114 {
4115   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4116   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4117                                                        operands[2], operands[3],
4118                                                        operands[4], p));
4119   DONE;
4120 })
4121
4122 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4123   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4124    (match_operand:<VWIDE> 1 "register_operand" "w")
4125    (match_operand:VQ_HSI 2 "register_operand" "w")
4126    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127    (match_operand:SI 4 "immediate_operand" "i")]
4128   "TARGET_SIMD"
4129 {
4130   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4131   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4132                                                        operands[2], operands[3],
4133                                                        operands[4], p));
4134   DONE;
4135 })
4136
4137 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4138   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4139    (match_operand:<VWIDE> 1 "register_operand" "w")
4140    (match_operand:VQ_HSI 2 "register_operand" "w")
4141    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4142    (match_operand:SI 4 "immediate_operand" "i")]
4143   "TARGET_SIMD"
4144 {
4145   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4146   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4147                                                        operands[2], operands[3],
4148                                                        operands[4], p));
4149   DONE;
4150 })
4151
4152 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4153   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4154    (match_operand:<VWIDE> 1 "register_operand" "w")
4155    (match_operand:VQ_HSI 2 "register_operand" "w")
4156    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4157    (match_operand:SI 4 "immediate_operand" "i")]
4158   "TARGET_SIMD"
4159 {
4160   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4161   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4162                                                        operands[2], operands[3],
4163                                                        operands[4], p));
4164   DONE;
4165 })
4166
4167 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4168   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4169         (SBINQOPS:<VWIDE>
4170           (match_operand:<VWIDE> 1 "register_operand" "0")
4171           (ss_ashift:<VWIDE>
4172             (mult:<VWIDE>
4173               (sign_extend:<VWIDE>
4174                 (vec_select:<VHALF>
4175                   (match_operand:VQ_HSI 2 "register_operand" "w")
4176                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4177               (sign_extend:<VWIDE>
4178                 (vec_duplicate:<VHALF>
4179                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4180             (const_int 1))))]
4181   "TARGET_SIMD"
4182   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4183   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4184 )
4185
4186 (define_expand "aarch64_sqdmlal2_n<mode>"
4187   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4188    (match_operand:<VWIDE> 1 "register_operand" "w")
4189    (match_operand:VQ_HSI 2 "register_operand" "w")
4190    (match_operand:<VEL> 3 "register_operand" "w")]
4191   "TARGET_SIMD"
4192 {
4193   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4195                                                     operands[2], operands[3],
4196                                                     p));
4197   DONE;
4198 })
4199
4200 (define_expand "aarch64_sqdmlsl2_n<mode>"
4201   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4202    (match_operand:<VWIDE> 1 "register_operand" "w")
4203    (match_operand:VQ_HSI 2 "register_operand" "w")
4204    (match_operand:<VEL> 3 "register_operand" "w")]
4205   "TARGET_SIMD"
4206 {
4207   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4208   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4209                                                     operands[2], operands[3],
4210                                                     p));
4211   DONE;
4212 })
4213
4214 ;; vqdmull
4215
4216 (define_insn "aarch64_sqdmull<mode>"
4217   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218         (ss_ashift:<VWIDE>
4219              (mult:<VWIDE>
4220                (sign_extend:<VWIDE>
4221                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4222                (sign_extend:<VWIDE>
4223                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4224              (const_int 1)))]
4225   "TARGET_SIMD"
4226   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4227   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4228 )
4229
4230 ;; vqdmull_lane
4231
4232 (define_insn "aarch64_sqdmull_lane<mode>"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (match_operand:VD_HSI 1 "register_operand" "w"))
4238                (sign_extend:<VWIDE>
4239                  (vec_duplicate:VD_HSI
4240                    (vec_select:<VEL>
4241                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4242                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4243                ))
4244              (const_int 1)))]
4245   "TARGET_SIMD"
4246   {
4247     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4248     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4249   }
4250   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4251 )
4252
4253 (define_insn "aarch64_sqdmull_laneq<mode>"
4254   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4255         (ss_ashift:<VWIDE>
4256              (mult:<VWIDE>
4257                (sign_extend:<VWIDE>
4258                  (match_operand:VD_HSI 1 "register_operand" "w"))
4259                (sign_extend:<VWIDE>
4260                  (vec_duplicate:VD_HSI
4261                    (vec_select:<VEL>
4262                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4263                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4264                ))
4265              (const_int 1)))]
4266   "TARGET_SIMD"
4267   {
4268     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4269     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4270   }
4271   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4272 )
4273
4274 (define_insn "aarch64_sqdmull_lane<mode>"
4275   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4276         (ss_ashift:<VWIDE>
4277              (mult:<VWIDE>
4278                (sign_extend:<VWIDE>
4279                  (match_operand:SD_HSI 1 "register_operand" "w"))
4280                (sign_extend:<VWIDE>
4281                  (vec_select:<VEL>
4282                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4283                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4284                ))
4285              (const_int 1)))]
4286   "TARGET_SIMD"
4287   {
4288     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4289     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4290   }
4291   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4292 )
4293
4294 (define_insn "aarch64_sqdmull_laneq<mode>"
4295   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4296         (ss_ashift:<VWIDE>
4297              (mult:<VWIDE>
4298                (sign_extend:<VWIDE>
4299                  (match_operand:SD_HSI 1 "register_operand" "w"))
4300                (sign_extend:<VWIDE>
4301                  (vec_select:<VEL>
4302                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4303                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4304                ))
4305              (const_int 1)))]
4306   "TARGET_SIMD"
4307   {
4308     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4309     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4310   }
4311   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4312 )
4313
4314 ;; vqdmull_n
4315
4316 (define_insn "aarch64_sqdmull_n<mode>"
4317   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4318         (ss_ashift:<VWIDE>
4319              (mult:<VWIDE>
4320                (sign_extend:<VWIDE>
4321                  (match_operand:VD_HSI 1 "register_operand" "w"))
4322                (sign_extend:<VWIDE>
4323                  (vec_duplicate:VD_HSI
4324                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4325                )
4326              (const_int 1)))]
4327   "TARGET_SIMD"
4328   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4329   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4330 )
4331
4332 ;; vqdmull2
4333
4334
4335
4336 (define_insn "aarch64_sqdmull2<mode>_internal"
4337   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4338         (ss_ashift:<VWIDE>
4339              (mult:<VWIDE>
4340                (sign_extend:<VWIDE>
4341                  (vec_select:<VHALF>
4342                    (match_operand:VQ_HSI 1 "register_operand" "w")
4343                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4344                (sign_extend:<VWIDE>
4345                  (vec_select:<VHALF>
4346                    (match_operand:VQ_HSI 2 "register_operand" "w")
4347                    (match_dup 3)))
4348                )
4349              (const_int 1)))]
4350   "TARGET_SIMD"
4351   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4352   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4353 )
4354
4355 (define_expand "aarch64_sqdmull2<mode>"
4356   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4357    (match_operand:VQ_HSI 1 "register_operand" "w")
4358    (match_operand:VQ_HSI 2 "register_operand" "w")]
4359   "TARGET_SIMD"
4360 {
4361   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4362   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4363                                                   operands[2], p));
4364   DONE;
4365 })
4366
4367 ;; vqdmull2_lane
4368
4369 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4370   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4371         (ss_ashift:<VWIDE>
4372              (mult:<VWIDE>
4373                (sign_extend:<VWIDE>
4374                  (vec_select:<VHALF>
4375                    (match_operand:VQ_HSI 1 "register_operand" "w")
4376                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4377                (sign_extend:<VWIDE>
4378                  (vec_duplicate:<VHALF>
4379                    (vec_select:<VEL>
4380                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4381                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4382                ))
4383              (const_int 1)))]
4384   "TARGET_SIMD"
4385   {
4386     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4387     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4388   }
4389   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4390 )
4391
4392 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4393   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4394         (ss_ashift:<VWIDE>
4395              (mult:<VWIDE>
4396                (sign_extend:<VWIDE>
4397                  (vec_select:<VHALF>
4398                    (match_operand:VQ_HSI 1 "register_operand" "w")
4399                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4400                (sign_extend:<VWIDE>
4401                  (vec_duplicate:<VHALF>
4402                    (vec_select:<VEL>
4403                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4404                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4405                ))
4406              (const_int 1)))]
4407   "TARGET_SIMD"
4408   {
4409     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4410     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4411   }
4412   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4413 )
4414
4415 (define_expand "aarch64_sqdmull2_lane<mode>"
4416   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4417    (match_operand:VQ_HSI 1 "register_operand" "w")
4418    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4419    (match_operand:SI 3 "immediate_operand" "i")]
4420   "TARGET_SIMD"
4421 {
4422   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4423   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4424                                                        operands[2], operands[3],
4425                                                        p));
4426   DONE;
4427 })
4428
4429 (define_expand "aarch64_sqdmull2_laneq<mode>"
4430   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4431    (match_operand:VQ_HSI 1 "register_operand" "w")
4432    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4433    (match_operand:SI 3 "immediate_operand" "i")]
4434   "TARGET_SIMD"
4435 {
4436   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4437   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4438                                                        operands[2], operands[3],
4439                                                        p));
4440   DONE;
4441 })
4442
4443 ;; vqdmull2_n
4444
4445 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4446   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4447         (ss_ashift:<VWIDE>
4448              (mult:<VWIDE>
4449                (sign_extend:<VWIDE>
4450                  (vec_select:<VHALF>
4451                    (match_operand:VQ_HSI 1 "register_operand" "w")
4452                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4453                (sign_extend:<VWIDE>
4454                  (vec_duplicate:<VHALF>
4455                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4456                )
4457              (const_int 1)))]
4458   "TARGET_SIMD"
4459   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4460   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4461 )
4462
4463 (define_expand "aarch64_sqdmull2_n<mode>"
4464   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4465    (match_operand:VQ_HSI 1 "register_operand" "w")
4466    (match_operand:<VEL> 2 "register_operand" "w")]
4467   "TARGET_SIMD"
4468 {
4469   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4471                                                     operands[2], p));
4472   DONE;
4473 })
4474
4475 ;; vshl
4476
4477 (define_insn "aarch64_<sur>shl<mode>"
4478   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4479         (unspec:VSDQ_I_DI
4480           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4481            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4482          VSHL))]
4483   "TARGET_SIMD"
4484   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4485   [(set_attr "type" "neon_shift_reg<q>")]
4486 )
4487
4488
4489 ;; vqshl
4490
4491 (define_insn "aarch64_<sur>q<r>shl<mode>"
4492   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4493         (unspec:VSDQ_I
4494           [(match_operand:VSDQ_I 1 "register_operand" "w")
4495            (match_operand:VSDQ_I 2 "register_operand" "w")]
4496          VQSHL))]
4497   "TARGET_SIMD"
4498   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4499   [(set_attr "type" "neon_sat_shift_reg<q>")]
4500 )
4501
4502 ;; vshll_n
4503
4504 (define_insn "aarch64_<sur>shll_n<mode>"
4505   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4506         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4507                          (match_operand:SI 2
4508                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4509                          VSHLL))]
4510   "TARGET_SIMD"
4511   {
4512     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4513       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4514     else
4515       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4516   }
4517   [(set_attr "type" "neon_shift_imm_long")]
4518 )
4519
4520 ;; vshll_high_n
4521
4522 (define_insn "aarch64_<sur>shll2_n<mode>"
4523   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4524         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4525                          (match_operand:SI 2 "immediate_operand" "i")]
4526                          VSHLL))]
4527   "TARGET_SIMD"
4528   {
4529     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4530       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4531     else
4532       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4533   }
4534   [(set_attr "type" "neon_shift_imm_long")]
4535 )
4536
4537 ;; vrshr_n
4538
4539 (define_insn "aarch64_<sur>shr_n<mode>"
4540   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4541         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4542                            (match_operand:SI 2
4543                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4544                           VRSHR_N))]
4545   "TARGET_SIMD"
4546   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4547   [(set_attr "type" "neon_sat_shift_imm<q>")]
4548 )
4549
4550 ;; v(r)sra_n
4551
4552 (define_insn "aarch64_<sur>sra_n<mode>"
4553   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4554         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4555                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4556                        (match_operand:SI 3
4557                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4558                       VSRA))]
4559   "TARGET_SIMD"
4560   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4561   [(set_attr "type" "neon_shift_acc<q>")]
4562 )
4563
4564 ;; vs<lr>i_n
4565
4566 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4567   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4568         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4569                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4570                        (match_operand:SI 3
4571                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4572                       VSLRI))]
4573   "TARGET_SIMD"
4574   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4575   [(set_attr "type" "neon_shift_imm<q>")]
4576 )
4577
4578 ;; vqshl(u)
4579
4580 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4581   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4582         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4583                        (match_operand:SI 2
4584                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4585                       VQSHL_N))]
4586   "TARGET_SIMD"
4587   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4588   [(set_attr "type" "neon_sat_shift_imm<q>")]
4589 )
4590
4591
4592 ;; vq(r)shr(u)n_n
4593
4594 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4595   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4596         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4597                             (match_operand:SI 2
4598                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4599                            VQSHRN_N))]
4600   "TARGET_SIMD"
4601   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4602   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4603 )
4604
4605
4606 ;; cm(eq|ge|gt|lt|le)
4607 ;; Note, we have constraints for Dz and Z as different expanders
4608 ;; have different ideas of what should be passed to this pattern.
4609
4610 (define_insn "aarch64_cm<optab><mode>"
4611   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4612         (neg:<V_INT_EQUIV>
4613           (COMPARISONS:<V_INT_EQUIV>
4614             (match_operand:VDQ_I 1 "register_operand" "w,w")
4615             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4616           )))]
4617   "TARGET_SIMD"
4618   "@
4619   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4620   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4621   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4622 )
4623
4624 (define_insn_and_split "aarch64_cm<optab>di"
4625   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4626         (neg:DI
4627           (COMPARISONS:DI
4628             (match_operand:DI 1 "register_operand" "w,w,r")
4629             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4630           )))
4631      (clobber (reg:CC CC_REGNUM))]
4632   "TARGET_SIMD"
4633   "#"
4634   "&& reload_completed"
4635   [(set (match_operand:DI 0 "register_operand")
4636         (neg:DI
4637           (COMPARISONS:DI
4638             (match_operand:DI 1 "register_operand")
4639             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4640           )))]
4641   {
4642     /* If we are in the general purpose register file,
4643        we split to a sequence of comparison and store.  */
4644     if (GP_REGNUM_P (REGNO (operands[0]))
4645         && GP_REGNUM_P (REGNO (operands[1])))
4646       {
4647         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4648         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4649         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4650         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4651         DONE;
4652       }
4653     /* Otherwise, we expand to a similar pattern which does not
4654        clobber CC_REGNUM.  */
4655   }
4656   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4657 )
4658
4659 (define_insn "*aarch64_cm<optab>di"
4660   [(set (match_operand:DI 0 "register_operand" "=w,w")
4661         (neg:DI
4662           (COMPARISONS:DI
4663             (match_operand:DI 1 "register_operand" "w,w")
4664             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4665           )))]
4666   "TARGET_SIMD && reload_completed"
4667   "@
4668   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4669   cm<optab>\t%d0, %d1, #0"
4670   [(set_attr "type" "neon_compare, neon_compare_zero")]
4671 )
4672
4673 ;; cm(hs|hi)
4674
4675 (define_insn "aarch64_cm<optab><mode>"
4676   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4677         (neg:<V_INT_EQUIV>
4678           (UCOMPARISONS:<V_INT_EQUIV>
4679             (match_operand:VDQ_I 1 "register_operand" "w")
4680             (match_operand:VDQ_I 2 "register_operand" "w")
4681           )))]
4682   "TARGET_SIMD"
4683   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4684   [(set_attr "type" "neon_compare<q>")]
4685 )
4686
4687 (define_insn_and_split "aarch64_cm<optab>di"
4688   [(set (match_operand:DI 0 "register_operand" "=w,r")
4689         (neg:DI
4690           (UCOMPARISONS:DI
4691             (match_operand:DI 1 "register_operand" "w,r")
4692             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4693           )))
4694     (clobber (reg:CC CC_REGNUM))]
4695   "TARGET_SIMD"
4696   "#"
4697   "&& reload_completed"
4698   [(set (match_operand:DI 0 "register_operand")
4699         (neg:DI
4700           (UCOMPARISONS:DI
4701             (match_operand:DI 1 "register_operand")
4702             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4703           )))]
4704   {
4705     /* If we are in the general purpose register file,
4706        we split to a sequence of comparison and store.  */
4707     if (GP_REGNUM_P (REGNO (operands[0]))
4708         && GP_REGNUM_P (REGNO (operands[1])))
4709       {
4710         machine_mode mode = CCmode;
4711         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4712         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4713         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4714         DONE;
4715       }
4716     /* Otherwise, we expand to a similar pattern which does not
4717        clobber CC_REGNUM.  */
4718   }
4719   [(set_attr "type" "neon_compare,multiple")]
4720 )
4721
4722 (define_insn "*aarch64_cm<optab>di"
4723   [(set (match_operand:DI 0 "register_operand" "=w")
4724         (neg:DI
4725           (UCOMPARISONS:DI
4726             (match_operand:DI 1 "register_operand" "w")
4727             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4728           )))]
4729   "TARGET_SIMD && reload_completed"
4730   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4731   [(set_attr "type" "neon_compare")]
4732 )
4733
4734 ;; cmtst
4735
4736 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4737 ;; we don't have any insns using ne, and aarch64_vcond outputs
4738 ;; not (neg (eq (and x y) 0))
4739 ;; which is rewritten by simplify_rtx as
4740 ;; plus (eq (and x y) 0) -1.
4741
4742 (define_insn "aarch64_cmtst<mode>"
4743   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4744         (plus:<V_INT_EQUIV>
4745           (eq:<V_INT_EQUIV>
4746             (and:VDQ_I
4747               (match_operand:VDQ_I 1 "register_operand" "w")
4748               (match_operand:VDQ_I 2 "register_operand" "w"))
4749             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4750           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4751   ]
4752   "TARGET_SIMD"
4753   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4754   [(set_attr "type" "neon_tst<q>")]
4755 )
4756
4757 (define_insn_and_split "aarch64_cmtstdi"
4758   [(set (match_operand:DI 0 "register_operand" "=w,r")
4759         (neg:DI
4760           (ne:DI
4761             (and:DI
4762               (match_operand:DI 1 "register_operand" "w,r")
4763               (match_operand:DI 2 "register_operand" "w,r"))
4764             (const_int 0))))
4765     (clobber (reg:CC CC_REGNUM))]
4766   "TARGET_SIMD"
4767   "#"
4768   "&& reload_completed"
4769   [(set (match_operand:DI 0 "register_operand")
4770         (neg:DI
4771           (ne:DI
4772             (and:DI
4773               (match_operand:DI 1 "register_operand")
4774               (match_operand:DI 2 "register_operand"))
4775             (const_int 0))))]
4776   {
4777     /* If we are in the general purpose register file,
4778        we split to a sequence of comparison and store.  */
4779     if (GP_REGNUM_P (REGNO (operands[0]))
4780         && GP_REGNUM_P (REGNO (operands[1])))
4781       {
4782         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4783         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4784         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4785         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4786         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4787         DONE;
4788       }
4789     /* Otherwise, we expand to a similar pattern which does not
4790        clobber CC_REGNUM.  */
4791   }
4792   [(set_attr "type" "neon_tst,multiple")]
4793 )
4794
4795 (define_insn "*aarch64_cmtstdi"
4796   [(set (match_operand:DI 0 "register_operand" "=w")
4797         (neg:DI
4798           (ne:DI
4799             (and:DI
4800               (match_operand:DI 1 "register_operand" "w")
4801               (match_operand:DI 2 "register_operand" "w"))
4802             (const_int 0))))]
4803   "TARGET_SIMD"
4804   "cmtst\t%d0, %d1, %d2"
4805   [(set_attr "type" "neon_tst")]
4806 )
4807
4808 ;; fcm(eq|ge|gt|le|lt)
4809
4810 (define_insn "aarch64_cm<optab><mode>"
4811   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4812         (neg:<V_INT_EQUIV>
4813           (COMPARISONS:<V_INT_EQUIV>
4814             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4815             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4816           )))]
4817   "TARGET_SIMD"
4818   "@
4819   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4820   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4821   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4822 )
4823
4824 ;; fac(ge|gt)
4825 ;; Note we can also handle what would be fac(le|lt) by
4826 ;; generating fac(ge|gt).
4827
4828 (define_insn "aarch64_fac<optab><mode>"
4829   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4830         (neg:<V_INT_EQUIV>
4831           (FAC_COMPARISONS:<V_INT_EQUIV>
4832             (abs:VHSDF_HSDF
4833               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4834             (abs:VHSDF_HSDF
4835               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4836   )))]
4837   "TARGET_SIMD"
4838   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4839   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4840 )
4841
4842 ;; addp
4843
4844 (define_insn "aarch64_addp<mode>"
4845   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4846         (unspec:VD_BHSI
4847           [(match_operand:VD_BHSI 1 "register_operand" "w")
4848            (match_operand:VD_BHSI 2 "register_operand" "w")]
4849           UNSPEC_ADDP))]
4850   "TARGET_SIMD"
4851   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4852   [(set_attr "type" "neon_reduc_add<q>")]
4853 )
4854
4855 (define_insn "aarch64_addpdi"
4856   [(set (match_operand:DI 0 "register_operand" "=w")
4857         (unspec:DI
4858           [(match_operand:V2DI 1 "register_operand" "w")]
4859           UNSPEC_ADDP))]
4860   "TARGET_SIMD"
4861   "addp\t%d0, %1.2d"
4862   [(set_attr "type" "neon_reduc_add")]
4863 )
4864
4865 ;; sqrt
4866
4867 (define_expand "sqrt<mode>2"
4868   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4869         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4870   "TARGET_SIMD"
4871 {
4872   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4873     DONE;
4874 })
4875
4876 (define_insn "*sqrt<mode>2"
4877   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4878         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4879   "TARGET_SIMD"
4880   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4881   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4882 )
4883
4884 ;; Patterns for vector struct loads and stores.
4885
4886 (define_insn "aarch64_simd_ld2<mode>"
4887   [(set (match_operand:OI 0 "register_operand" "=w")
4888         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4889                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4890                    UNSPEC_LD2))]
4891   "TARGET_SIMD"
4892   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4893   [(set_attr "type" "neon_load2_2reg<q>")]
4894 )
4895
4896 (define_insn "aarch64_simd_ld2r<mode>"
4897   [(set (match_operand:OI 0 "register_operand" "=w")
4898        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4899                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4900                   UNSPEC_LD2_DUP))]
4901   "TARGET_SIMD"
4902   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4903   [(set_attr "type" "neon_load2_all_lanes<q>")]
4904 )
4905
4906 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4907   [(set (match_operand:OI 0 "register_operand" "=w")
4908         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4909                     (match_operand:OI 2 "register_operand" "0")
4910                     (match_operand:SI 3 "immediate_operand" "i")
4911                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4912                    UNSPEC_LD2_LANE))]
4913   "TARGET_SIMD"
4914   {
4915     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4916     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4917   }
4918   [(set_attr "type" "neon_load2_one_lane")]
4919 )
4920
4921 (define_expand "vec_load_lanesoi<mode>"
4922   [(set (match_operand:OI 0 "register_operand" "=w")
4923         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4924                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4925                    UNSPEC_LD2))]
4926   "TARGET_SIMD"
4927 {
4928   if (BYTES_BIG_ENDIAN)
4929     {
4930       rtx tmp = gen_reg_rtx (OImode);
4931       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4932       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4933       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4934     }
4935   else
4936     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4937   DONE;
4938 })
4939
4940 (define_insn "aarch64_simd_st2<mode>"
4941   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4942         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4943                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4944                    UNSPEC_ST2))]
4945   "TARGET_SIMD"
4946   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4947   [(set_attr "type" "neon_store2_2reg<q>")]
4948 )
4949
4950 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4951 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4952   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4953         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4954                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4955                     (match_operand:SI 2 "immediate_operand" "i")]
4956                    UNSPEC_ST2_LANE))]
4957   "TARGET_SIMD"
4958   {
4959     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4960     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4961   }
4962   [(set_attr "type" "neon_store2_one_lane<q>")]
4963 )
4964
4965 (define_expand "vec_store_lanesoi<mode>"
4966   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4967         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4968                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4969                    UNSPEC_ST2))]
4970   "TARGET_SIMD"
4971 {
4972   if (BYTES_BIG_ENDIAN)
4973     {
4974       rtx tmp = gen_reg_rtx (OImode);
4975       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4976       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4977       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4978     }
4979   else
4980     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4981   DONE;
4982 })
4983
4984 (define_insn "aarch64_simd_ld3<mode>"
4985   [(set (match_operand:CI 0 "register_operand" "=w")
4986         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4987                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4988                    UNSPEC_LD3))]
4989   "TARGET_SIMD"
4990   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4991   [(set_attr "type" "neon_load3_3reg<q>")]
4992 )
4993
4994 (define_insn "aarch64_simd_ld3r<mode>"
4995   [(set (match_operand:CI 0 "register_operand" "=w")
4996        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4997                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4998                   UNSPEC_LD3_DUP))]
4999   "TARGET_SIMD"
5000   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5001   [(set_attr "type" "neon_load3_all_lanes<q>")]
5002 )
5003
5004 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5005   [(set (match_operand:CI 0 "register_operand" "=w")
5006         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5007                     (match_operand:CI 2 "register_operand" "0")
5008                     (match_operand:SI 3 "immediate_operand" "i")
5009                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5010                    UNSPEC_LD3_LANE))]
5011   "TARGET_SIMD"
5012 {
5013     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5014     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5015 }
5016   [(set_attr "type" "neon_load3_one_lane")]
5017 )
5018
5019 (define_expand "vec_load_lanesci<mode>"
5020   [(set (match_operand:CI 0 "register_operand" "=w")
5021         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5022                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5023                    UNSPEC_LD3))]
5024   "TARGET_SIMD"
5025 {
5026   if (BYTES_BIG_ENDIAN)
5027     {
5028       rtx tmp = gen_reg_rtx (CImode);
5029       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5030       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5031       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5032     }
5033   else
5034     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5035   DONE;
5036 })
5037
5038 (define_insn "aarch64_simd_st3<mode>"
5039   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5040         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5041                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5042                    UNSPEC_ST3))]
5043   "TARGET_SIMD"
5044   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5045   [(set_attr "type" "neon_store3_3reg<q>")]
5046 )
5047
5048 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5049 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5050   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5051         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5052                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5053                      (match_operand:SI 2 "immediate_operand" "i")]
5054                     UNSPEC_ST3_LANE))]
5055   "TARGET_SIMD"
5056   {
5057     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5058     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5059   }
5060   [(set_attr "type" "neon_store3_one_lane<q>")]
5061 )
5062
5063 (define_expand "vec_store_lanesci<mode>"
5064   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5065         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5066                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5067                    UNSPEC_ST3))]
5068   "TARGET_SIMD"
5069 {
5070   if (BYTES_BIG_ENDIAN)
5071     {
5072       rtx tmp = gen_reg_rtx (CImode);
5073       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5074       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5075       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5076     }
5077   else
5078     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5079   DONE;
5080 })
5081
5082 (define_insn "aarch64_simd_ld4<mode>"
5083   [(set (match_operand:XI 0 "register_operand" "=w")
5084         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5085                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5086                    UNSPEC_LD4))]
5087   "TARGET_SIMD"
5088   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5089   [(set_attr "type" "neon_load4_4reg<q>")]
5090 )
5091
5092 (define_insn "aarch64_simd_ld4r<mode>"
5093   [(set (match_operand:XI 0 "register_operand" "=w")
5094        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5095                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5096                   UNSPEC_LD4_DUP))]
5097   "TARGET_SIMD"
5098   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5099   [(set_attr "type" "neon_load4_all_lanes<q>")]
5100 )
5101
5102 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5103   [(set (match_operand:XI 0 "register_operand" "=w")
5104         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5105                     (match_operand:XI 2 "register_operand" "0")
5106                     (match_operand:SI 3 "immediate_operand" "i")
5107                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5108                    UNSPEC_LD4_LANE))]
5109   "TARGET_SIMD"
5110 {
5111     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5112     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5113 }
5114   [(set_attr "type" "neon_load4_one_lane")]
5115 )
5116
5117 (define_expand "vec_load_lanesxi<mode>"
5118   [(set (match_operand:XI 0 "register_operand" "=w")
5119         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5120                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5121                    UNSPEC_LD4))]
5122   "TARGET_SIMD"
5123 {
5124   if (BYTES_BIG_ENDIAN)
5125     {
5126       rtx tmp = gen_reg_rtx (XImode);
5127       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5128       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5129       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5130     }
5131   else
5132     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5133   DONE;
5134 })
5135
5136 (define_insn "aarch64_simd_st4<mode>"
5137   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5138         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5139                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140                    UNSPEC_ST4))]
5141   "TARGET_SIMD"
5142   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5143   [(set_attr "type" "neon_store4_4reg<q>")]
5144 )
5145
5146 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5147 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5148   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5149         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5150                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5151                      (match_operand:SI 2 "immediate_operand" "i")]
5152                     UNSPEC_ST4_LANE))]
5153   "TARGET_SIMD"
5154   {
5155     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5156     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5157   }
5158   [(set_attr "type" "neon_store4_one_lane<q>")]
5159 )
5160
5161 (define_expand "vec_store_lanesxi<mode>"
5162   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5163         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5164                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5165                    UNSPEC_ST4))]
5166   "TARGET_SIMD"
5167 {
5168   if (BYTES_BIG_ENDIAN)
5169     {
5170       rtx tmp = gen_reg_rtx (XImode);
5171       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5172       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5173       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5174     }
5175   else
5176     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5177   DONE;
5178 })
5179
5180 (define_insn_and_split "aarch64_rev_reglist<mode>"
5181 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5182         (unspec:VSTRUCT
5183                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5184                     (match_operand:V16QI 2 "register_operand" "w")]
5185                    UNSPEC_REV_REGLIST))]
5186   "TARGET_SIMD"
5187   "#"
5188   "&& reload_completed"
5189   [(const_int 0)]
5190 {
5191   int i;
5192   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5193   for (i = 0; i < nregs; i++)
5194     {
5195       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5196       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5197       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5198     }
5199   DONE;
5200 }
5201   [(set_attr "type" "neon_tbl1_q")
5202    (set_attr "length" "<insn_count>")]
5203 )
5204
5205 ;; Reload patterns for AdvSIMD register list operands.
5206
5207 (define_expand "mov<mode>"
5208   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5209         (match_operand:VSTRUCT 1 "general_operand" ""))]
5210   "TARGET_SIMD"
5211 {
5212   if (can_create_pseudo_p ())
5213     {
5214       if (GET_CODE (operands[0]) != REG)
5215         operands[1] = force_reg (<MODE>mode, operands[1]);
5216     }
5217 })
5218
5219
5220 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5221   [(match_operand:CI 0 "register_operand" "=w")
5222    (match_operand:DI 1 "register_operand" "r")
5223    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5224   "TARGET_SIMD"
5225 {
5226   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5227   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5228   DONE;
5229 })
5230
5231 (define_insn "aarch64_ld1_x3_<mode>"
5232   [(set (match_operand:CI 0 "register_operand" "=w")
5233         (unspec:CI
5234           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5235            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5236   "TARGET_SIMD"
5237   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5238   [(set_attr "type" "neon_load1_3reg<q>")]
5239 )
5240
5241 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5242   [(match_operand:DI 0 "register_operand" "")
5243    (match_operand:OI 1 "register_operand" "")
5244    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5245   "TARGET_SIMD"
5246 {
5247   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5248   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5249   DONE;
5250 })
5251
5252 (define_insn "aarch64_st1_x2_<mode>"
5253    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5254          (unspec:OI
5255           [(match_operand:OI 1 "register_operand" "w")
5256           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5257   "TARGET_SIMD"
5258   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5259   [(set_attr "type" "neon_store1_2reg<q>")]
5260 )
5261
5262 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5263   [(match_operand:DI 0 "register_operand" "")
5264    (match_operand:CI 1 "register_operand" "")
5265    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5266   "TARGET_SIMD"
5267 {
5268   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5269   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5270   DONE;
5271 })
5272
5273 (define_insn "aarch64_st1_x3_<mode>"
5274    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5275         (unspec:CI
5276          [(match_operand:CI 1 "register_operand" "w")
5277           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5278   "TARGET_SIMD"
5279   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5280   [(set_attr "type" "neon_store1_3reg<q>")]
5281 )
5282
5283 (define_insn "*aarch64_mov<mode>"
5284   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5285         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5286   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5287    && (register_operand (operands[0], <MODE>mode)
5288        || register_operand (operands[1], <MODE>mode))"
5289   "@
5290    #
5291    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5292    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5293   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5294                      neon_load<nregs>_<nregs>reg_q")
5295    (set_attr "length" "<insn_count>,4,4")]
5296 )
5297
5298 (define_insn "aarch64_be_ld1<mode>"
5299   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5300         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5301                              "aarch64_simd_struct_operand" "Utv")]
5302         UNSPEC_LD1))]
5303   "TARGET_SIMD"
5304   "ld1\\t{%0<Vmtype>}, %1"
5305   [(set_attr "type" "neon_load1_1reg<q>")]
5306 )
5307
5308 (define_insn "aarch64_be_st1<mode>"
5309   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5310         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5311         UNSPEC_ST1))]
5312   "TARGET_SIMD"
5313   "st1\\t{%1<Vmtype>}, %0"
5314   [(set_attr "type" "neon_store1_1reg<q>")]
5315 )
5316
5317 (define_insn "*aarch64_be_movoi"
5318   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5319         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5320   "TARGET_SIMD && BYTES_BIG_ENDIAN
5321    && (register_operand (operands[0], OImode)
5322        || register_operand (operands[1], OImode))"
5323   "@
5324    #
5325    stp\\t%q1, %R1, %0
5326    ldp\\t%q0, %R0, %1"
5327   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5328    (set_attr "length" "8,4,4")]
5329 )
5330
5331 (define_insn "*aarch64_be_movci"
5332   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5333         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5334   "TARGET_SIMD && BYTES_BIG_ENDIAN
5335    && (register_operand (operands[0], CImode)
5336        || register_operand (operands[1], CImode))"
5337   "#"
5338   [(set_attr "type" "multiple")
5339    (set_attr "length" "12,4,4")]
5340 )
5341
5342 (define_insn "*aarch64_be_movxi"
5343   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5344         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5345   "TARGET_SIMD && BYTES_BIG_ENDIAN
5346    && (register_operand (operands[0], XImode)
5347        || register_operand (operands[1], XImode))"
5348   "#"
5349   [(set_attr "type" "multiple")
5350    (set_attr "length" "16,4,4")]
5351 )
5352
5353 (define_split
5354   [(set (match_operand:OI 0 "register_operand")
5355         (match_operand:OI 1 "register_operand"))]
5356   "TARGET_SIMD && reload_completed"
5357   [(const_int 0)]
5358 {
5359   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5360   DONE;
5361 })
5362
5363 (define_split
5364   [(set (match_operand:CI 0 "nonimmediate_operand")
5365         (match_operand:CI 1 "general_operand"))]
5366   "TARGET_SIMD && reload_completed"
5367   [(const_int 0)]
5368 {
5369   if (register_operand (operands[0], CImode)
5370       && register_operand (operands[1], CImode))
5371     {
5372       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5373       DONE;
5374     }
5375   else if (BYTES_BIG_ENDIAN)
5376     {
5377       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5378                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5379       emit_move_insn (gen_lowpart (V16QImode,
5380                                    simplify_gen_subreg (TImode, operands[0],
5381                                                         CImode, 32)),
5382                       gen_lowpart (V16QImode,
5383                                    simplify_gen_subreg (TImode, operands[1],
5384                                                         CImode, 32)));
5385       DONE;
5386     }
5387   else
5388     FAIL;
5389 })
5390
5391 (define_split
5392   [(set (match_operand:XI 0 "nonimmediate_operand")
5393         (match_operand:XI 1 "general_operand"))]
5394   "TARGET_SIMD && reload_completed"
5395   [(const_int 0)]
5396 {
5397   if (register_operand (operands[0], XImode)
5398       && register_operand (operands[1], XImode))
5399     {
5400       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5401       DONE;
5402     }
5403   else if (BYTES_BIG_ENDIAN)
5404     {
5405       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5406                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5407       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5408                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5409       DONE;
5410     }
5411   else
5412     FAIL;
5413 })
5414
5415 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5416   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5417    (match_operand:DI 1 "register_operand" "w")
5418    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5419   "TARGET_SIMD"
5420 {
5421   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5422   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5423                      * <VSTRUCT:nregs>);
5424
5425   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5426                                                                 mem));
5427   DONE;
5428 })
5429
5430 (define_insn "aarch64_ld2<mode>_dreg"
5431   [(set (match_operand:OI 0 "register_operand" "=w")
5432         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5433                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5434                    UNSPEC_LD2_DREG))]
5435   "TARGET_SIMD"
5436   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5437   [(set_attr "type" "neon_load2_2reg<q>")]
5438 )
5439
5440 (define_insn "aarch64_ld2<mode>_dreg"
5441   [(set (match_operand:OI 0 "register_operand" "=w")
5442         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5443                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444                    UNSPEC_LD2_DREG))]
5445   "TARGET_SIMD"
5446   "ld1\\t{%S0.1d - %T0.1d}, %1"
5447   [(set_attr "type" "neon_load1_2reg<q>")]
5448 )
5449
5450 (define_insn "aarch64_ld3<mode>_dreg"
5451   [(set (match_operand:CI 0 "register_operand" "=w")
5452         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5453                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5454                    UNSPEC_LD3_DREG))]
5455   "TARGET_SIMD"
5456   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5457   [(set_attr "type" "neon_load3_3reg<q>")]
5458 )
5459
5460 (define_insn "aarch64_ld3<mode>_dreg"
5461   [(set (match_operand:CI 0 "register_operand" "=w")
5462         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5463                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464                    UNSPEC_LD3_DREG))]
5465   "TARGET_SIMD"
5466   "ld1\\t{%S0.1d - %U0.1d}, %1"
5467   [(set_attr "type" "neon_load1_3reg<q>")]
5468 )
5469
5470 (define_insn "aarch64_ld4<mode>_dreg"
5471   [(set (match_operand:XI 0 "register_operand" "=w")
5472         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5473                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474                    UNSPEC_LD4_DREG))]
5475   "TARGET_SIMD"
5476   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5477   [(set_attr "type" "neon_load4_4reg<q>")]
5478 )
5479
5480 (define_insn "aarch64_ld4<mode>_dreg"
5481   [(set (match_operand:XI 0 "register_operand" "=w")
5482         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5483                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5484                    UNSPEC_LD4_DREG))]
5485   "TARGET_SIMD"
5486   "ld1\\t{%S0.1d - %V0.1d}, %1"
5487   [(set_attr "type" "neon_load1_4reg<q>")]
5488 )
5489
5490 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5491  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5492   (match_operand:DI 1 "register_operand" "r")
5493   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5494   "TARGET_SIMD"
5495 {
5496   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5497   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5498
5499   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5500   DONE;
5501 })
5502
5503 (define_expand "aarch64_ld1<VALL_F16:mode>"
5504  [(match_operand:VALL_F16 0 "register_operand")
5505   (match_operand:DI 1 "register_operand")]
5506   "TARGET_SIMD"
5507 {
5508   machine_mode mode = <VALL_F16:MODE>mode;
5509   rtx mem = gen_rtx_MEM (mode, operands[1]);
5510
5511   if (BYTES_BIG_ENDIAN)
5512     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5513   else
5514     emit_move_insn (operands[0], mem);
5515   DONE;
5516 })
5517
5518 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5519  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5520   (match_operand:DI 1 "register_operand" "r")
5521   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5522   "TARGET_SIMD"
5523 {
5524   machine_mode mode = <VSTRUCT:MODE>mode;
5525   rtx mem = gen_rtx_MEM (mode, operands[1]);
5526
5527   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5528   DONE;
5529 })
5530
5531 (define_expand "aarch64_ld1x2<VQ:mode>"
5532  [(match_operand:OI 0 "register_operand" "=w")
5533   (match_operand:DI 1 "register_operand" "r")
5534   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5535   "TARGET_SIMD"
5536 {
5537   machine_mode mode = OImode;
5538   rtx mem = gen_rtx_MEM (mode, operands[1]);
5539
5540   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5541   DONE;
5542 })
5543
5544 (define_expand "aarch64_ld1x2<VDC:mode>"
5545  [(match_operand:OI 0 "register_operand" "=w")
5546   (match_operand:DI 1 "register_operand" "r")
5547   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5548   "TARGET_SIMD"
5549 {
5550   machine_mode mode = OImode;
5551   rtx mem = gen_rtx_MEM (mode, operands[1]);
5552
5553   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5554   DONE;
5555 })
5556
5557
5558 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5559   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5560         (match_operand:DI 1 "register_operand" "w")
5561         (match_operand:VSTRUCT 2 "register_operand" "0")
5562         (match_operand:SI 3 "immediate_operand" "i")
5563         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564   "TARGET_SIMD"
5565 {
5566   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5567   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5568                      * <VSTRUCT:nregs>);
5569
5570   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5571   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5572         operands[0], mem, operands[2], operands[3]));
5573   DONE;
5574 })
5575
5576 ;; Expanders for builtins to extract vector registers from large
5577 ;; opaque integer modes.
5578
5579 ;; D-register list.
5580
5581 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5582  [(match_operand:VDC 0 "register_operand" "=w")
5583   (match_operand:VSTRUCT 1 "register_operand" "w")
5584   (match_operand:SI 2 "immediate_operand" "i")]
5585   "TARGET_SIMD"
5586 {
5587   int part = INTVAL (operands[2]);
5588   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5589   int offset = part * 16;
5590
5591   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5592   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5593   DONE;
5594 })
5595
5596 ;; Q-register list.
5597
5598 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5599  [(match_operand:VQ 0 "register_operand" "=w")
5600   (match_operand:VSTRUCT 1 "register_operand" "w")
5601   (match_operand:SI 2 "immediate_operand" "i")]
5602   "TARGET_SIMD"
5603 {
5604   int part = INTVAL (operands[2]);
5605   int offset = part * 16;
5606
5607   emit_move_insn (operands[0],
5608                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5609   DONE;
5610 })
5611
5612 ;; Permuted-store expanders for neon intrinsics.
5613
5614 ;; Permute instructions
5615
5616 ;; vec_perm support
5617
5618 (define_expand "vec_perm<mode>"
5619   [(match_operand:VB 0 "register_operand")
5620    (match_operand:VB 1 "register_operand")
5621    (match_operand:VB 2 "register_operand")
5622    (match_operand:VB 3 "register_operand")]
5623   "TARGET_SIMD"
5624 {
5625   aarch64_expand_vec_perm (operands[0], operands[1],
5626                            operands[2], operands[3], <nunits>);
5627   DONE;
5628 })
5629
5630 (define_insn "aarch64_tbl1<mode>"
5631   [(set (match_operand:VB 0 "register_operand" "=w")
5632         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5633                     (match_operand:VB 2 "register_operand" "w")]
5634                    UNSPEC_TBL))]
5635   "TARGET_SIMD"
5636   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5637   [(set_attr "type" "neon_tbl1<q>")]
5638 )
5639
5640 ;; Two source registers.
5641
5642 (define_insn "aarch64_tbl2v16qi"
5643   [(set (match_operand:V16QI 0 "register_operand" "=w")
5644         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5645                        (match_operand:V16QI 2 "register_operand" "w")]
5646                       UNSPEC_TBL))]
5647   "TARGET_SIMD"
5648   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5649   [(set_attr "type" "neon_tbl2_q")]
5650 )
5651
5652 (define_insn "aarch64_tbl3<mode>"
5653   [(set (match_operand:VB 0 "register_operand" "=w")
5654         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5655                       (match_operand:VB 2 "register_operand" "w")]
5656                       UNSPEC_TBL))]
5657   "TARGET_SIMD"
5658   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5659   [(set_attr "type" "neon_tbl3")]
5660 )
5661
5662 (define_insn "aarch64_tbx4<mode>"
5663   [(set (match_operand:VB 0 "register_operand" "=w")
5664         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5665                       (match_operand:OI 2 "register_operand" "w")
5666                       (match_operand:VB 3 "register_operand" "w")]
5667                       UNSPEC_TBX))]
5668   "TARGET_SIMD"
5669   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5670   [(set_attr "type" "neon_tbl4")]
5671 )
5672
5673 ;; Three source registers.
5674
5675 (define_insn "aarch64_qtbl3<mode>"
5676   [(set (match_operand:VB 0 "register_operand" "=w")
5677         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5678                       (match_operand:VB 2 "register_operand" "w")]
5679                       UNSPEC_TBL))]
5680   "TARGET_SIMD"
5681   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5682   [(set_attr "type" "neon_tbl3")]
5683 )
5684
5685 (define_insn "aarch64_qtbx3<mode>"
5686   [(set (match_operand:VB 0 "register_operand" "=w")
5687         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5688                       (match_operand:CI 2 "register_operand" "w")
5689                       (match_operand:VB 3 "register_operand" "w")]
5690                       UNSPEC_TBX))]
5691   "TARGET_SIMD"
5692   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5693   [(set_attr "type" "neon_tbl3")]
5694 )
5695
5696 ;; Four source registers.
5697
5698 (define_insn "aarch64_qtbl4<mode>"
5699   [(set (match_operand:VB 0 "register_operand" "=w")
5700         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5701                       (match_operand:VB 2 "register_operand" "w")]
5702                       UNSPEC_TBL))]
5703   "TARGET_SIMD"
5704   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5705   [(set_attr "type" "neon_tbl4")]
5706 )
5707
5708 (define_insn "aarch64_qtbx4<mode>"
5709   [(set (match_operand:VB 0 "register_operand" "=w")
5710         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5711                       (match_operand:XI 2 "register_operand" "w")
5712                       (match_operand:VB 3 "register_operand" "w")]
5713                       UNSPEC_TBX))]
5714   "TARGET_SIMD"
5715   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5716   [(set_attr "type" "neon_tbl4")]
5717 )
5718
5719 (define_insn_and_split "aarch64_combinev16qi"
5720   [(set (match_operand:OI 0 "register_operand" "=w")
5721         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5722                     (match_operand:V16QI 2 "register_operand" "w")]
5723                    UNSPEC_CONCAT))]
5724   "TARGET_SIMD"
5725   "#"
5726   "&& reload_completed"
5727   [(const_int 0)]
5728 {
5729   aarch64_split_combinev16qi (operands);
5730   DONE;
5731 }
5732 [(set_attr "type" "multiple")]
5733 )
5734
5735 ;; This instruction's pattern is generated directly by
5736 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5737 ;; need corresponding changes there.
5738 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5739   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5740         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5741                           (match_operand:VALL_F16 2 "register_operand" "w")]
5742          PERMUTE))]
5743   "TARGET_SIMD"
5744   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5745   [(set_attr "type" "neon_permute<q>")]
5746 )
5747
5748 ;; This instruction's pattern is generated directly by
5749 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5750 ;; need corresponding changes there.  Note that the immediate (third)
5751 ;; operand is a lane index not a byte index.
5752 (define_insn "aarch64_ext<mode>"
5753   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5754         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5755                           (match_operand:VALL_F16 2 "register_operand" "w")
5756                           (match_operand:SI 3 "immediate_operand" "i")]
5757          UNSPEC_EXT))]
5758   "TARGET_SIMD"
5759 {
5760   operands[3] = GEN_INT (INTVAL (operands[3])
5761       * GET_MODE_UNIT_SIZE (<MODE>mode));
5762   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5763 }
5764   [(set_attr "type" "neon_ext<q>")]
5765 )
5766
5767 ;; This instruction's pattern is generated directly by
5768 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5769 ;; need corresponding changes there.
5770 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5771   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5772         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5773                     REVERSE))]
5774   "TARGET_SIMD"
5775   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5776   [(set_attr "type" "neon_rev<q>")]
5777 )
5778
5779 (define_insn "aarch64_st2<mode>_dreg"
5780   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5781         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5782                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5783                    UNSPEC_ST2))]
5784   "TARGET_SIMD"
5785   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5786   [(set_attr "type" "neon_store2_2reg")]
5787 )
5788
5789 (define_insn "aarch64_st2<mode>_dreg"
5790   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5791         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5792                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5793                    UNSPEC_ST2))]
5794   "TARGET_SIMD"
5795   "st1\\t{%S1.1d - %T1.1d}, %0"
5796   [(set_attr "type" "neon_store1_2reg")]
5797 )
5798
5799 (define_insn "aarch64_st3<mode>_dreg"
5800   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5801         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5802                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5803                    UNSPEC_ST3))]
5804   "TARGET_SIMD"
5805   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5806   [(set_attr "type" "neon_store3_3reg")]
5807 )
5808
5809 (define_insn "aarch64_st3<mode>_dreg"
5810   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5811         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5812                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5813                    UNSPEC_ST3))]
5814   "TARGET_SIMD"
5815   "st1\\t{%S1.1d - %U1.1d}, %0"
5816   [(set_attr "type" "neon_store1_3reg")]
5817 )
5818
5819 (define_insn "aarch64_st4<mode>_dreg"
5820   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5821         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5822                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5823                    UNSPEC_ST4))]
5824   "TARGET_SIMD"
5825   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5826   [(set_attr "type" "neon_store4_4reg")]
5827 )
5828
5829 (define_insn "aarch64_st4<mode>_dreg"
5830   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5831         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5832                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5833                    UNSPEC_ST4))]
5834   "TARGET_SIMD"
5835   "st1\\t{%S1.1d - %V1.1d}, %0"
5836   [(set_attr "type" "neon_store1_4reg")]
5837 )
5838
5839 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5840  [(match_operand:DI 0 "register_operand" "r")
5841   (match_operand:VSTRUCT 1 "register_operand" "w")
5842   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5843   "TARGET_SIMD"
5844 {
5845   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5846   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5847
5848   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5849   DONE;
5850 })
5851
5852 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5853  [(match_operand:DI 0 "register_operand" "r")
5854   (match_operand:VSTRUCT 1 "register_operand" "w")
5855   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5856   "TARGET_SIMD"
5857 {
5858   machine_mode mode = <VSTRUCT:MODE>mode;
5859   rtx mem = gen_rtx_MEM (mode, operands[0]);
5860
5861   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5862   DONE;
5863 })
5864
5865 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5866  [(match_operand:DI 0 "register_operand" "r")
5867   (match_operand:VSTRUCT 1 "register_operand" "w")
5868   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5869   (match_operand:SI 2 "immediate_operand")]
5870   "TARGET_SIMD"
5871 {
5872   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5873   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5874                      * <VSTRUCT:nregs>);
5875
5876   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5877                 mem, operands[1], operands[2]));
5878   DONE;
5879 })
5880
5881 (define_expand "aarch64_st1<VALL_F16:mode>"
5882  [(match_operand:DI 0 "register_operand")
5883   (match_operand:VALL_F16 1 "register_operand")]
5884   "TARGET_SIMD"
5885 {
5886   machine_mode mode = <VALL_F16:MODE>mode;
5887   rtx mem = gen_rtx_MEM (mode, operands[0]);
5888
5889   if (BYTES_BIG_ENDIAN)
5890     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5891   else
5892     emit_move_insn (mem, operands[1]);
5893   DONE;
5894 })
5895
5896 ;; Expander for builtins to insert vector registers into large
5897 ;; opaque integer modes.
5898
5899 ;; Q-register list.  We don't need a D-reg inserter as we zero
5900 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5901
5902 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5903  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5904   (match_operand:VSTRUCT 1 "register_operand" "0")
5905   (match_operand:VQ 2 "register_operand" "w")
5906   (match_operand:SI 3 "immediate_operand" "i")]
5907   "TARGET_SIMD"
5908 {
5909   int part = INTVAL (operands[3]);
5910   int offset = part * 16;
5911
5912   emit_move_insn (operands[0], operands[1]);
5913   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5914                   operands[2]);
5915   DONE;
5916 })
5917
5918 ;; Standard pattern name vec_init<mode><Vel>.
5919
5920 (define_expand "vec_init<mode><Vel>"
5921   [(match_operand:VALL_F16 0 "register_operand" "")
5922    (match_operand 1 "" "")]
5923   "TARGET_SIMD"
5924 {
5925   aarch64_expand_vector_init (operands[0], operands[1]);
5926   DONE;
5927 })
5928
5929 (define_insn "*aarch64_simd_ld1r<mode>"
5930   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5931         (vec_duplicate:VALL_F16
5932           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5933   "TARGET_SIMD"
5934   "ld1r\\t{%0.<Vtype>}, %1"
5935   [(set_attr "type" "neon_load1_all_lanes")]
5936 )
5937
5938 (define_insn "aarch64_simd_ld1<mode>_x2"
5939   [(set (match_operand:OI 0 "register_operand" "=w")
5940         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5941                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5942                    UNSPEC_LD1))]
5943   "TARGET_SIMD"
5944   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5945   [(set_attr "type" "neon_load1_2reg<q>")]
5946 )
5947
5948 (define_insn "aarch64_simd_ld1<mode>_x2"
5949   [(set (match_operand:OI 0 "register_operand" "=w")
5950         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5951                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5952                    UNSPEC_LD1))]
5953   "TARGET_SIMD"
5954   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5955   [(set_attr "type" "neon_load1_2reg<q>")]
5956 )
5957
5958
5959 (define_insn "@aarch64_frecpe<mode>"
5960   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5961         (unspec:VHSDF_HSDF
5962          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5963          UNSPEC_FRECPE))]
5964   "TARGET_SIMD"
5965   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5966   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5967 )
5968
5969 (define_insn "aarch64_frecpx<mode>"
5970   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5971         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5972          UNSPEC_FRECPX))]
5973   "TARGET_SIMD"
5974   "frecpx\t%<s>0, %<s>1"
5975   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5976 )
5977
5978 (define_insn "@aarch64_frecps<mode>"
5979   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5980         (unspec:VHSDF_HSDF
5981           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5982           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5983           UNSPEC_FRECPS))]
5984   "TARGET_SIMD"
5985   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5986   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5987 )
5988
5989 (define_insn "aarch64_urecpe<mode>"
5990   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5991         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5992                 UNSPEC_URECPE))]
5993  "TARGET_SIMD"
5994  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5995   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5996
5997 ;; Standard pattern name vec_extract<mode><Vel>.
5998
5999 (define_expand "vec_extract<mode><Vel>"
6000   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6001    (match_operand:VALL_F16 1 "register_operand" "")
6002    (match_operand:SI 2 "immediate_operand" "")]
6003   "TARGET_SIMD"
6004 {
6005     emit_insn
6006       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6007     DONE;
6008 })
6009
6010 ;; aes
6011
6012 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6013   [(set (match_operand:V16QI 0 "register_operand" "=w")
6014         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6015                        (match_operand:V16QI 2 "register_operand" "w")]
6016          CRYPTO_AES))]
6017   "TARGET_SIMD && TARGET_AES"
6018   "aes<aes_op>\\t%0.16b, %2.16b"
6019   [(set_attr "type" "crypto_aese")]
6020 )
6021
6022 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6023   [(set (match_operand:V16QI 0 "register_operand" "=w")
6024         (unspec:V16QI [(xor:V16QI
6025                         (match_operand:V16QI 1 "register_operand" "%0")
6026                         (match_operand:V16QI 2 "register_operand" "w"))
6027                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6028                        CRYPTO_AES))]
6029   "TARGET_SIMD && TARGET_AES"
6030   "aes<aes_op>\\t%0.16b, %2.16b"
6031   [(set_attr "type" "crypto_aese")]
6032 )
6033
6034 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6035   [(set (match_operand:V16QI 0 "register_operand" "=w")
6036         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6037         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6038                    (match_operand:V16QI 2 "register_operand" "w"))]
6039         CRYPTO_AES))]
6040   "TARGET_SIMD && TARGET_AES"
6041   "aes<aes_op>\\t%0.16b, %2.16b"
6042   [(set_attr "type" "crypto_aese")]
6043 )
6044
6045 ;; When AES/AESMC fusion is enabled we want the register allocation to
6046 ;; look like:
6047 ;;    AESE Vn, _
6048 ;;    AESMC Vn, Vn
6049 ;; So prefer to tie operand 1 to operand 0 when fusing.
6050
6051 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6052   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6053         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6054          CRYPTO_AESMC))]
6055   "TARGET_SIMD && TARGET_AES"
6056   "aes<aesmc_op>\\t%0.16b, %1.16b"
6057   [(set_attr "type" "crypto_aesmc")
6058    (set_attr_alternative "enabled"
6059      [(if_then_else (match_test
6060                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6061                      (const_string "yes" )
6062                      (const_string "no"))
6063       (const_string "yes")])]
6064 )
6065
6066 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6067 ;; and enforce the register dependency without scheduling or register
6068 ;; allocation messing up the order or introducing moves inbetween.
6069 ;;  Mash the two together during combine.
6070
6071 (define_insn "*aarch64_crypto_aese_fused"
6072   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6073         (unspec:V16QI
6074           [(unspec:V16QI
6075             [(match_operand:V16QI 1 "register_operand" "0")
6076              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
6077           ] UNSPEC_AESMC))]
6078   "TARGET_SIMD && TARGET_AES
6079    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6080   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6081   [(set_attr "type" "crypto_aese")
6082    (set_attr "length" "8")]
6083 )
6084
6085 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6086 ;; and enforce the register dependency without scheduling or register
6087 ;; allocation messing up the order or introducing moves inbetween.
6088 ;;  Mash the two together during combine.
6089
6090 (define_insn "*aarch64_crypto_aesd_fused"
6091   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6092         (unspec:V16QI
6093           [(unspec:V16QI
6094             [(match_operand:V16QI 1 "register_operand" "0")
6095              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6096           ] UNSPEC_AESIMC))]
6097   "TARGET_SIMD && TARGET_AES
6098    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6099   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6100   [(set_attr "type" "crypto_aese")
6101    (set_attr "length" "8")]
6102 )
6103
6104 ;; sha1
6105
6106 (define_insn "aarch64_crypto_sha1hsi"
6107   [(set (match_operand:SI 0 "register_operand" "=w")
6108         (unspec:SI [(match_operand:SI 1
6109                        "register_operand" "w")]
6110          UNSPEC_SHA1H))]
6111   "TARGET_SIMD && TARGET_SHA2"
6112   "sha1h\\t%s0, %s1"
6113   [(set_attr "type" "crypto_sha1_fast")]
6114 )
6115
6116 (define_insn "aarch64_crypto_sha1hv4si"
6117   [(set (match_operand:SI 0 "register_operand" "=w")
6118         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6119                      (parallel [(const_int 0)]))]
6120          UNSPEC_SHA1H))]
6121   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6122   "sha1h\\t%s0, %s1"
6123   [(set_attr "type" "crypto_sha1_fast")]
6124 )
6125
6126 (define_insn "aarch64_be_crypto_sha1hv4si"
6127   [(set (match_operand:SI 0 "register_operand" "=w")
6128         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6129                      (parallel [(const_int 3)]))]
6130          UNSPEC_SHA1H))]
6131   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6132   "sha1h\\t%s0, %s1"
6133   [(set_attr "type" "crypto_sha1_fast")]
6134 )
6135
6136 (define_insn "aarch64_crypto_sha1su1v4si"
6137   [(set (match_operand:V4SI 0 "register_operand" "=w")
6138         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6139                       (match_operand:V4SI 2 "register_operand" "w")]
6140          UNSPEC_SHA1SU1))]
6141   "TARGET_SIMD && TARGET_SHA2"
6142   "sha1su1\\t%0.4s, %2.4s"
6143   [(set_attr "type" "crypto_sha1_fast")]
6144 )
6145
6146 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6147   [(set (match_operand:V4SI 0 "register_operand" "=w")
6148         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6149                       (match_operand:SI 2 "register_operand" "w")
6150                       (match_operand:V4SI 3 "register_operand" "w")]
6151          CRYPTO_SHA1))]
6152   "TARGET_SIMD && TARGET_SHA2"
6153   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6154   [(set_attr "type" "crypto_sha1_slow")]
6155 )
6156
6157 (define_insn "aarch64_crypto_sha1su0v4si"
6158   [(set (match_operand:V4SI 0 "register_operand" "=w")
6159         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6160                       (match_operand:V4SI 2 "register_operand" "w")
6161                       (match_operand:V4SI 3 "register_operand" "w")]
6162          UNSPEC_SHA1SU0))]
6163   "TARGET_SIMD && TARGET_SHA2"
6164   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6165   [(set_attr "type" "crypto_sha1_xor")]
6166 )
6167
6168 ;; sha256
6169
6170 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6171   [(set (match_operand:V4SI 0 "register_operand" "=w")
6172         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6173                       (match_operand:V4SI 2 "register_operand" "w")
6174                       (match_operand:V4SI 3 "register_operand" "w")]
6175          CRYPTO_SHA256))]
6176   "TARGET_SIMD && TARGET_SHA2"
6177   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6178   [(set_attr "type" "crypto_sha256_slow")]
6179 )
6180
6181 (define_insn "aarch64_crypto_sha256su0v4si"
6182   [(set (match_operand:V4SI 0 "register_operand" "=w")
6183         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6184                       (match_operand:V4SI 2 "register_operand" "w")]
6185          UNSPEC_SHA256SU0))]
6186   "TARGET_SIMD && TARGET_SHA2"
6187   "sha256su0\\t%0.4s, %2.4s"
6188   [(set_attr "type" "crypto_sha256_fast")]
6189 )
6190
6191 (define_insn "aarch64_crypto_sha256su1v4si"
6192   [(set (match_operand:V4SI 0 "register_operand" "=w")
6193         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6194                       (match_operand:V4SI 2 "register_operand" "w")
6195                       (match_operand:V4SI 3 "register_operand" "w")]
6196          UNSPEC_SHA256SU1))]
6197   "TARGET_SIMD && TARGET_SHA2"
6198   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6199   [(set_attr "type" "crypto_sha256_slow")]
6200 )
6201
6202 ;; sha512
6203
6204 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6205   [(set (match_operand:V2DI 0 "register_operand" "=w")
6206         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6207                       (match_operand:V2DI 2 "register_operand" "w")
6208                       (match_operand:V2DI 3 "register_operand" "w")]
6209          CRYPTO_SHA512))]
6210   "TARGET_SIMD && TARGET_SHA3"
6211   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6212   [(set_attr "type" "crypto_sha512")]
6213 )
6214
6215 (define_insn "aarch64_crypto_sha512su0qv2di"
6216   [(set (match_operand:V2DI 0 "register_operand" "=w")
6217         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6218                       (match_operand:V2DI 2 "register_operand" "w")]
6219          UNSPEC_SHA512SU0))]
6220   "TARGET_SIMD && TARGET_SHA3"
6221   "sha512su0\\t%0.2d, %2.2d"
6222   [(set_attr "type" "crypto_sha512")]
6223 )
6224
6225 (define_insn "aarch64_crypto_sha512su1qv2di"
6226   [(set (match_operand:V2DI 0 "register_operand" "=w")
6227         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6228                       (match_operand:V2DI 2 "register_operand" "w")
6229                       (match_operand:V2DI 3 "register_operand" "w")]
6230          UNSPEC_SHA512SU1))]
6231   "TARGET_SIMD && TARGET_SHA3"
6232   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6233   [(set_attr "type" "crypto_sha512")]
6234 )
6235
6236 ;; sha3
6237
6238 (define_insn "eor3q<mode>4"
6239   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6240         (xor:VQ_I
6241          (xor:VQ_I
6242           (match_operand:VQ_I 2 "register_operand" "w")
6243           (match_operand:VQ_I 3 "register_operand" "w"))
6244          (match_operand:VQ_I 1 "register_operand" "w")))]
6245   "TARGET_SIMD && TARGET_SHA3"
6246   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6247   [(set_attr "type" "crypto_sha3")]
6248 )
6249
6250 (define_insn "aarch64_rax1qv2di"
6251   [(set (match_operand:V2DI 0 "register_operand" "=w")
6252         (xor:V2DI
6253          (rotate:V2DI
6254           (match_operand:V2DI 2 "register_operand" "w")
6255           (const_int 1))
6256          (match_operand:V2DI 1 "register_operand" "w")))]
6257   "TARGET_SIMD && TARGET_SHA3"
6258   "rax1\\t%0.2d, %1.2d, %2.2d"
6259   [(set_attr "type" "crypto_sha3")]
6260 )
6261
6262 (define_insn "aarch64_xarqv2di"
6263   [(set (match_operand:V2DI 0 "register_operand" "=w")
6264         (rotatert:V2DI
6265          (xor:V2DI
6266           (match_operand:V2DI 1 "register_operand" "%w")
6267           (match_operand:V2DI 2 "register_operand" "w"))
6268          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6269   "TARGET_SIMD && TARGET_SHA3"
6270   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6271   [(set_attr "type" "crypto_sha3")]
6272 )
6273
6274 (define_insn "bcaxq<mode>4"
6275   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6276         (xor:VQ_I
6277          (and:VQ_I
6278           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6279           (match_operand:VQ_I 2 "register_operand" "w"))
6280          (match_operand:VQ_I 1 "register_operand" "w")))]
6281   "TARGET_SIMD && TARGET_SHA3"
6282   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6283   [(set_attr "type" "crypto_sha3")]
6284 )
6285
6286 ;; SM3
6287
6288 (define_insn "aarch64_sm3ss1qv4si"
6289   [(set (match_operand:V4SI 0 "register_operand" "=w")
6290         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6291                       (match_operand:V4SI 2 "register_operand" "w")
6292                       (match_operand:V4SI 3 "register_operand" "w")]
6293          UNSPEC_SM3SS1))]
6294   "TARGET_SIMD && TARGET_SM4"
6295   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6296   [(set_attr "type" "crypto_sm3")]
6297 )
6298
6299
6300 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6301   [(set (match_operand:V4SI 0 "register_operand" "=w")
6302         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6303                       (match_operand:V4SI 2 "register_operand" "w")
6304                       (match_operand:V4SI 3 "register_operand" "w")
6305                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6306          CRYPTO_SM3TT))]
6307   "TARGET_SIMD && TARGET_SM4"
6308   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6309   [(set_attr "type" "crypto_sm3")]
6310 )
6311
6312 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6313   [(set (match_operand:V4SI 0 "register_operand" "=w")
6314         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6315                       (match_operand:V4SI 2 "register_operand" "w")
6316                       (match_operand:V4SI 3 "register_operand" "w")]
6317          CRYPTO_SM3PART))]
6318   "TARGET_SIMD && TARGET_SM4"
6319   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6320   [(set_attr "type" "crypto_sm3")]
6321 )
6322
6323 ;; SM4
6324
6325 (define_insn "aarch64_sm4eqv4si"
6326   [(set (match_operand:V4SI 0 "register_operand" "=w")
6327         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6328                       (match_operand:V4SI 2 "register_operand" "w")]
6329          UNSPEC_SM4E))]
6330   "TARGET_SIMD && TARGET_SM4"
6331   "sm4e\\t%0.4s, %2.4s"
6332   [(set_attr "type" "crypto_sm4")]
6333 )
6334
6335 (define_insn "aarch64_sm4ekeyqv4si"
6336   [(set (match_operand:V4SI 0 "register_operand" "=w")
6337         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6338                       (match_operand:V4SI 2 "register_operand" "w")]
6339          UNSPEC_SM4EKEY))]
6340   "TARGET_SIMD && TARGET_SM4"
6341   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6342   [(set_attr "type" "crypto_sm4")]
6343 )
6344
6345 ;; fp16fml
6346
6347 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6348   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6349         (unspec:VDQSF
6350          [(match_operand:VDQSF 1 "register_operand" "0")
6351           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6352           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6353          VFMLA16_LOW))]
6354   "TARGET_F16FML"
6355 {
6356   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6357                                             <nunits> * 2, false);
6358   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6359                                             <nunits> * 2, false);
6360
6361   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6362                                                                 operands[1],
6363                                                                 operands[2],
6364                                                                 operands[3],
6365                                                                 p1, p2));
6366   DONE;
6367
6368 })
6369
6370 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6371   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6372         (unspec:VDQSF
6373          [(match_operand:VDQSF 1 "register_operand" "0")
6374           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6375           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6376          VFMLA16_HIGH))]
6377   "TARGET_F16FML"
6378 {
6379   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6380   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6381
6382   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6383                                                                  operands[1],
6384                                                                  operands[2],
6385                                                                  operands[3],
6386                                                                  p1, p2));
6387   DONE;
6388 })
6389
6390 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6391   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6392         (fma:VDQSF
6393          (float_extend:VDQSF
6394           (vec_select:<VFMLA_SEL_W>
6395            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6396            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6397          (float_extend:VDQSF
6398           (vec_select:<VFMLA_SEL_W>
6399            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6400            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6401          (match_operand:VDQSF 1 "register_operand" "0")))]
6402   "TARGET_F16FML"
6403   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6404   [(set_attr "type" "neon_fp_mul_s")]
6405 )
6406
6407 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6408   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6409         (fma:VDQSF
6410          (float_extend:VDQSF
6411           (neg:<VFMLA_SEL_W>
6412            (vec_select:<VFMLA_SEL_W>
6413             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6414             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6415          (float_extend:VDQSF
6416           (vec_select:<VFMLA_SEL_W>
6417            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6418            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6419          (match_operand:VDQSF 1 "register_operand" "0")))]
6420   "TARGET_F16FML"
6421   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6422   [(set_attr "type" "neon_fp_mul_s")]
6423 )
6424
6425 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6426   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6427         (fma:VDQSF
6428          (float_extend:VDQSF
6429           (vec_select:<VFMLA_SEL_W>
6430            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6431            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6432          (float_extend:VDQSF
6433           (vec_select:<VFMLA_SEL_W>
6434            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6435            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6436          (match_operand:VDQSF 1 "register_operand" "0")))]
6437   "TARGET_F16FML"
6438   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6439   [(set_attr "type" "neon_fp_mul_s")]
6440 )
6441
6442 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6443   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6444         (fma:VDQSF
6445          (float_extend:VDQSF
6446           (neg:<VFMLA_SEL_W>
6447            (vec_select:<VFMLA_SEL_W>
6448             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6449             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6450          (float_extend:VDQSF
6451           (vec_select:<VFMLA_SEL_W>
6452            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6453            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6454          (match_operand:VDQSF 1 "register_operand" "0")))]
6455   "TARGET_F16FML"
6456   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6457   [(set_attr "type" "neon_fp_mul_s")]
6458 )
6459
6460 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6461   [(set (match_operand:V2SF 0 "register_operand" "")
6462         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6463                            (match_operand:V4HF 2 "register_operand" "")
6464                            (match_operand:V4HF 3 "register_operand" "")
6465                            (match_operand:SI 4 "aarch64_imm2" "")]
6466          VFMLA16_LOW))]
6467   "TARGET_F16FML"
6468 {
6469     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6470     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6471
6472     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6473                                                             operands[1],
6474                                                             operands[2],
6475                                                             operands[3],
6476                                                             p1, lane));
6477     DONE;
6478 }
6479 )
6480
6481 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6482   [(set (match_operand:V2SF 0 "register_operand" "")
6483         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6484                            (match_operand:V4HF 2 "register_operand" "")
6485                            (match_operand:V4HF 3 "register_operand" "")
6486                            (match_operand:SI 4 "aarch64_imm2" "")]
6487          VFMLA16_HIGH))]
6488   "TARGET_F16FML"
6489 {
6490     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6491     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6492
6493     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6494                                                              operands[1],
6495                                                              operands[2],
6496                                                              operands[3],
6497                                                              p1, lane));
6498     DONE;
6499 })
6500
6501 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6502   [(set (match_operand:V2SF 0 "register_operand" "=w")
6503         (fma:V2SF
6504          (float_extend:V2SF
6505            (vec_select:V2HF
6506             (match_operand:V4HF 2 "register_operand" "w")
6507             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6508          (float_extend:V2SF
6509            (vec_duplicate:V2HF
6510             (vec_select:HF
6511              (match_operand:V4HF 3 "register_operand" "x")
6512              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6513          (match_operand:V2SF 1 "register_operand" "0")))]
6514   "TARGET_F16FML"
6515   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6516   [(set_attr "type" "neon_fp_mul_s")]
6517 )
6518
6519 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6520   [(set (match_operand:V2SF 0 "register_operand" "=w")
6521         (fma:V2SF
6522          (float_extend:V2SF
6523           (neg:V2HF
6524            (vec_select:V2HF
6525             (match_operand:V4HF 2 "register_operand" "w")
6526             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6527          (float_extend:V2SF
6528           (vec_duplicate:V2HF
6529            (vec_select:HF
6530             (match_operand:V4HF 3 "register_operand" "x")
6531             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6532          (match_operand:V2SF 1 "register_operand" "0")))]
6533   "TARGET_F16FML"
6534   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6535   [(set_attr "type" "neon_fp_mul_s")]
6536 )
6537
6538 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6539   [(set (match_operand:V2SF 0 "register_operand" "=w")
6540         (fma:V2SF
6541          (float_extend:V2SF
6542            (vec_select:V2HF
6543             (match_operand:V4HF 2 "register_operand" "w")
6544             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6545          (float_extend:V2SF
6546            (vec_duplicate:V2HF
6547             (vec_select:HF
6548              (match_operand:V4HF 3 "register_operand" "x")
6549              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6550          (match_operand:V2SF 1 "register_operand" "0")))]
6551   "TARGET_F16FML"
6552   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6553   [(set_attr "type" "neon_fp_mul_s")]
6554 )
6555
6556 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6557   [(set (match_operand:V2SF 0 "register_operand" "=w")
6558         (fma:V2SF
6559          (float_extend:V2SF
6560            (neg:V2HF
6561             (vec_select:V2HF
6562              (match_operand:V4HF 2 "register_operand" "w")
6563              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6564          (float_extend:V2SF
6565            (vec_duplicate:V2HF
6566             (vec_select:HF
6567              (match_operand:V4HF 3 "register_operand" "x")
6568              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6569          (match_operand:V2SF 1 "register_operand" "0")))]
6570   "TARGET_F16FML"
6571   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6572   [(set_attr "type" "neon_fp_mul_s")]
6573 )
6574
6575 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6576   [(set (match_operand:V4SF 0 "register_operand" "")
6577         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6578                            (match_operand:V8HF 2 "register_operand" "")
6579                            (match_operand:V8HF 3 "register_operand" "")
6580                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6581          VFMLA16_LOW))]
6582   "TARGET_F16FML"
6583 {
6584     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6585     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6586
6587     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6588                                                               operands[1],
6589                                                               operands[2],
6590                                                               operands[3],
6591                                                               p1, lane));
6592     DONE;
6593 })
6594
6595 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6596   [(set (match_operand:V4SF 0 "register_operand" "")
6597         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6598                            (match_operand:V8HF 2 "register_operand" "")
6599                            (match_operand:V8HF 3 "register_operand" "")
6600                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6601          VFMLA16_HIGH))]
6602   "TARGET_F16FML"
6603 {
6604     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6605     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6606
6607     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6608                                                                operands[1],
6609                                                                operands[2],
6610                                                                operands[3],
6611                                                                p1, lane));
6612     DONE;
6613 })
6614
6615 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6616   [(set (match_operand:V4SF 0 "register_operand" "=w")
6617         (fma:V4SF
6618          (float_extend:V4SF
6619           (vec_select:V4HF
6620             (match_operand:V8HF 2 "register_operand" "w")
6621             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6622          (float_extend:V4SF
6623           (vec_duplicate:V4HF
6624            (vec_select:HF
6625             (match_operand:V8HF 3 "register_operand" "x")
6626             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6627          (match_operand:V4SF 1 "register_operand" "0")))]
6628   "TARGET_F16FML"
6629   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6630   [(set_attr "type" "neon_fp_mul_s")]
6631 )
6632
6633 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6634   [(set (match_operand:V4SF 0 "register_operand" "=w")
6635         (fma:V4SF
6636           (float_extend:V4SF
6637            (neg:V4HF
6638             (vec_select:V4HF
6639              (match_operand:V8HF 2 "register_operand" "w")
6640              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6641          (float_extend:V4SF
6642           (vec_duplicate:V4HF
6643            (vec_select:HF
6644             (match_operand:V8HF 3 "register_operand" "x")
6645             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6646          (match_operand:V4SF 1 "register_operand" "0")))]
6647   "TARGET_F16FML"
6648   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6649   [(set_attr "type" "neon_fp_mul_s")]
6650 )
6651
6652 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6653   [(set (match_operand:V4SF 0 "register_operand" "=w")
6654         (fma:V4SF
6655          (float_extend:V4SF
6656           (vec_select:V4HF
6657             (match_operand:V8HF 2 "register_operand" "w")
6658             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6659          (float_extend:V4SF
6660           (vec_duplicate:V4HF
6661            (vec_select:HF
6662             (match_operand:V8HF 3 "register_operand" "x")
6663             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664          (match_operand:V4SF 1 "register_operand" "0")))]
6665   "TARGET_F16FML"
6666   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6667   [(set_attr "type" "neon_fp_mul_s")]
6668 )
6669
6670 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6671   [(set (match_operand:V4SF 0 "register_operand" "=w")
6672         (fma:V4SF
6673          (float_extend:V4SF
6674           (neg:V4HF
6675            (vec_select:V4HF
6676             (match_operand:V8HF 2 "register_operand" "w")
6677             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6678          (float_extend:V4SF
6679           (vec_duplicate:V4HF
6680            (vec_select:HF
6681             (match_operand:V8HF 3 "register_operand" "x")
6682             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683          (match_operand:V4SF 1 "register_operand" "0")))]
6684   "TARGET_F16FML"
6685   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6686   [(set_attr "type" "neon_fp_mul_s")]
6687 )
6688
6689 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6690   [(set (match_operand:V2SF 0 "register_operand" "")
6691         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6692                       (match_operand:V4HF 2 "register_operand" "")
6693                       (match_operand:V8HF 3 "register_operand" "")
6694                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6695          VFMLA16_LOW))]
6696   "TARGET_F16FML"
6697 {
6698     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6699     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6700
6701     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6702                                                              operands[1],
6703                                                              operands[2],
6704                                                              operands[3],
6705                                                              p1, lane));
6706     DONE;
6707
6708 })
6709
6710 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6711   [(set (match_operand:V2SF 0 "register_operand" "")
6712         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6713                       (match_operand:V4HF 2 "register_operand" "")
6714                       (match_operand:V8HF 3 "register_operand" "")
6715                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6716          VFMLA16_HIGH))]
6717   "TARGET_F16FML"
6718 {
6719     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6720     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6721
6722     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6723                                                               operands[1],
6724                                                               operands[2],
6725                                                               operands[3],
6726                                                               p1, lane));
6727     DONE;
6728
6729 })
6730
6731 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6732   [(set (match_operand:V2SF 0 "register_operand" "=w")
6733         (fma:V2SF
6734          (float_extend:V2SF
6735            (vec_select:V2HF
6736             (match_operand:V4HF 2 "register_operand" "w")
6737             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6738          (float_extend:V2SF
6739           (vec_duplicate:V2HF
6740            (vec_select:HF
6741             (match_operand:V8HF 3 "register_operand" "x")
6742             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6743          (match_operand:V2SF 1 "register_operand" "0")))]
6744   "TARGET_F16FML"
6745   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6746   [(set_attr "type" "neon_fp_mul_s")]
6747 )
6748
6749 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6750   [(set (match_operand:V2SF 0 "register_operand" "=w")
6751         (fma:V2SF
6752          (float_extend:V2SF
6753           (neg:V2HF
6754            (vec_select:V2HF
6755             (match_operand:V4HF 2 "register_operand" "w")
6756             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6757          (float_extend:V2SF
6758           (vec_duplicate:V2HF
6759            (vec_select:HF
6760             (match_operand:V8HF 3 "register_operand" "x")
6761             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6762          (match_operand:V2SF 1 "register_operand" "0")))]
6763   "TARGET_F16FML"
6764   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6765   [(set_attr "type" "neon_fp_mul_s")]
6766 )
6767
6768 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6769   [(set (match_operand:V2SF 0 "register_operand" "=w")
6770         (fma:V2SF
6771          (float_extend:V2SF
6772            (vec_select:V2HF
6773             (match_operand:V4HF 2 "register_operand" "w")
6774             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6775          (float_extend:V2SF
6776           (vec_duplicate:V2HF
6777            (vec_select:HF
6778             (match_operand:V8HF 3 "register_operand" "x")
6779             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6780          (match_operand:V2SF 1 "register_operand" "0")))]
6781   "TARGET_F16FML"
6782   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6783   [(set_attr "type" "neon_fp_mul_s")]
6784 )
6785
6786 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6787   [(set (match_operand:V2SF 0 "register_operand" "=w")
6788         (fma:V2SF
6789          (float_extend:V2SF
6790           (neg:V2HF
6791            (vec_select:V2HF
6792             (match_operand:V4HF 2 "register_operand" "w")
6793             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6794          (float_extend:V2SF
6795           (vec_duplicate:V2HF
6796            (vec_select:HF
6797             (match_operand:V8HF 3 "register_operand" "x")
6798             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6799          (match_operand:V2SF 1 "register_operand" "0")))]
6800   "TARGET_F16FML"
6801   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6802   [(set_attr "type" "neon_fp_mul_s")]
6803 )
6804
6805 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6806   [(set (match_operand:V4SF 0 "register_operand" "")
6807         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6808                       (match_operand:V8HF 2 "register_operand" "")
6809                       (match_operand:V4HF 3 "register_operand" "")
6810                       (match_operand:SI 4 "aarch64_imm2" "")]
6811          VFMLA16_LOW))]
6812   "TARGET_F16FML"
6813 {
6814     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6815     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6816
6817     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6818                                                              operands[1],
6819                                                              operands[2],
6820                                                              operands[3],
6821                                                              p1, lane));
6822     DONE;
6823 })
6824
6825 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6826   [(set (match_operand:V4SF 0 "register_operand" "")
6827         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6828                       (match_operand:V8HF 2 "register_operand" "")
6829                       (match_operand:V4HF 3 "register_operand" "")
6830                       (match_operand:SI 4 "aarch64_imm2" "")]
6831          VFMLA16_HIGH))]
6832   "TARGET_F16FML"
6833 {
6834     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6835     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6836
6837     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6838                                                               operands[1],
6839                                                               operands[2],
6840                                                               operands[3],
6841                                                               p1, lane));
6842     DONE;
6843 })
6844
6845 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6846   [(set (match_operand:V4SF 0 "register_operand" "=w")
6847         (fma:V4SF
6848          (float_extend:V4SF
6849           (vec_select:V4HF
6850            (match_operand:V8HF 2 "register_operand" "w")
6851            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6852          (float_extend:V4SF
6853           (vec_duplicate:V4HF
6854            (vec_select:HF
6855             (match_operand:V4HF 3 "register_operand" "x")
6856             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6857          (match_operand:V4SF 1 "register_operand" "0")))]
6858   "TARGET_F16FML"
6859   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6860   [(set_attr "type" "neon_fp_mul_s")]
6861 )
6862
6863 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6864   [(set (match_operand:V4SF 0 "register_operand" "=w")
6865         (fma:V4SF
6866          (float_extend:V4SF
6867           (neg:V4HF
6868            (vec_select:V4HF
6869             (match_operand:V8HF 2 "register_operand" "w")
6870             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6871          (float_extend:V4SF
6872           (vec_duplicate:V4HF
6873            (vec_select:HF
6874             (match_operand:V4HF 3 "register_operand" "x")
6875             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6876          (match_operand:V4SF 1 "register_operand" "0")))]
6877   "TARGET_F16FML"
6878   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6879   [(set_attr "type" "neon_fp_mul_s")]
6880 )
6881
6882 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6883   [(set (match_operand:V4SF 0 "register_operand" "=w")
6884         (fma:V4SF
6885          (float_extend:V4SF
6886           (vec_select:V4HF
6887            (match_operand:V8HF 2 "register_operand" "w")
6888            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6889          (float_extend:V4SF
6890           (vec_duplicate:V4HF
6891            (vec_select:HF
6892             (match_operand:V4HF 3 "register_operand" "x")
6893             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6894          (match_operand:V4SF 1 "register_operand" "0")))]
6895   "TARGET_F16FML"
6896   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6897   [(set_attr "type" "neon_fp_mul_s")]
6898 )
6899
6900 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6901   [(set (match_operand:V4SF 0 "register_operand" "=w")
6902         (fma:V4SF
6903          (float_extend:V4SF
6904           (neg:V4HF
6905            (vec_select:V4HF
6906             (match_operand:V8HF 2 "register_operand" "w")
6907             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6908          (float_extend:V4SF
6909           (vec_duplicate:V4HF
6910            (vec_select:HF
6911             (match_operand:V4HF 3 "register_operand" "x")
6912             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6913          (match_operand:V4SF 1 "register_operand" "0")))]
6914   "TARGET_F16FML"
6915   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6916   [(set_attr "type" "neon_fp_mul_s")]
6917 )
6918
6919 ;; pmull
6920
6921 (define_insn "aarch64_crypto_pmulldi"
6922   [(set (match_operand:TI 0 "register_operand" "=w")
6923         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6924                      (match_operand:DI 2 "register_operand" "w")]
6925                     UNSPEC_PMULL))]
6926  "TARGET_SIMD && TARGET_AES"
6927  "pmull\\t%0.1q, %1.1d, %2.1d"
6928   [(set_attr "type" "crypto_pmull")]
6929 )
6930
6931 (define_insn "aarch64_crypto_pmullv2di"
6932  [(set (match_operand:TI 0 "register_operand" "=w")
6933        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6934                    (match_operand:V2DI 2 "register_operand" "w")]
6935                   UNSPEC_PMULL2))]
6936   "TARGET_SIMD && TARGET_AES"
6937   "pmull2\\t%0.1q, %1.2d, %2.2d"
6938   [(set_attr "type" "crypto_pmull")]
6939 )