;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
        (match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
          "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
       return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
          "=w, Umn, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
          "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
                               plus_constant (Pmode,
                                              XEXP (operands[0], 0),
                                              GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (GP_REGNUM_P (REGNO (src)))
    {
      rtx src_low_part = gen_lowpart (<VHALF>mode, src);
      rtx src_high_part = gen_highpart (<VHALF>mode, src);

      emit_insn (gen_move_lo_quad_<mode> (dst, src_low_part));
      emit_insn (gen_move_hi_quad_<mode> (dst, src_high_part));
    }
  else
    {
      rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
      rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
      rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
      rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

      emit_insn (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
      emit_insn (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
    }
  DONE;
}
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                           <MODE>mode, 0);
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
  DONE;
}
)
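
;; A scalar sketch of the identity used above (illustration only): the
;; REV + RBIT pair reverses all the bits of each 32-bit element, after
;; which counting leading zeros of the reversed value counts the
;; trailing zeros of the original.
;;
;;   #include <stdint.h>
;;   static uint32_t bit_reverse (uint32_t x)  /* bswap + rbit, scalar.  */
;;   {
;;     uint32_t r = 0;
;;     for (int i = 0; i < 32; i++)
;;       r |= ((x >> i) & 1u) << (31 - i);
;;     return r;
;;   }
;;   /* For all x: __builtin_ctz (x) == __builtin_clz (bit_reverse (x)),
;;      modulo the usual undefinedness of both builtins at x == 0.  */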

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
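
;; A scalar sketch of the expansion above (illustration only, SFmode
;; lane): AND isolates the sign bit of operand 2 and XOR merges it into
;; operand 1, so the result is operand 1 with its sign flipped whenever
;; operand 2 is negative, with no multiply needed.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static float xorsignf (float a, float b)
;;   {
;;     uint32_t ua, ub;
;;     memcpy (&ua, &a, 4);
;;     memcpy (&ub, &b, 4);
;;     ua ^= ub & 0x80000000u;   /* v_bitmask = sign bit only.  */
;;     memcpy (&a, &ua, 4);
;;     return a;
;;   }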

;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; use needs to guarantee that the source vectors are contiguous.  It
;; would be wrong to describe the operation without being able to
;; describe the permute that is also required, but even if that is done
;; the permute would have been created as a LOAD_LANES which means the
;; values in the registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)
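
;; For reference, the per-pair arithmetic FCADD performs, with each
;; even/odd lane pair holding one complex number as { real, imag }
;; (our reading of the Armv8.3-A specification, shown as C-style
;; pseudocode rather than anything taken from this file):
;;
;;   /* fcadd #90 :  res = op1 + op2 * i  */
;;   res_re[i] = op1_re[i] - op2_im[i];
;;   res_im[i] = op1_im[i] + op2_re[i];
;;   /* fcadd #270:  res = op1 - op2 * i  */
;;   res_re[i] = op1_re[i] + op2_im[i];
;;   res_im[i] = op1_im[i] - op2_re[i];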

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
                   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
                     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                    FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling
;; the loop:
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                            DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3],
                                           operands[1], operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
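
;; A self-contained form of the loop sketched above, with the element
;; widths that actually matter for this optab made explicit (narrow
;; char inputs, int accumulator); assuming -O3 and a dot-product-capable
;; target, this is the shape expected to expand through <sur>dot_prod:
;;
;;   int sdot (signed char *a, signed char *b, int len)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < len; i++)
;;       r += a[i] * b[i];
;;     return r;
;;   }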

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)
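
;; A scalar sketch of the BSL-based expansion above (illustration only,
;; SFmode lane): with a mask of just the sign bit, bit-select takes the
;; sign from operand 2 and everything else from operand 1.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static float copysignf_sketch (float x, float y)
;;   {
;;     const uint32_t m = 0x80000000u;   /* v_bitmask.  */
;;     uint32_t ux, uy;
;;     memcpy (&ux, &x, 4);
;;     memcpy (&uy, &y, 4);
;;     ux = (uy & m) | (ux & ~m);        /* BSL: mask bits pick from y.  */
;;     memcpy (&x, &ux, 4);
;;     return x;
;;   }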

(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
}
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
  return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
}
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate, as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
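
;; A C sketch contrasting the two formulations for the example above
;; (illustration only; QImode arithmetic emulated with casts):
;;
;;   #include <stdlib.h>
;;   static signed char via_abs (signed char a, signed char b)
;;   { return abs ((signed char) (a - b)); }          /* wraps, then ABS */
;;   static signed char via_minmax (signed char a, signed char b)
;;   { return (a > b ? a : b) - (a < b ? a : b); }    /* MAX - MIN form  */
;;
;; With a = 64, b = -128: via_abs yields 64, while via_minmax yields the
;; SABD result of 192 (-64 as a signed byte).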

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")]
                        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
                        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:<VDBLW> 2 "register_operand" "0")]
                        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2 tmp.8h, op1.16b, op2.16b
;; UABAL  tmp.8h, op1.16b, op2.16b
;; UADALP op3.4s, tmp.8h
;; MOV    op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD tmp2.16b, op1.16b, op2.16b
;; UDOT op3.4s, tmp2.16b, tmp1.16b
;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
{
  if (TARGET_DOTPROD)
    {
      rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
      rtx abd = gen_reg_rtx (V16QImode);
      emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
      emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
                                        abd, ones));
      DONE;
    }
  rtx reduc = gen_reg_rtx (V8HImode);
  emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                            operands[2]));
  emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                           operands[2], reduc));
  emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                           operands[3]));
  emit_move_insn (operands[0], operands[3]);
  DONE;
}
)
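
;; A self-contained example of a loop the vectorizer reduces through
;; this expansion (assuming -O3; unsigned inputs select the u-variant):
;;
;;   int sad (unsigned char *a, unsigned char *b, int n)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < n; i++)
;;       r += __builtin_abs (a[i] - b[i]);
;;     return r;
;;   }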

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
{
  switch (which_alternative)
    {
    case 0:
      return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
    case 1:
      return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                AARCH64_CHECK_BIC);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
{
  switch (which_alternative)
    {
    case 0:
      return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
    case 1:
      return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                AARCH64_CHECK_ORR);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
  switch (which_alternative)
    {
    case 0:
      return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
    case 1:
      return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
    case 2:
      return "ld1\\t{%0.<Vetype>}[%p2], %1";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
{
  int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
  operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
  operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                         INTVAL (operands[4]));

  return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
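
;; A scalar sketch of the expansion above (illustration only): viewing
;; the float lane as an integer, a logical shift right by BITS - 1
;; leaves only the sign bit, so each result lane is 1 for negative
;; inputs (including -0.0) and 0 otherwise.
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static uint32_t signbitf_sketch (float x)
;;   {
;;     uint32_t u;
;;     memcpy (&u, &x, 4);
;;     return u >> 31;          /* USHR #31 on each lane.  */
;;   }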

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "*aarch64_simd_sra<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
          (match_operand:VDQ_I 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:VDQ_BHSI 1 "register_operand")
   (match_operand:VDQ_BHSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})
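
;; SSHL and USHL shift left for positive per-lane shift counts and
;; right for negative ones, which is why the variable right-shift
;; expansions (vashr above, vlshr below) are emitted as a register
;; shift by the negated amount.  Per-lane sketch (illustration only):
;;
;;   x >> s  ==  SSHL (x, -s)   for 0 <= s < element width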

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
{
  /* An arithmetic shift right by 64 fills the result with copies of the sign
     bit, just like asr by 63; however, the standard pattern does not handle
     a shift by 64.  */
  if (INTVAL (operands[2]) == 64)
    operands[2] = GEN_INT (63);
  emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
  DONE;
}
)
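
;; Sketch of why the remap above is safe: an arithmetic right shift
;; replicates the sign bit, so for any int64_t x, (x >> 63) is already
;; 0 or -1, exactly what a (non-representable) shift by 64 would give.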
1223
1224 (define_expand "vlshr<mode>3"
1225 [(match_operand:VDQ_BHSI 0 "register_operand")
1226 (match_operand:VDQ_BHSI 1 "register_operand")
1227 (match_operand:VDQ_BHSI 2 "register_operand")]
1228 "TARGET_SIMD"
1229 {
1230 rtx neg = gen_reg_rtx (<MODE>mode);
1231 emit (gen_neg<mode>2 (neg, operands[2]));
1232 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1233 neg));
1234 DONE;
1235 })
1236
1237 (define_expand "aarch64_lshr_simddi"
1238 [(match_operand:DI 0 "register_operand")
1239 (match_operand:DI 1 "register_operand")
1240 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1241 "TARGET_SIMD"
1242 {
1243 if (INTVAL (operands[2]) == 64)
1244 emit_move_insn (operands[0], const0_rtx);
1245 else
1246 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1247 DONE;
1248 }
1249 )
1250
1251 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1252 (define_insn "vec_shr_<mode>"
1253 [(set (match_operand:VD 0 "register_operand" "=w")
1254 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1255 (match_operand:SI 2 "immediate_operand" "i")]
1256 UNSPEC_VEC_SHR))]
1257 "TARGET_SIMD"
1258 {
1259 if (BYTES_BIG_ENDIAN)
1260 return "shl %d0, %d1, %2";
1261 else
1262 return "ushr %d0, %d1, %2";
1263 }
1264 [(set_attr "type" "neon_shift_imm")]
1265 )
1266
1267 (define_expand "vec_set<mode>"
1268 [(match_operand:VALL_F16 0 "register_operand")
1269 (match_operand:<VEL> 1 "register_operand")
1270 (match_operand:SI 2 "immediate_operand")]
1271 "TARGET_SIMD"
1272 {
1273 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1274 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1275 GEN_INT (elem), operands[0]));
1276 DONE;
1277 }
1278 )
1279
1280
1281 (define_insn "aarch64_mla<mode>"
1282 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1283 (plus:VDQ_BHSI (mult:VDQ_BHSI
1284 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1285 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1286 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1287 "TARGET_SIMD"
1288 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1289 [(set_attr "type" "neon_mla_<Vetype><q>")]
1290 )
1291
1292 (define_insn "*aarch64_mla_elt<mode>"
1293 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1294 (plus:VDQHS
1295 (mult:VDQHS
1296 (vec_duplicate:VDQHS
1297 (vec_select:<VEL>
1298 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1299 (parallel [(match_operand:SI 2 "immediate_operand")])))
1300 (match_operand:VDQHS 3 "register_operand" "w"))
1301 (match_operand:VDQHS 4 "register_operand" "0")))]
1302 "TARGET_SIMD"
1303 {
1304 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1305 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1306 }
1307 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1308 )
1309
1310 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1311 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1312 (plus:VDQHS
1313 (mult:VDQHS
1314 (vec_duplicate:VDQHS
1315 (vec_select:<VEL>
1316 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1317 (parallel [(match_operand:SI 2 "immediate_operand")])))
1318 (match_operand:VDQHS 3 "register_operand" "w"))
1319 (match_operand:VDQHS 4 "register_operand" "0")))]
1320 "TARGET_SIMD"
1321 {
1322 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1323 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1324 }
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
1328 (define_insn "*aarch64_mla_elt_merge<mode>"
1329 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1330 (plus:VDQHS
1331 (mult:VDQHS (vec_duplicate:VDQHS
1332 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1333 (match_operand:VDQHS 2 "register_operand" "w"))
1334 (match_operand:VDQHS 3 "register_operand" "0")))]
1335 "TARGET_SIMD"
1336 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1337 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1338 )
1339
1340 (define_insn "aarch64_mls<mode>"
1341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1343 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1345 "TARGET_SIMD"
1346 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1347 [(set_attr "type" "neon_mla_<Vetype><q>")]
1348 )
1349
1350 (define_insn "*aarch64_mls_elt<mode>"
1351 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1352 (minus:VDQHS
1353 (match_operand:VDQHS 4 "register_operand" "0")
1354 (mult:VDQHS
1355 (vec_duplicate:VDQHS
1356 (vec_select:<VEL>
1357 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358 (parallel [(match_operand:SI 2 "immediate_operand")])))
1359 (match_operand:VDQHS 3 "register_operand" "w"))))]
1360 "TARGET_SIMD"
1361 {
1362 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1363 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1364 }
1365 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1366 )
1367
1368 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1369 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1370 (minus:VDQHS
1371 (match_operand:VDQHS 4 "register_operand" "0")
1372 (mult:VDQHS
1373 (vec_duplicate:VDQHS
1374 (vec_select:<VEL>
1375 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376 (parallel [(match_operand:SI 2 "immediate_operand")])))
1377 (match_operand:VDQHS 3 "register_operand" "w"))))]
1378 "TARGET_SIMD"
1379 {
1380 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1381 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1382 }
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 (define_insn "*aarch64_mls_elt_merge<mode>"
1387 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1388 (minus:VDQHS
1389 (match_operand:VDQHS 1 "register_operand" "0")
1390 (mult:VDQHS (vec_duplicate:VDQHS
1391 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1392 (match_operand:VDQHS 3 "register_operand" "w"))))]
1393 "TARGET_SIMD"
1394 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1395 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1396 )
1397
1398 ;; Max/Min operations.
1399 (define_insn "<su><maxmin><mode>3"
1400 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1402 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1403 "TARGET_SIMD"
1404 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1405 [(set_attr "type" "neon_minmax<q>")]
1406 )
1407
1408 (define_expand "<su><maxmin>v2di3"
1409 [(set (match_operand:V2DI 0 "register_operand")
1410 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1411 (match_operand:V2DI 2 "register_operand")))]
1412 "TARGET_SIMD"
1413 {
1414 enum rtx_code cmp_operator;
1415 rtx cmp_fmt;
1416
1417 switch (<CODE>)
1418 {
1419 case UMIN:
1420 cmp_operator = LTU;
1421 break;
1422 case SMIN:
1423 cmp_operator = LT;
1424 break;
1425 case UMAX:
1426 cmp_operator = GTU;
1427 break;
1428 case SMAX:
1429 cmp_operator = GT;
1430 break;
1431 default:
1432 gcc_unreachable ();
1433 }
1434
1435 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1436 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1437 operands[2], cmp_fmt, operands[1], operands[2]));
1438 DONE;
1439 })
1440
1441 ;; Pairwise Integer Max/Min operations.
1442 (define_insn "aarch64_<maxmin_uns>p<mode>"
1443 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1444 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1445 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1446 MAXMINV))]
1447 "TARGET_SIMD"
1448 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1449 [(set_attr "type" "neon_minmax<q>")]
1450 )
1451
1452 ;; Pairwise FP Max/Min operations.
1453 (define_insn "aarch64_<maxmin_uns>p<mode>"
1454 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1455 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1456 (match_operand:VHSDF 2 "register_operand" "w")]
1457 FMAXMINV))]
1458 "TARGET_SIMD"
1459 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1460 [(set_attr "type" "neon_minmax<q>")]
1461 )
1462
1463 ;; vec_concat gives a new vector with the low elements from operand 1, and
1464 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1465 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1466 ;; What that means, is that the RTL descriptions of the below patterns
1467 ;; need to change depending on endianness.
1468
1469 ;; Move to the low architectural bits of the register.
1470 ;; On little-endian this is { operand, zeroes }
1471 ;; On big-endian this is { zeroes, operand }
1472
1473 (define_insn "move_lo_quad_internal_<mode>"
1474 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1475 (vec_concat:VQ_NO2E
1476 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1477 (vec_duplicate:<VHALF> (const_int 0))))]
1478 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1479 "@
1480 dup\\t%d0, %1.d[0]
1481 fmov\\t%d0, %1
1482 dup\\t%d0, %1"
1483 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484 (set_attr "length" "4")
1485 (set_attr "arch" "simd,fp,simd")]
1486 )
1487
1488 (define_insn "move_lo_quad_internal_<mode>"
1489 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1490 (vec_concat:VQ_2E
1491 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1492 (const_int 0)))]
1493 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1494 "@
1495 dup\\t%d0, %1.d[0]
1496 fmov\\t%d0, %1
1497 dup\\t%d0, %1"
1498 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1499 (set_attr "length" "4")
1500 (set_attr "arch" "simd,fp,simd")]
1501 )
1502
1503 (define_insn "move_lo_quad_internal_be_<mode>"
1504 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1505 (vec_concat:VQ_NO2E
1506 (vec_duplicate:<VHALF> (const_int 0))
1507 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1508 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1509 "@
1510 dup\\t%d0, %1.d[0]
1511 fmov\\t%d0, %1
1512 dup\\t%d0, %1"
1513 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1514 (set_attr "length" "4")
1515 (set_attr "arch" "simd,fp,simd")]
1516 )
1517
1518 (define_insn "move_lo_quad_internal_be_<mode>"
1519 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1520 (vec_concat:VQ_2E
1521 (const_int 0)
1522 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1523 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1524 "@
1525 dup\\t%d0, %1.d[0]
1526 fmov\\t%d0, %1
1527 dup\\t%d0, %1"
1528 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1529 (set_attr "length" "4")
1530 (set_attr "arch" "simd,fp,simd")]
1531 )
1532
1533 (define_expand "move_lo_quad_<mode>"
1534 [(match_operand:VQ 0 "register_operand")
1535 (match_operand:VQ 1 "register_operand")]
1536 "TARGET_SIMD"
1537 {
1538 if (BYTES_BIG_ENDIAN)
1539 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1540 else
1541 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1542 DONE;
1543 }
1544 )
1545
1546 ;; Move operand1 to the high architectural bits of the register, keeping
1547 ;; the low architectural bits of operand2.
1548 ;; For little-endian this is { operand2, operand1 }
1549 ;; For big-endian this is { operand1, operand2 }
1550
1551 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1552 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1553 (vec_concat:VQ
1554 (vec_select:<VHALF>
1555 (match_dup 0)
1556 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1557 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1558 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1559 "@
1560 ins\\t%0.d[1], %1.d[0]
1561 ins\\t%0.d[1], %1"
1562 [(set_attr "type" "neon_ins")]
1563 )
1564
1565 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1566 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1567 (vec_concat:VQ
1568 (match_operand:<VHALF> 1 "register_operand" "w,r")
1569 (vec_select:<VHALF>
1570 (match_dup 0)
1571 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1572 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1573 "@
1574 ins\\t%0.d[1], %1.d[0]
1575 ins\\t%0.d[1], %1"
1576 [(set_attr "type" "neon_ins")]
1577 )
1578
1579 (define_expand "move_hi_quad_<mode>"
1580 [(match_operand:VQ 0 "register_operand")
1581 (match_operand:<VHALF> 1 "register_operand")]
1582 "TARGET_SIMD"
1583 {
1584 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1585 if (BYTES_BIG_ENDIAN)
1586 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1587 operands[1], p));
1588 else
1589 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1590 operands[1], p));
1591 DONE;
1592 })
1593
1594 ;; Narrowing operations.
1595
1596 ;; For doubles.
1597 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1598 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1599 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1600 "TARGET_SIMD"
1601 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1602 [(set_attr "type" "neon_shift_imm_narrow_q")]
1603 )
1604
1605 (define_expand "vec_pack_trunc_<mode>"
1606 [(match_operand:<VNARROWD> 0 "register_operand")
1607 (match_operand:VDN 1 "register_operand")
1608 (match_operand:VDN 2 "register_operand")]
1609 "TARGET_SIMD"
1610 {
1611 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1612 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1613 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1614
1615 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1616 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1617 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1618 DONE;
1619 })
1620
1621 ;; For quads.
1622
1623 (define_insn "vec_pack_trunc_<mode>"
1624 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1625 (vec_concat:<VNARROWQ2>
1626 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1627 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1628 "TARGET_SIMD"
1629 {
1630 if (BYTES_BIG_ENDIAN)
1631 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1632 else
1633 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1634 }
1635 [(set_attr "type" "multiple")
1636 (set_attr "length" "8")]
1637 )
1638
1639 ;; Widening operations.
1640
1641 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1642 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1643 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1644 (match_operand:VQW 1 "register_operand" "w")
1645 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1646 )))]
1647 "TARGET_SIMD"
1648 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1649 [(set_attr "type" "neon_shift_imm_long")]
1650 )
1651
1652 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1657 )))]
1658 "TARGET_SIMD"
1659 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1660 [(set_attr "type" "neon_shift_imm_long")]
1661 )
1662
1663 (define_expand "vec_unpack<su>_hi_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1666 "TARGET_SIMD"
1667 {
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1670 operands[1], p));
1671 DONE;
1672 }
1673 )
1674
1675 (define_expand "vec_unpack<su>_lo_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1678 "TARGET_SIMD"
1679 {
1680 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1681 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1682 operands[1], p));
1683 DONE;
1684 }
1685 )
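
;; For reference, a sketch in C of the unpack operations (assuming
;; arm_neon.h): vmovl_s16 and vmovl_high_s16 are expected to map to
;; the SXTL/SXTL2 forms above:
;;
;;   #include <arm_neon.h>
;;
;;   int32x4_t widen_lo (int16x8_t x)
;;   { return vmovl_s16 (vget_low_s16 (x)); }
;;   int32x4_t widen_hi (int16x8_t x)
;;   { return vmovl_high_s16 (x); }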
1686
1687 ;; Widening arithmetic.
1688
1689 (define_insn "*aarch64_<su>mlal_lo<mode>"
1690 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691 (plus:<VWIDE>
1692 (mult:<VWIDE>
1693 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1694 (match_operand:VQW 2 "register_operand" "w")
1695 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1696 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1697 (match_operand:VQW 4 "register_operand" "w")
1698 (match_dup 3))))
1699 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1700 "TARGET_SIMD"
1701 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1702 [(set_attr "type" "neon_mla_<Vetype>_long")]
1703 )
1704
1705 (define_insn "*aarch64_<su>mlal_hi<mode>"
1706 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1707 (plus:<VWIDE>
1708 (mult:<VWIDE>
1709 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1710 (match_operand:VQW 2 "register_operand" "w")
1711 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1712 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1713 (match_operand:VQW 4 "register_operand" "w")
1714 (match_dup 3))))
1715 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1716 "TARGET_SIMD"
1717 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1718 [(set_attr "type" "neon_mla_<Vetype>_long")]
1719 )
1720
1721 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1723 (minus:<VWIDE>
1724 (match_operand:<VWIDE> 1 "register_operand" "0")
1725 (mult:<VWIDE>
1726 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1727 (match_operand:VQW 2 "register_operand" "w")
1728 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1729 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1730 (match_operand:VQW 4 "register_operand" "w")
1731 (match_dup 3))))))]
1732 "TARGET_SIMD"
1733 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1734 [(set_attr "type" "neon_mla_<Vetype>_long")]
1735 )
1736
1737 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1739 (minus:<VWIDE>
1740 (match_operand:<VWIDE> 1 "register_operand" "0")
1741 (mult:<VWIDE>
1742 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1743 (match_operand:VQW 2 "register_operand" "w")
1744 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1745 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1746 (match_operand:VQW 4 "register_operand" "w")
1747 (match_dup 3))))))]
1748 "TARGET_SIMD"
1749 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1750 [(set_attr "type" "neon_mla_<Vetype>_long")]
1751 )
1752
1753 (define_insn "*aarch64_<su>mlal<mode>"
1754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1755 (plus:<VWIDE>
1756 (mult:<VWIDE>
1757 (ANY_EXTEND:<VWIDE>
1758 (match_operand:VD_BHSI 1 "register_operand" "w"))
1759 (ANY_EXTEND:<VWIDE>
1760 (match_operand:VD_BHSI 2 "register_operand" "w")))
1761 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1762 "TARGET_SIMD"
1763 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1764 [(set_attr "type" "neon_mla_<Vetype>_long")]
1765 )
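
;; Illustrative C (a sketch assuming arm_neon.h): vmlal_s16 should
;; match the SMLAL form above:
;;
;;   #include <arm_neon.h>
;;
;;   /* Lane-wise acc += (int32) a * (int32) b.  */
;;   int32x4_t
;;   mla_widen (int32x4_t acc, int16x4_t a, int16x4_t b)
;;   {
;;     return vmlal_s16 (acc, a, b);   /* smlal v0.4s, v1.4h, v2.4h  */
;;   }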
1766
1767 (define_insn "*aarch64_<su>mlsl<mode>"
1768 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1769 (minus:<VWIDE>
1770 (match_operand:<VWIDE> 1 "register_operand" "0")
1771 (mult:<VWIDE>
1772 (ANY_EXTEND:<VWIDE>
1773 (match_operand:VD_BHSI 2 "register_operand" "w"))
1774 (ANY_EXTEND:<VWIDE>
1775 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1776 "TARGET_SIMD"
1777 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1778 [(set_attr "type" "neon_mla_<Vetype>_long")]
1779 )
1780
1781 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1782 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1783 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1784 (match_operand:VQW 1 "register_operand" "w")
1785 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1786 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1787 (match_operand:VQW 2 "register_operand" "w")
1788 (match_dup 3)))))]
1789 "TARGET_SIMD"
1790 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1791 [(set_attr "type" "neon_mul_<Vetype>_long")]
1792 )
1793
1794 (define_expand "vec_widen_<su>mult_lo_<mode>"
1795 [(match_operand:<VWIDE> 0 "register_operand")
1796 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1797 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1798 "TARGET_SIMD"
1799 {
1800 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1801 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1802 operands[1],
1803 operands[2], p));
1804 DONE;
1805 }
1806 )
1807
1808 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1810 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1811 (match_operand:VQW 1 "register_operand" "w")
1812 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1813 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1814 (match_operand:VQW 2 "register_operand" "w")
1815 (match_dup 3)))))]
1816 "TARGET_SIMD"
1817 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1818 [(set_attr "type" "neon_mul_<Vetype>_long")]
1819 )
1820
1821 (define_expand "vec_widen_<su>mult_hi_<mode>"
1822 [(match_operand:<VWIDE> 0 "register_operand")
1823 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1824 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1825 "TARGET_SIMD"
1826 {
1827 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1828 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1829 operands[1],
1830 operands[2], p));
DONE;
}
1834 )
1835
1836 ;; FP vector operations.
1837 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1838 ;; double-precision (64-bit) floating-point data types and arithmetic as
1839 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1840 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1841 ;;
;; Floating-point operations can raise an exception.  Vectorising such
;; operations is safe for the reasons explained below.
1844 ;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling; however, this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code:
1849 ;; 1. If trapped floating-point exceptions are available, then a trap
1850 ;; will be taken when any lane raises an enabled exception. A trap
1851 ;; handler may determine which lane raised the exception.
;; 2. Alternatively, a sticky exception flag is set in the
;;    floating-point status register (FPSR).  Software may explicitly
;;    test the exception flags, in which case the tests either occur
;;    inside the region being vectorised, preventing vectorisation
;;    and allowing precise identification of the failing operation,
;;    or occur outside any vectorisable region, in which case the
;;    specific operation and lane are not of interest.
1858
1859 ;; FP arithmetic operations.
1860
1861 (define_insn "add<mode>3"
1862 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1863 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1864 (match_operand:VHSDF 2 "register_operand" "w")))]
1865 "TARGET_SIMD"
1866 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1867 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1868 )
1869
1870 (define_insn "sub<mode>3"
1871 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1872 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1873 (match_operand:VHSDF 2 "register_operand" "w")))]
1874 "TARGET_SIMD"
1875 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1877 )
1878
1879 (define_insn "mul<mode>3"
1880 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1881 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1882 (match_operand:VHSDF 2 "register_operand" "w")))]
1883 "TARGET_SIMD"
1884 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1885 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1886 )
1887
1888 (define_expand "div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1891 (match_operand:VHSDF 2 "register_operand")))]
1892 "TARGET_SIMD"
1893 {
1894 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1895 DONE;
1896
1897 operands[1] = force_reg (<MODE>mode, operands[1]);
1898 })
1899
1900 (define_insn "*div<mode>3"
1901 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1902 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1903 (match_operand:VHSDF 2 "register_operand" "w")))]
1904 "TARGET_SIMD"
1905 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1906 [(set_attr "type" "neon_fp_div_<stype><q>")]
1907 )
1908
1909 (define_insn "neg<mode>2"
1910 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1911 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1912 "TARGET_SIMD"
1913 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1914 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1915 )
1916
1917 (define_insn "abs<mode>2"
1918 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1920 "TARGET_SIMD"
1921 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1922 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1923 )
1924
1925 (define_insn "fma<mode>4"
1926 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1927 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1928 (match_operand:VHSDF 2 "register_operand" "w")
1929 (match_operand:VHSDF 3 "register_operand" "0")))]
1930 "TARGET_SIMD"
1931 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1932 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1933 )
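
;; A minimal sketch (assuming arm_neon.h): the ACLE fused
;; multiply-accumulate intrinsic is expected to match the pattern
;; above:
;;
;;   #include <arm_neon.h>
;;
;;   /* Computes acc + a * b with a single rounding step (FMLA).  */
;;   float32x4_t
;;   fma_f32 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmaq_f32 (acc, a, b);
;;   }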
1934
1935 (define_insn "*aarch64_fma4_elt<mode>"
1936 [(set (match_operand:VDQF 0 "register_operand" "=w")
1937 (fma:VDQF
1938 (vec_duplicate:VDQF
1939 (vec_select:<VEL>
1940 (match_operand:VDQF 1 "register_operand" "<h_con>")
1941 (parallel [(match_operand:SI 2 "immediate_operand")])))
1942 (match_operand:VDQF 3 "register_operand" "w")
1943 (match_operand:VDQF 4 "register_operand" "0")))]
1944 "TARGET_SIMD"
1945 {
1946 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1947 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1948 }
1949 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1950 )
1951
1952 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1953 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1954 (fma:VDQSF
1955 (vec_duplicate:VDQSF
1956 (vec_select:<VEL>
1957 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1958 (parallel [(match_operand:SI 2 "immediate_operand")])))
1959 (match_operand:VDQSF 3 "register_operand" "w")
1960 (match_operand:VDQSF 4 "register_operand" "0")))]
1961 "TARGET_SIMD"
1962 {
1963 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1964 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1965 }
1966 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1970 [(set (match_operand:VMUL 0 "register_operand" "=w")
1971 (fma:VMUL
1972 (vec_duplicate:VMUL
1973 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1974 (match_operand:VMUL 2 "register_operand" "w")
1975 (match_operand:VMUL 3 "register_operand" "0")))]
1976 "TARGET_SIMD"
1977 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1978 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1979 )
1980
1981 (define_insn "*aarch64_fma4_elt_to_64v2df"
1982 [(set (match_operand:DF 0 "register_operand" "=w")
1983 (fma:DF
1984 (vec_select:DF
1985 (match_operand:V2DF 1 "register_operand" "w")
1986 (parallel [(match_operand:SI 2 "immediate_operand")]))
1987 (match_operand:DF 3 "register_operand" "w")
1988 (match_operand:DF 4 "register_operand" "0")))]
1989 "TARGET_SIMD"
1990 {
1991 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1992 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1993 }
1994 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1995 )
1996
1997 (define_insn "fnma<mode>4"
1998 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1999 (fma:VHSDF
2000 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2001 (match_operand:VHSDF 2 "register_operand" "w")
2002 (match_operand:VHSDF 3 "register_operand" "0")))]
2003 "TARGET_SIMD"
2004 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2006 )
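
;; Correspondingly, a sketch for the negated form (assuming
;; arm_neon.h): vfmsq_f32 computes acc - a * b and should match the
;; FMLS pattern above:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   fms_f32 (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vfmsq_f32 (acc, a, b);   /* fmls v0.4s, v1.4s, v2.4s  */
;;   }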
2007
2008 (define_insn "*aarch64_fnma4_elt<mode>"
2009 [(set (match_operand:VDQF 0 "register_operand" "=w")
2010 (fma:VDQF
2011 (neg:VDQF
2012 (match_operand:VDQF 3 "register_operand" "w"))
2013 (vec_duplicate:VDQF
2014 (vec_select:<VEL>
2015 (match_operand:VDQF 1 "register_operand" "<h_con>")
2016 (parallel [(match_operand:SI 2 "immediate_operand")])))
2017 (match_operand:VDQF 4 "register_operand" "0")))]
2018 "TARGET_SIMD"
2019 {
2020 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2021 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2022 }
2023 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2024 )
2025
2026 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2027 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2028 (fma:VDQSF
2029 (neg:VDQSF
2030 (match_operand:VDQSF 3 "register_operand" "w"))
2031 (vec_duplicate:VDQSF
2032 (vec_select:<VEL>
2033 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2034 (parallel [(match_operand:SI 2 "immediate_operand")])))
2035 (match_operand:VDQSF 4 "register_operand" "0")))]
2036 "TARGET_SIMD"
2037 {
2038 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2039 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2040 }
2041 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2042 )
2043
2044 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2045 [(set (match_operand:VMUL 0 "register_operand" "=w")
2046 (fma:VMUL
2047 (neg:VMUL
2048 (match_operand:VMUL 2 "register_operand" "w"))
2049 (vec_duplicate:VMUL
2050 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2051 (match_operand:VMUL 3 "register_operand" "0")))]
2052 "TARGET_SIMD"
2053 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2054 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2055 )
2056
2057 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2058 [(set (match_operand:DF 0 "register_operand" "=w")
2059 (fma:DF
2060 (vec_select:DF
2061 (match_operand:V2DF 1 "register_operand" "w")
2062 (parallel [(match_operand:SI 2 "immediate_operand")]))
2063 (neg:DF
2064 (match_operand:DF 3 "register_operand" "w"))
2065 (match_operand:DF 4 "register_operand" "0")))]
2066 "TARGET_SIMD"
2067 {
2068 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2069 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2070 }
2071 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2072 )
2073
2074 ;; Vector versions of the floating-point frint patterns.
2075 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2076 (define_insn "<frint_pattern><mode>2"
2077 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2078 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2079 FRINT))]
2080 "TARGET_SIMD"
2081 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_round_<stype><q>")]
2083 )
2084
2085 ;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor.
2087 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2088 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2089 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2090 [(match_operand:VHSDF 1 "register_operand" "w")]
2091 FCVT)))]
2092 "TARGET_SIMD"
2093 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2094 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2095 )
2096
2097 ;; HF Scalar variants of related SIMD instructions.
2098 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2099 [(set (match_operand:HI 0 "register_operand" "=w")
2100 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2101 FCVT)))]
2102 "TARGET_SIMD_F16INST"
2103 "fcvt<frint_suffix><su>\t%h0, %h1"
2104 [(set_attr "type" "neon_fp_to_int_s")]
2105 )
2106
2107 (define_insn "<optab>_trunchfhi2"
2108 [(set (match_operand:HI 0 "register_operand" "=w")
2109 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2110 "TARGET_SIMD_F16INST"
2111 "fcvtz<su>\t%h0, %h1"
2112 [(set_attr "type" "neon_fp_to_int_s")]
2113 )
2114
2115 (define_insn "<optab>hihf2"
2116 [(set (match_operand:HF 0 "register_operand" "=w")
2117 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2118 "TARGET_SIMD_F16INST"
2119 "<su_optab>cvtf\t%h0, %h1"
2120 [(set_attr "type" "neon_int_to_fp_s")]
2121 )
2122
2123 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2124 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2125 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2126 [(mult:VDQF
2127 (match_operand:VDQF 1 "register_operand" "w")
2128 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2129 UNSPEC_FRINTZ)))]
2130 "TARGET_SIMD
2131 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2132 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2133 {
2134 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2135 char buf[64];
2136 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2137 output_asm_insn (buf, operands);
2138 return "";
2139 }
2140 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2141 )
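
;; The pattern above folds a multiply by a power of two into the
;; #fbits immediate of the conversion.  A hedged sketch with the ACLE
;; intrinsic (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;
;;   /* Roughly vcvtq_s32_f32 (x * 16.0f), i.e. 4 fraction bits.  */
;;   int32x4_t
;;   to_fixed_q4 (float32x4_t x)
;;   {
;;     return vcvtq_n_s32_f32 (x, 4);   /* fcvtzs v0.4s, v0.4s, #4  */
;;   }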
2142
2143 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2144 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2145 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2146 [(match_operand:VHSDF 1 "register_operand")]
2147 UNSPEC_FRINTZ)))]
2148 "TARGET_SIMD"
2149 {})
2150
2151 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2152 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2153 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2154 [(match_operand:VHSDF 1 "register_operand")]
2155 UNSPEC_FRINTZ)))]
2156 "TARGET_SIMD"
2157 {})
2158
2159 (define_expand "ftrunc<VHSDF:mode>2"
2160 [(set (match_operand:VHSDF 0 "register_operand")
2161 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2162 UNSPEC_FRINTZ))]
2163 "TARGET_SIMD"
2164 {})
2165
2166 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2167 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2168 (FLOATUORS:VHSDF
2169 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2170 "TARGET_SIMD"
2171 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2172 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2173 )
2174
2175 ;; Conversions between vectors of floats and doubles.
2176 ;; Contains a mix of patterns to match standard pattern names
2177 ;; and those for intrinsics.
2178
2179 ;; Float widening operations.
2180
2181 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2182 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2183 (float_extend:<VWIDE> (vec_select:<VHALF>
2184 (match_operand:VQ_HSF 1 "register_operand" "w")
2185 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2186 )))]
2187 "TARGET_SIMD"
2188 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2189 [(set_attr "type" "neon_fp_cvt_widen_s")]
2190 )
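
;; For example (a sketch assuming arm_neon.h), vcvt_f64_f32 widens
;; two single-precision lanes to double precision and is expected to
;; use the FCVTL form matched by the patterns in this section:
;;
;;   #include <arm_neon.h>
;;
;;   float64x2_t widen_f32 (float32x2_t x) { return vcvt_f64_f32 (x); }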
2191
2192 ;; Convert between fixed-point and floating-point (vector modes)
2193
2194 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2195 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2196 (unspec:<VHSDF:FCVT_TARGET>
2197 [(match_operand:VHSDF 1 "register_operand" "w")
2198 (match_operand:SI 2 "immediate_operand" "i")]
2199 FCVT_F2FIXED))]
2200 "TARGET_SIMD"
2201 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2202 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2203 )
2204
2205 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2206 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2207 (unspec:<VDQ_HSDI:FCVT_TARGET>
2208 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2209 (match_operand:SI 2 "immediate_operand" "i")]
2210 FCVT_FIXED2F))]
2211 "TARGET_SIMD"
2212 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2213 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2214 )
2215
2216 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2217 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2218 ;; the meaning of HI and LO changes depending on the target endianness.
2219 ;; While elsewhere we map the higher numbered elements of a vector to
2220 ;; the lower architectural lanes of the vector, for these patterns we want
2221 ;; to always treat "hi" as referring to the higher architectural lanes.
;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns, their behaviour is as required.
2224
2225 (define_expand "vec_unpacks_lo_<mode>"
2226 [(match_operand:<VWIDE> 0 "register_operand")
2227 (match_operand:VQ_HSF 1 "register_operand")]
2228 "TARGET_SIMD"
2229 {
2230 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2231 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2232 operands[1], p));
2233 DONE;
2234 }
2235 )
2236
2237 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2239 (float_extend:<VWIDE> (vec_select:<VHALF>
2240 (match_operand:VQ_HSF 1 "register_operand" "w")
2241 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2242 )))]
2243 "TARGET_SIMD"
2244 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2245 [(set_attr "type" "neon_fp_cvt_widen_s")]
2246 )
2247
2248 (define_expand "vec_unpacks_hi_<mode>"
2249 [(match_operand:<VWIDE> 0 "register_operand")
2250 (match_operand:VQ_HSF 1 "register_operand")]
2251 "TARGET_SIMD"
2252 {
2253 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
operands[1], p));
2256 DONE;
2257 }
2258 )

(define_insn "aarch64_float_extend_lo_<Vwide>"
2260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261 (float_extend:<VWIDE>
2262 (match_operand:VDF 1 "register_operand" "w")))]
2263 "TARGET_SIMD"
2264 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2265 [(set_attr "type" "neon_fp_cvt_widen_s")]
2266 )
2267
2268 ;; Float narrowing operations.
2269
2270 (define_insn "aarch64_float_truncate_lo_<mode>"
2271 [(set (match_operand:VDF 0 "register_operand" "=w")
2272 (float_truncate:VDF
2273 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2274 "TARGET_SIMD"
2275 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2276 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2277 )
2278
2279 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2280 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2281 (vec_concat:<VDBL>
2282 (match_operand:VDF 1 "register_operand" "0")
2283 (float_truncate:VDF
2284 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2285 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2286 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2287 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2288 )
2289
2290 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2291 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2292 (vec_concat:<VDBL>
2293 (float_truncate:VDF
2294 (match_operand:<VWIDE> 2 "register_operand" "w"))
2295 (match_operand:VDF 1 "register_operand" "0")))]
2296 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2297 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2298 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2299 )
2300
2301 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2302 [(match_operand:<VDBL> 0 "register_operand")
2303 (match_operand:VDF 1 "register_operand")
2304 (match_operand:<VWIDE> 2 "register_operand")]
2305 "TARGET_SIMD"
2306 {
2307 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2308 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2309 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2310 emit_insn (gen (operands[0], operands[1], operands[2]));
2311 DONE;
2312 }
2313 )
2314
2315 (define_expand "vec_pack_trunc_v2df"
2316 [(set (match_operand:V4SF 0 "register_operand")
2317 (vec_concat:V4SF
2318 (float_truncate:V2SF
2319 (match_operand:V2DF 1 "register_operand"))
2320 (float_truncate:V2SF
2321 (match_operand:V2DF 2 "register_operand"))
2322 ))]
2323 "TARGET_SIMD"
2324 {
2325 rtx tmp = gen_reg_rtx (V2SFmode);
2326 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2327 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2328
2329 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2330 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2331 tmp, operands[hi]));
2332 DONE;
2333 }
2334 )
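
;; A rough C equivalent of the expansion above (assuming arm_neon.h);
;; GCC may emit this as an FCVTN/FCVTN2 pair:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   pack_f64 (float64x2_t a, float64x2_t b)
;;   {
;;     return vcombine_f32 (vcvt_f32_f64 (a), vcvt_f32_f64 (b));
;;   }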
2335
2336 (define_expand "vec_pack_trunc_df"
2337 [(set (match_operand:V2SF 0 "register_operand")
2338 (vec_concat:V2SF
2339 (float_truncate:SF
2340 (match_operand:DF 1 "register_operand"))
2341 (float_truncate:SF
2342 (match_operand:DF 2 "register_operand"))
2343 ))]
2344 "TARGET_SIMD"
2345 {
2346 rtx tmp = gen_reg_rtx (V2SFmode);
2347 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2348 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2349
2350 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2351 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2352 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2353 DONE;
2354 }
2355 )
2356
2357 ;; FP Max/Min
;; Max/Min are introduced by idiom recognition in GCC's mid-end.  An
2359 ;; expression like:
2360 ;; a = (b < c) ? b : c;
2361 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2362 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2363 ;; -ffast-math.
2364 ;;
2365 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2366 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2367 ;; operand will be returned when both operands are zero (i.e. they may not
2368 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2369 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2370 ;; NaNs.
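;;
;; For instance (a sketch assuming arm_neon.h), the IEEE
;; maxNum/minNum behaviour is reachable directly via the ACLE
;; intrinsics, which are expected to match the patterns below:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   max_nm (float32x4_t a, float32x4_t b)
;;   {
;;     return vmaxnmq_f32 (a, b);   /* fmaxnm v0.4s, v1.4s, v2.4s  */
;;   }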
2371
2372 (define_insn "<su><maxmin><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")))]
2376 "TARGET_SIMD"
2377 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2378 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2379 )
2380
2381 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2382 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2383 ;; which implement the IEEE fmax ()/fmin () functions.
2384 (define_insn "<maxmin_uns><mode>3"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2387 (match_operand:VHSDF 2 "register_operand" "w")]
2388 FMAXMIN_UNS))]
2389 "TARGET_SIMD"
2390 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2391 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2392 )
2393
2394 ;; 'across lanes' add.
2395
2396 (define_expand "reduc_plus_scal_<mode>"
2397 [(match_operand:<VEL> 0 "register_operand")
2398 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2399 UNSPEC_ADDV)]
2400 "TARGET_SIMD"
2401 {
2402 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2403 rtx scratch = gen_reg_rtx (<MODE>mode);
2404 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2405 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2406 DONE;
2407 }
2408 )
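
;; A short sketch of an across-lanes addition at the source level
;; (assuming arm_neon.h); vaddvq_s32 is expected to use the ADDV form
;; generated here:
;;
;;   #include <arm_neon.h>
;;
;;   int32_t sum_lanes (int32x4_t x) { return vaddvq_s32 (x); }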
2409
2410 (define_insn "aarch64_faddp<mode>"
2411 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2412 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2413 (match_operand:VHSDF 2 "register_operand" "w")]
2414 UNSPEC_FADDV))]
2415 "TARGET_SIMD"
2416 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2417 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2418 )
2419
2420 (define_insn "aarch64_reduc_plus_internal<mode>"
2421 [(set (match_operand:VDQV 0 "register_operand" "=w")
2422 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2423 UNSPEC_ADDV))]
2424 "TARGET_SIMD"
2425 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2426 [(set_attr "type" "neon_reduc_add<q>")]
2427 )
2428
2429 (define_insn "aarch64_reduc_plus_internalv2si"
2430 [(set (match_operand:V2SI 0 "register_operand" "=w")
2431 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2432 UNSPEC_ADDV))]
2433 "TARGET_SIMD"
2434 "addp\\t%0.2s, %1.2s, %1.2s"
2435 [(set_attr "type" "neon_reduc_add")]
2436 )
2437
2438 (define_insn "reduc_plus_scal_<mode>"
2439 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2440 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2441 UNSPEC_FADDV))]
2442 "TARGET_SIMD"
2443 "faddp\\t%<Vetype>0, %1.<Vtype>"
2444 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2445 )
2446
2447 (define_expand "reduc_plus_scal_v4sf"
2448 [(set (match_operand:SF 0 "register_operand")
2449 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2450 UNSPEC_FADDV))]
2451 "TARGET_SIMD"
2452 {
2453 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2454 rtx scratch = gen_reg_rtx (V4SFmode);
2455 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2456 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2457 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2458 DONE;
2459 })
2460
2461 (define_insn "clrsb<mode>2"
2462 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2463 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2464 "TARGET_SIMD"
2465 "cls\\t%0.<Vtype>, %1.<Vtype>"
2466 [(set_attr "type" "neon_cls<q>")]
2467 )
2468
2469 (define_insn "clz<mode>2"
2470 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2471 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2472 "TARGET_SIMD"
2473 "clz\\t%0.<Vtype>, %1.<Vtype>"
2474 [(set_attr "type" "neon_cls<q>")]
2475 )
2476
2477 (define_insn "popcount<mode>2"
2478 [(set (match_operand:VB 0 "register_operand" "=w")
2479 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2480 "TARGET_SIMD"
2481 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2482 [(set_attr "type" "neon_cnt<q>")]
2483 )
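
;; Illustration (a sketch assuming arm_neon.h): the byte-wise
;; population count should map to CNT:
;;
;;   #include <arm_neon.h>
;;
;;   uint8x16_t popcnt_bytes (uint8x16_t x) { return vcntq_u8 (x); }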
2484
2485 ;; 'across lanes' max and min ops.
2486
2487 ;; Template for outputting a scalar, so we can create __builtins which can be
2488 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2489 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2490 [(match_operand:<VEL> 0 "register_operand")
2491 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2492 FMAXMINV)]
2493 "TARGET_SIMD"
2494 {
2495 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2496 rtx scratch = gen_reg_rtx (<MODE>mode);
2497 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2498 operands[1]));
2499 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2500 DONE;
2501 }
2502 )
2503
2504 ;; Likewise for integer cases, signed and unsigned.
2505 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2506 [(match_operand:<VEL> 0 "register_operand")
2507 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2508 MAXMINV)]
2509 "TARGET_SIMD"
2510 {
2511 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2512 rtx scratch = gen_reg_rtx (<MODE>mode);
2513 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2514 operands[1]));
2515 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2516 DONE;
2517 }
2518 )
2519
2520 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2521 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2522 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2523 MAXMINV))]
2524 "TARGET_SIMD"
2525 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2526 [(set_attr "type" "neon_reduc_minmax<q>")]
2527 )
2528
2529 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2530 [(set (match_operand:V2SI 0 "register_operand" "=w")
2531 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2532 MAXMINV))]
2533 "TARGET_SIMD"
2534 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2535 [(set_attr "type" "neon_reduc_minmax")]
2536 )
2537
2538 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2539 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2540 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2541 FMAXMINV))]
2542 "TARGET_SIMD"
2543 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2544 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2545 )
2546
2547 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2548 ;; allocation.
2549 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2550 ;; to select.
2551 ;;
;; Thus our BSL is of the form:
;;   op0 = bsl (op1, op2, op3)      (op1 is the mask)
;; We can use any of:
;;
;;   if (op0 = op1)  (the mask is also the destination)
;;     bsl  op0, op2, op3
;;   if (op0 = op3)  (so 1-bits in the mask choose bits from op2, else op0)
;;     bit  op0, op2, op1
;;   if (op0 = op2)  (so 0-bits in the mask choose bits from op3, else op0)
;;     bif  op0, op3, op1
;;
;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
2564 ;; Some forms of straight-line code may generate the equivalent form
2565 ;; in *aarch64_simd_bsl<mode>_alt.
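;;
;; In plain C the selection is op0 = (op2 & op1) | (op3 & ~op1),
;; which the XOR form below encodes as ((op2 ^ op3) & op1) ^ op3.
;; A hedged intrinsic-level sketch (assuming arm_neon.h):
;;
;;   #include <arm_neon.h>
;;
;;   /* Bits of A where MASK is 1, bits of B where MASK is 0.  */
;;   uint32x4_t
;;   select_bits (uint32x4_t mask, uint32x4_t a, uint32x4_t b)
;;   {
;;     return vbslq_u32 (mask, a, b);   /* one of bsl/bit/bif  */
;;   }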
2566
2567 (define_insn "aarch64_simd_bsl<mode>_internal"
2568 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2569 (xor:VDQ_I
2570 (and:VDQ_I
2571 (xor:VDQ_I
2572 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2573 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2574 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2575 (match_dup:<V_INT_EQUIV> 3)
2576 ))]
2577 "TARGET_SIMD"
2578 "@
2579 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2580 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2581 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2582 [(set_attr "type" "neon_bsl<q>")]
2583 )
2584
2585 ;; We need this form in addition to the above pattern to match the case
2586 ;; when combine tries merging three insns such that the second operand of
2587 ;; the outer XOR matches the second operand of the inner XOR rather than
2588 ;; the first. The two are equivalent but since recog doesn't try all
2589 ;; permutations of commutative operations, we have to have a separate pattern.
2590
2591 (define_insn "*aarch64_simd_bsl<mode>_alt"
2592 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2593 (xor:VDQ_I
2594 (and:VDQ_I
2595 (xor:VDQ_I
2596 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2597 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2598 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2599 (match_dup:<V_INT_EQUIV> 2)))]
2600 "TARGET_SIMD"
2601 "@
2602 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2603 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2604 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2605 [(set_attr "type" "neon_bsl<q>")]
2606 )
2607
;; DImode is special: we want to avoid computing, in the vector
;; registers, operations that are more naturally computed in general
;; purpose registers.  If we did that, we would need to move all three
;; operands from general purpose registers to vector registers, then
;; back again.  However, we don't want to make this pattern an UNSPEC
;; as we'd lose scope for optimizations based on the component
;; operations of a BSL.
2614 ;;
2615 ;; That means we need a splitter back to the individual operations, if they
2616 ;; would be better calculated on the integer side.
2617
2618 (define_insn_and_split "aarch64_simd_bsldi_internal"
2619 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2620 (xor:DI
2621 (and:DI
2622 (xor:DI
2623 (match_operand:DI 3 "register_operand" "w,0,w,r")
2624 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2625 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2626 (match_dup:DI 3)
2627 ))]
2628 "TARGET_SIMD"
2629 "@
2630 bsl\\t%0.8b, %2.8b, %3.8b
2631 bit\\t%0.8b, %2.8b, %1.8b
2632 bif\\t%0.8b, %3.8b, %1.8b
2633 #"
2634 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2635 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2636 {
2637 /* Split back to individual operations. If we're before reload, and
2638 able to create a temporary register, do so. If we're after reload,
2639 we've got an early-clobber destination register, so use that.
2640 Otherwise, we can't create pseudos and we can't yet guarantee that
2641 operands[0] is safe to write, so FAIL to split. */
2642
2643 rtx scratch;
2644 if (reload_completed)
2645 scratch = operands[0];
2646 else if (can_create_pseudo_p ())
2647 scratch = gen_reg_rtx (DImode);
2648 else
2649 FAIL;
2650
2651 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2652 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2653 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2654 DONE;
2655 }
2656 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2657 (set_attr "length" "4,4,4,12")]
2658 )
2659
2660 (define_insn_and_split "aarch64_simd_bsldi_alt"
2661 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2662 (xor:DI
2663 (and:DI
2664 (xor:DI
2665 (match_operand:DI 3 "register_operand" "w,w,0,r")
2666 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2667 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2668 (match_dup:DI 2)
2669 ))]
2670 "TARGET_SIMD"
2671 "@
2672 bsl\\t%0.8b, %3.8b, %2.8b
2673 bit\\t%0.8b, %3.8b, %1.8b
2674 bif\\t%0.8b, %2.8b, %1.8b
2675 #"
2676 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2677 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2678 {
2679 /* Split back to individual operations. If we're before reload, and
2680 able to create a temporary register, do so. If we're after reload,
2681 we've got an early-clobber destination register, so use that.
2682 Otherwise, we can't create pseudos and we can't yet guarantee that
2683 operands[0] is safe to write, so FAIL to split. */
2684
2685 rtx scratch;
2686 if (reload_completed)
2687 scratch = operands[0];
2688 else if (can_create_pseudo_p ())
2689 scratch = gen_reg_rtx (DImode);
2690 else
2691 FAIL;
2692
2693 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2694 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2695 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2696 DONE;
2697 }
2698 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2699 (set_attr "length" "4,4,4,12")]
2700 )
2701
2702 (define_expand "aarch64_simd_bsl<mode>"
2703 [(match_operand:VALLDIF 0 "register_operand")
2704 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2705 (match_operand:VALLDIF 2 "register_operand")
2706 (match_operand:VALLDIF 3 "register_operand")]
2707 "TARGET_SIMD"
2708 {
2709 /* We can't alias operands together if they have different modes. */
2710 rtx tmp = operands[0];
2711 if (FLOAT_MODE_P (<MODE>mode))
2712 {
2713 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2714 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2715 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2716 }
2717 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2718 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2719 operands[1],
2720 operands[2],
2721 operands[3]));
2722 if (tmp != operands[0])
2723 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2724
2725 DONE;
2726 })
2727
2728 (define_expand "vcond_mask_<mode><v_int_equiv>"
2729 [(match_operand:VALLDI 0 "register_operand")
2730 (match_operand:VALLDI 1 "nonmemory_operand")
2731 (match_operand:VALLDI 2 "nonmemory_operand")
2732 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2733 "TARGET_SIMD"
2734 {
/* If we have (a = (P) ? -1 : 0), then we can simply move the
   generated mask (the result must be an integer vector).  */
2737 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2738 && operands[2] == CONST0_RTX (<MODE>mode))
2739 emit_move_insn (operands[0], operands[3]);
2740 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2741 else if (operands[1] == CONST0_RTX (<MODE>mode)
2742 && operands[2] == CONSTM1_RTX (<MODE>mode))
2743 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2744 else
2745 {
2746 if (!REG_P (operands[1]))
2747 operands[1] = force_reg (<MODE>mode, operands[1]);
2748 if (!REG_P (operands[2]))
2749 operands[2] = force_reg (<MODE>mode, operands[2]);
2750 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2751 operands[1], operands[2]));
2752 }
2753
2754 DONE;
2755 })
2756
2757 ;; Patterns comparing two vectors to produce a mask.
2758
2759 (define_expand "vec_cmp<mode><mode>"
2760 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2761 (match_operator 1 "comparison_operator"
2762 [(match_operand:VSDQ_I_DI 2 "register_operand")
2763 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2764 "TARGET_SIMD"
2765 {
2766 rtx mask = operands[0];
2767 enum rtx_code code = GET_CODE (operands[1]);
2768
2769 switch (code)
2770 {
2771 case NE:
2772 case LE:
2773 case LT:
2774 case GE:
2775 case GT:
2776 case EQ:
2777 if (operands[3] == CONST0_RTX (<MODE>mode))
2778 break;
2779
2780 /* Fall through. */
2781 default:
2782 if (!REG_P (operands[3]))
2783 operands[3] = force_reg (<MODE>mode, operands[3]);
2784
2785 break;
2786 }
2787
2788 switch (code)
2789 {
2790 case LT:
2791 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2792 break;
2793
2794 case GE:
2795 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2796 break;
2797
2798 case LE:
2799 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2800 break;
2801
2802 case GT:
2803 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2804 break;
2805
2806 case LTU:
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2808 break;
2809
2810 case GEU:
2811 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2812 break;
2813
2814 case LEU:
2815 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2816 break;
2817
2818 case GTU:
2819 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2820 break;
2821
2822 case NE:
2823 /* Handle NE as !EQ. */
2824 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2825 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2826 break;
2827
2828 case EQ:
2829 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2830 break;
2831
2832 default:
2833 gcc_unreachable ();
2834 }
2835
2836 DONE;
2837 })
2838
2839 (define_expand "vec_cmp<mode><v_int_equiv>"
2840 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2841 (match_operator 1 "comparison_operator"
2842 [(match_operand:VDQF 2 "register_operand")
2843 (match_operand:VDQF 3 "nonmemory_operand")]))]
2844 "TARGET_SIMD"
2845 {
2846 int use_zero_form = 0;
2847 enum rtx_code code = GET_CODE (operands[1]);
2848 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2849
2850 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2851
2852 switch (code)
2853 {
2854 case LE:
2855 case LT:
2856 case GE:
2857 case GT:
2858 case EQ:
2859 if (operands[3] == CONST0_RTX (<MODE>mode))
2860 {
2861 use_zero_form = 1;
2862 break;
2863 }
2864 /* Fall through. */
2865 default:
2866 if (!REG_P (operands[3]))
2867 operands[3] = force_reg (<MODE>mode, operands[3]);
2868
2869 break;
2870 }
2871
2872 switch (code)
2873 {
2874 case LT:
2875 if (use_zero_form)
2876 {
2877 comparison = gen_aarch64_cmlt<mode>;
2878 break;
2879 }
2880 /* Fall through. */
2881 case UNLT:
2882 std::swap (operands[2], operands[3]);
2883 /* Fall through. */
2884 case UNGT:
2885 case GT:
2886 comparison = gen_aarch64_cmgt<mode>;
2887 break;
2888 case LE:
2889 if (use_zero_form)
2890 {
2891 comparison = gen_aarch64_cmle<mode>;
2892 break;
2893 }
2894 /* Fall through. */
2895 case UNLE:
2896 std::swap (operands[2], operands[3]);
2897 /* Fall through. */
2898 case UNGE:
2899 case GE:
2900 comparison = gen_aarch64_cmge<mode>;
2901 break;
2902 case NE:
2903 case EQ:
2904 comparison = gen_aarch64_cmeq<mode>;
2905 break;
2906 case UNEQ:
2907 case ORDERED:
2908 case UNORDERED:
2909 case LTGT:
2910 break;
2911 default:
2912 gcc_unreachable ();
2913 }
2914
2915 switch (code)
2916 {
2917 case UNGE:
2918 case UNGT:
2919 case UNLE:
2920 case UNLT:
2921 {
2922 /* All of the above must not raise any FP exceptions. Thus we first
2923 check each operand for NaNs and force any elements containing NaN to
2924 zero before using them in the compare.
2925 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2926 (cm<cc> (isnan (a) ? 0.0 : a,
2927 isnan (b) ? 0.0 : b))
We use the following transformations for doing the comparisons:
2929 a UNGE b -> a GE b
2930 a UNGT b -> a GT b
2931 a UNLE b -> b GE a
2932 a UNLT b -> b GT a. */
2933
2934 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2935 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2936 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2937 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2938 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2939 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2940 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2941 lowpart_subreg (<V_INT_EQUIV>mode,
2942 operands[2],
2943 <MODE>mode)));
2944 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2945 lowpart_subreg (<V_INT_EQUIV>mode,
2946 operands[3],
2947 <MODE>mode)));
2948 gcc_assert (comparison != NULL);
2949 emit_insn (comparison (operands[0],
2950 lowpart_subreg (<MODE>mode,
2951 tmp0, <V_INT_EQUIV>mode),
2952 lowpart_subreg (<MODE>mode,
2953 tmp1, <V_INT_EQUIV>mode)));
2954 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2955 }
2956 break;
2957
2958 case LT:
2959 case LE:
2960 case GT:
2961 case GE:
2962 case EQ:
2963 case NE:
2964 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
As a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
2966 a GE b -> a GE b
2967 a GT b -> a GT b
2968 a LE b -> b GE a
2969 a LT b -> b GT a
2970 a EQ b -> a EQ b
2971 a NE b -> ~(a EQ b) */
2972 gcc_assert (comparison != NULL);
2973 emit_insn (comparison (operands[0], operands[2], operands[3]));
2974 if (code == NE)
2975 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2976 break;
2977
2978 case LTGT:
/* LTGT is not guaranteed not to raise an FP exception, so take
   the faster route: ((a > b) || (b > a)).  */
2981 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2982 operands[2], operands[3]));
2983 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2984 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2985 break;
2986
2987 case ORDERED:
2988 case UNORDERED:
2989 case UNEQ:
2990 /* cmeq (a, a) & cmeq (b, b). */
2991 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2992 operands[2], operands[2]));
2993 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2994 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2995
2996 if (code == UNORDERED)
2997 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2998 else if (code == UNEQ)
2999 {
3000 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3001 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3002 }
3003 break;
3004
3005 default:
3006 gcc_unreachable ();
3007 }
3008
3009 DONE;
3010 })
3011
3012 (define_expand "vec_cmpu<mode><mode>"
3013 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3014 (match_operator 1 "comparison_operator"
3015 [(match_operand:VSDQ_I_DI 2 "register_operand")
3016 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3017 "TARGET_SIMD"
3018 {
3019 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3020 operands[2], operands[3]));
3021 DONE;
3022 })
3023
3024 (define_expand "vcond<mode><mode>"
3025 [(set (match_operand:VALLDI 0 "register_operand")
3026 (if_then_else:VALLDI
3027 (match_operator 3 "comparison_operator"
3028 [(match_operand:VALLDI 4 "register_operand")
3029 (match_operand:VALLDI 5 "nonmemory_operand")])
3030 (match_operand:VALLDI 1 "nonmemory_operand")
3031 (match_operand:VALLDI 2 "nonmemory_operand")))]
3032 "TARGET_SIMD"
3033 {
3034 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3035 enum rtx_code code = GET_CODE (operands[3]);
3036
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3040 if (code == NE)
3041 {
3042 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3043 operands[4], operands[5]);
3044 std::swap (operands[1], operands[2]);
3045 }
3046 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3047 operands[4], operands[5]));
3048 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3049 operands[2], mask));
3050
3051 DONE;
3052 })
3053
3054 (define_expand "vcond<v_cmp_mixed><mode>"
3055 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3056 (if_then_else:<V_cmp_mixed>
3057 (match_operator 3 "comparison_operator"
3058 [(match_operand:VDQF_COND 4 "register_operand")
3059 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3060 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3061 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3062 "TARGET_SIMD"
3063 {
3064 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3065 enum rtx_code code = GET_CODE (operands[3]);
3066
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3070 if (code == NE)
3071 {
3072 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3073 operands[4], operands[5]);
3074 std::swap (operands[1], operands[2]);
3075 }
3076 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3077 operands[4], operands[5]));
3078 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3079 operands[0], operands[1],
3080 operands[2], mask));
3081
3082 DONE;
3083 })
3084
3085 (define_expand "vcondu<mode><mode>"
3086 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3087 (if_then_else:VSDQ_I_DI
3088 (match_operator 3 "comparison_operator"
3089 [(match_operand:VSDQ_I_DI 4 "register_operand")
3090 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3091 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3092 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3093 "TARGET_SIMD"
3094 {
3095 rtx mask = gen_reg_rtx (<MODE>mode);
3096 enum rtx_code code = GET_CODE (operands[3]);
3097
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3101 if (code == NE)
3102 {
3103 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3104 operands[4], operands[5]);
3105 std::swap (operands[1], operands[2]);
3106 }
3107 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3108 operands[4], operands[5]));
3109 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3110 operands[2], mask));
3111 DONE;
3112 })
3113
3114 (define_expand "vcondu<mode><v_cmp_mixed>"
3115 [(set (match_operand:VDQF 0 "register_operand")
3116 (if_then_else:VDQF
3117 (match_operator 3 "comparison_operator"
3118 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3119 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3120 (match_operand:VDQF 1 "nonmemory_operand")
3121 (match_operand:VDQF 2 "nonmemory_operand")))]
3122 "TARGET_SIMD"
3123 {
3124 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3125 enum rtx_code code = GET_CODE (operands[3]);
3126
/* NE is handled as !EQ in the vec_cmp patterns; instead, invert the
   comparison to EQ and swap operands 1/2 here, avoiding the
   additional NOT instruction.  */
3130 if (code == NE)
3131 {
3132 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3133 operands[4], operands[5]);
3134 std::swap (operands[1], operands[2]);
3135 }
3136 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3137 mask, operands[3],
3138 operands[4], operands[5]));
3139 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3140 operands[2], mask));
3141 DONE;
3142 })
3143
3144 ;; Patterns for AArch64 SIMD Intrinsics.
3145
3146 ;; Lane extraction with sign extension to general purpose register.
3147 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3148 [(set (match_operand:GPI 0 "register_operand" "=r")
3149 (sign_extend:GPI
3150 (vec_select:<VDQQH:VEL>
3151 (match_operand:VDQQH 1 "register_operand" "w")
3152 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3153 "TARGET_SIMD"
3154 {
3155 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3156 INTVAL (operands[2]));
3157 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3158 }
3159 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3160 )
3161
3162 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3163 [(set (match_operand:GPI 0 "register_operand" "=r")
3164 (zero_extend:GPI
3165 (vec_select:<VDQQH:VEL>
3166 (match_operand:VDQQH 1 "register_operand" "w")
3167 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3168 "TARGET_SIMD"
3169 {
3170 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3171 INTVAL (operands[2]));
3172 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3173 }
3174 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3175 )
3176
;; Lane extraction of a value; neither sign nor zero extension
;; is guaranteed, so the upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout, so flip only for
;; the assembly output.
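;;
;; For example (a sketch assuming arm_neon.h), extracting one lane
;; with vgetq_lane_s16 may use UMOV or, where a sign extension is
;; needed, SMOV:
;;
;;   #include <arm_neon.h>
;;
;;   int16_t get_lane2 (int16x8_t v) { return vgetq_lane_s16 (v, 2); }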
3180 (define_insn "aarch64_get_lane<mode>"
3181 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3182 (vec_select:<VEL>
3183 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3184 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3185 "TARGET_SIMD"
3186 {
3187 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3188 switch (which_alternative)
3189 {
3190 case 0:
3191 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3192 case 1:
3193 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3194 case 2:
3195 return "st1\\t{%1.<Vetype>}[%2], %0";
3196 default:
3197 gcc_unreachable ();
3198 }
3199 }
3200 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3201 )
3202
3203 (define_insn "load_pair_lanes<mode>"
3204 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3205 (vec_concat:<VDBL>
3206 (match_operand:VDC 1 "memory_operand" "Utq")
3207 (match_operand:VDC 2 "memory_operand" "m")))]
3208 "TARGET_SIMD && !STRICT_ALIGNMENT
3209 && rtx_equal_p (XEXP (operands[2], 0),
3210 plus_constant (Pmode,
3211 XEXP (operands[1], 0),
3212 GET_MODE_SIZE (<MODE>mode)))"
3213 "ldr\\t%q0, %1"
3214 [(set_attr "type" "neon_load1_1reg_q")]
3215 )
3216
3217 (define_insn "store_pair_lanes<mode>"
3218 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3219 (vec_concat:<VDBL>
3220 (match_operand:VDC 1 "register_operand" "w, r")
3221 (match_operand:VDC 2 "register_operand" "w, r")))]
3222 "TARGET_SIMD"
3223 "@
3224 stp\\t%d1, %d2, %y0
3225 stp\\t%x1, %x2, %y0"
3226 [(set_attr "type" "neon_stp, store_16")]
3227 )
3228
;; In this insn, operand 1 should be the low part and operand 2 the
;; high part of the destination vector.
3231
3232 (define_insn "@aarch64_combinez<mode>"
3233 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3234 (vec_concat:<VDBL>
3235 (match_operand:VDC 1 "general_operand" "w,?r,m")
3236 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3237 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3238 "@
3239 mov\\t%0.8b, %1.8b
3240 fmov\t%d0, %1
3241 ldr\\t%d0, %1"
3242 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3243 (set_attr "arch" "simd,fp,simd")]
3244 )
3245
3246 (define_insn "@aarch64_combinez_be<mode>"
3247 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3248 (vec_concat:<VDBL>
3249 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3250 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3251 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3252 "@
3253 mov\\t%0.8b, %1.8b
3254 fmov\t%d0, %1
3255 ldr\\t%d0, %1"
3256 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3257 (set_attr "arch" "simd,fp,simd")]
3258 )
3259
3260 (define_expand "aarch64_combine<mode>"
3261 [(match_operand:<VDBL> 0 "register_operand")
3262 (match_operand:VDC 1 "register_operand")
3263 (match_operand:VDC 2 "register_operand")]
3264 "TARGET_SIMD"
3265 {
3266 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3267
3268 DONE;
3269 }
3270 )
3271
3272 (define_expand "@aarch64_simd_combine<mode>"
3273 [(match_operand:<VDBL> 0 "register_operand")
3274 (match_operand:VDC 1 "register_operand")
3275 (match_operand:VDC 2 "register_operand")]
3276 "TARGET_SIMD"
3277 {
3278 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3279 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3280 DONE;
3281 }
3282 [(set_attr "type" "multiple")]
3283 )
3284
3285 ;; <su><addsub>l<q>.
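;; For example, saddl2 v0.4s, v1.8h, v2.8h sign-extends the high halves
;; of v1 and v2 to 32 bits and adds them; the plain (non-"2") forms
;; operate on the low halves or on 64-bit inputs.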
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290 (match_operand:VQW 1 "register_operand" "w")
3291 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293 (match_operand:VQW 2 "register_operand" "w")
3294 (match_dup 3)))))]
3295 "TARGET_SIMD"
3296 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3297 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3298 )
3299
3300 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3301 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3302 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3303 (match_operand:VQW 1 "register_operand" "w")
3304 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3305 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3306 (match_operand:VQW 2 "register_operand" "w")
3307 (match_dup 3)))))]
3308 "TARGET_SIMD"
3309 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3310 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3311 )
3312
3313
3314 (define_expand "aarch64_saddl2<mode>"
3315 [(match_operand:<VWIDE> 0 "register_operand")
3316 (match_operand:VQW 1 "register_operand")
3317 (match_operand:VQW 2 "register_operand")]
3318 "TARGET_SIMD"
3319 {
3320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3322 operands[2], p));
3323 DONE;
3324 })
3325
3326 (define_expand "aarch64_uaddl2<mode>"
3327 [(match_operand:<VWIDE> 0 "register_operand")
3328 (match_operand:VQW 1 "register_operand")
3329 (match_operand:VQW 2 "register_operand")]
3330 "TARGET_SIMD"
3331 {
3332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3334 operands[2], p));
3335 DONE;
3336 })
3337
3338 (define_expand "aarch64_ssubl2<mode>"
3339 [(match_operand:<VWIDE> 0 "register_operand")
3340 (match_operand:VQW 1 "register_operand")
3341 (match_operand:VQW 2 "register_operand")]
3342 "TARGET_SIMD"
3343 {
3344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3346 operands[2], p));
3347 DONE;
3348 })
3349
3350 (define_expand "aarch64_usubl2<mode>"
3351 [(match_operand:<VWIDE> 0 "register_operand")
3352 (match_operand:VQW 1 "register_operand")
3353 (match_operand:VQW 2 "register_operand")]
3354 "TARGET_SIMD"
3355 {
3356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3357 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3358 operands[2], p));
3359 DONE;
3360 })
3361
3362 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3364 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3365 (match_operand:VD_BHSI 1 "register_operand" "w"))
3366 (ANY_EXTEND:<VWIDE>
3367 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3368 "TARGET_SIMD"
3369 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3370 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3371 )
3372
3373 ;; <su><addsub>w<q>.
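;; [us]addw and [us]subw combine a wide accumulator with a widened
;; narrow operand.  The widen_ssum/widen_usum expanders below consume a
;; 128-bit input by chaining [us]addw on the low half with [us]addw2 on
;; the high half; the 64-bit forms map to a single [us]addw.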
3374
3375 (define_expand "widen_ssum<mode>3"
3376 [(set (match_operand:<VDBLW> 0 "register_operand")
3377 (plus:<VDBLW> (sign_extend:<VDBLW>
3378 (match_operand:VQW 1 "register_operand"))
3379 (match_operand:<VDBLW> 2 "register_operand")))]
3380 "TARGET_SIMD"
3381 {
3382 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3383 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3384
3385 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3386 operands[1], p));
3387 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3388 DONE;
3389 }
3390 )
3391
3392 (define_expand "widen_ssum<mode>3"
3393 [(set (match_operand:<VWIDE> 0 "register_operand")
3394 (plus:<VWIDE> (sign_extend:<VWIDE>
3395 (match_operand:VD_BHSI 1 "register_operand"))
3396 (match_operand:<VWIDE> 2 "register_operand")))]
3397 "TARGET_SIMD"
3398 {
3399 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3400 DONE;
3401 })
3402
3403 (define_expand "widen_usum<mode>3"
3404 [(set (match_operand:<VDBLW> 0 "register_operand")
3405 (plus:<VDBLW> (zero_extend:<VDBLW>
3406 (match_operand:VQW 1 "register_operand"))
3407 (match_operand:<VDBLW> 2 "register_operand")))]
3408 "TARGET_SIMD"
3409 {
3410 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3411 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3412
3413 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3414 operands[1], p));
3415 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3416 DONE;
3417 }
3418 )
3419
3420 (define_expand "widen_usum<mode>3"
3421 [(set (match_operand:<VWIDE> 0 "register_operand")
3422 (plus:<VWIDE> (zero_extend:<VWIDE>
3423 (match_operand:VD_BHSI 1 "register_operand"))
3424 (match_operand:<VWIDE> 2 "register_operand")))]
3425 "TARGET_SIMD"
3426 {
3427 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3428 DONE;
3429 })
3430
3431 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3432 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3434 (ANY_EXTEND:<VWIDE>
3435 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3436 "TARGET_SIMD"
3437 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3438 [(set_attr "type" "neon_sub_widen")]
3439 )
3440
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3442 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3444 (ANY_EXTEND:<VWIDE>
3445 (vec_select:<VHALF>
3446 (match_operand:VQW 2 "register_operand" "w")
3447 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3448 "TARGET_SIMD"
3449 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3450 [(set_attr "type" "neon_sub_widen")]
3451 )
3452
3453 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3456 (ANY_EXTEND:<VWIDE>
3457 (vec_select:<VHALF>
3458 (match_operand:VQW 2 "register_operand" "w")
3459 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3460 "TARGET_SIMD"
3461 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3462 [(set_attr "type" "neon_sub_widen")]
3463 )
3464
3465 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3466 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3467 (plus:<VWIDE>
3468 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3469 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3470 "TARGET_SIMD"
3471 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3472 [(set_attr "type" "neon_add_widen")]
3473 )
3474
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3476 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3477 (plus:<VWIDE>
3478 (ANY_EXTEND:<VWIDE>
3479 (vec_select:<VHALF>
3480 (match_operand:VQW 2 "register_operand" "w")
3481 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3482 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3483 "TARGET_SIMD"
3484 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3485 [(set_attr "type" "neon_add_widen")]
3486 )
3487
3488 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3490 (plus:<VWIDE>
3491 (ANY_EXTEND:<VWIDE>
3492 (vec_select:<VHALF>
3493 (match_operand:VQW 2 "register_operand" "w")
3494 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3495 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3496 "TARGET_SIMD"
3497 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3498 [(set_attr "type" "neon_add_widen")]
3499 )
3500
3501 (define_expand "aarch64_saddw2<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (match_operand:<VWIDE> 1 "register_operand")
3504 (match_operand:VQW 2 "register_operand")]
3505 "TARGET_SIMD"
3506 {
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3509 operands[2], p));
3510 DONE;
3511 })
3512
3513 (define_expand "aarch64_uaddw2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand")
3515 (match_operand:<VWIDE> 1 "register_operand")
3516 (match_operand:VQW 2 "register_operand")]
3517 "TARGET_SIMD"
3518 {
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3521 operands[2], p));
3522 DONE;
3523 })
3524
3525
3526 (define_expand "aarch64_ssubw2<mode>"
3527 [(match_operand:<VWIDE> 0 "register_operand")
3528 (match_operand:<VWIDE> 1 "register_operand")
3529 (match_operand:VQW 2 "register_operand")]
3530 "TARGET_SIMD"
3531 {
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3534 operands[2], p));
3535 DONE;
3536 })
3537
3538 (define_expand "aarch64_usubw2<mode>"
3539 [(match_operand:<VWIDE> 0 "register_operand")
3540 (match_operand:<VWIDE> 1 "register_operand")
3541 (match_operand:VQW 2 "register_operand")]
3542 "TARGET_SIMD"
3543 {
3544 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3545 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3546 operands[2], p));
3547 DONE;
3548 })
3549
3550 ;; <su><r>h<addsub>.
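;; [us]hadd computes (a + b) >> 1 per element without losing the carry
;; bit; [us]rhadd rounds upwards instead, computing (a + b + 1) >> 1.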
3551
3552 (define_expand "<u>avg<mode>3_floor"
3553 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3554 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3555 (match_operand:VDQ_BHSI 2 "register_operand")]
3556 HADD))]
3557 "TARGET_SIMD"
3558 )
3559
3560 (define_expand "<u>avg<mode>3_ceil"
3561 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3562 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3563 (match_operand:VDQ_BHSI 2 "register_operand")]
3564 RHADD))]
3565 "TARGET_SIMD"
3566 )
3567
3568 (define_insn "aarch64_<sur>h<addsub><mode>"
3569 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3570 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3571 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3572 HADDSUB))]
3573 "TARGET_SIMD"
3574 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3575 [(set_attr "type" "neon_<addsub>_halve<q>")]
3576 )
3577
3578 ;; <r><addsub>hn<q>.
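;; [r]addhn and [r]subhn keep only the most-significant half of each
;; result, e.g. for 16-bit elements res[i] = (a[i] + b[i]) >> 8; the
;; rounding forms add 1 << 7 before narrowing.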
3579
3580 (define_insn "aarch64_<sur><addsub>hn<mode>"
3581 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3582 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3583 (match_operand:VQN 2 "register_operand" "w")]
3584 ADDSUBHN))]
3585 "TARGET_SIMD"
3586 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3587 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3588 )
3589
3590 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3591 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3592 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3593 (match_operand:VQN 2 "register_operand" "w")
3594 (match_operand:VQN 3 "register_operand" "w")]
3595 ADDSUBHN2))]
3596 "TARGET_SIMD"
3597 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3598 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3599 )
3600
3601 ;; pmul.
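;; Polynomial (carry-less) multiplication of 8-bit elements over GF(2):
;; partial products are combined with XOR and only the low 8 bits of
;; each product are kept.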
3602
3603 (define_insn "aarch64_pmul<mode>"
3604 [(set (match_operand:VB 0 "register_operand" "=w")
3605 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3606 (match_operand:VB 2 "register_operand" "w")]
3607 UNSPEC_PMUL))]
3608 "TARGET_SIMD"
3609 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3610 [(set_attr "type" "neon_mul_<Vetype><q>")]
3611 )
3612
3613 ;; fmulx.
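;; fmulx behaves like fmul except that +/-0.0 multiplied by +/-Inf
;; returns +/-2.0 rather than the default NaN.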
3614
3615 (define_insn "aarch64_fmulx<mode>"
3616 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3617 (unspec:VHSDF_HSDF
3618 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3619 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3620 UNSPEC_FMULX))]
3621 "TARGET_SIMD"
3622 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3623 [(set_attr "type" "neon_fp_mul_<stype>")]
3624 )
3625
3626 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3627
3628 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3629 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3630 (unspec:VDQSF
3631 [(match_operand:VDQSF 1 "register_operand" "w")
3632 (vec_duplicate:VDQSF
3633 (vec_select:<VEL>
3634 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3635 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3636 UNSPEC_FMULX))]
3637 "TARGET_SIMD"
3638 {
3639 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3640 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3641 }
3642 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3643 )
3644
3645 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3646
3647 (define_insn "*aarch64_mulx_elt<mode>"
3648 [(set (match_operand:VDQF 0 "register_operand" "=w")
3649 (unspec:VDQF
3650 [(match_operand:VDQF 1 "register_operand" "w")
3651 (vec_duplicate:VDQF
3652 (vec_select:<VEL>
3653 (match_operand:VDQF 2 "register_operand" "w")
3654 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3655 UNSPEC_FMULX))]
3656 "TARGET_SIMD"
3657 {
3658 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3659 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3660 }
3661 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3662 )
3663
3664 ;; vmulxq_lane
3665
3666 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3667 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3668 (unspec:VHSDF
3669 [(match_operand:VHSDF 1 "register_operand" "w")
3670 (vec_duplicate:VHSDF
3671 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3672 UNSPEC_FMULX))]
3673 "TARGET_SIMD"
3674 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3675 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3676 )
3677
3678 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3679 ;; vmulxd_lane_f64 == vmulx_lane_f64
3680 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3681
3682 (define_insn "*aarch64_vgetfmulx<mode>"
3683 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3684 (unspec:<VEL>
3685 [(match_operand:<VEL> 1 "register_operand" "w")
3686 (vec_select:<VEL>
3687 (match_operand:VDQF 2 "register_operand" "w")
3688 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3689 UNSPEC_FMULX))]
3690 "TARGET_SIMD"
3691 {
3692 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3693 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3694 }
3695 [(set_attr "type" "fmul<Vetype>")]
3696 )
3697 ;; <su>q<addsub>
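;; Saturating integer add/subtract: results clamp to the type's range
;; instead of wrapping.  BINQOPS iterates over the signed and unsigned
;; saturating RTL codes (ss_plus, us_plus, ss_minus, us_minus).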
3698
3699 (define_insn "aarch64_<su_optab><optab><mode>"
3700 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3701 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3702 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3703 "TARGET_SIMD"
3704 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3705 [(set_attr "type" "neon_<optab><q>")]
3706 )
3707
3708 ;; suqadd and usqadd
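;; suqadd accumulates an unsigned value into a signed accumulator with
;; signed saturation; usqadd is the converse.  Operand 1 is tied to the
;; destination, matching the read-modify-write instruction.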
3709
3710 (define_insn "aarch64_<sur>qadd<mode>"
3711 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3712 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3713 (match_operand:VSDQ_I 2 "register_operand" "w")]
3714 USSUQADD))]
3715 "TARGET_SIMD"
3716 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3717 [(set_attr "type" "neon_qadd<q>")]
3718 )
3719
3720 ;; sqmovun
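;; sqxtun narrows a signed value with unsigned saturation: negative
;; inputs clamp to zero and values above the unsigned maximum clamp to
;; that maximum.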
3721
3722 (define_insn "aarch64_sqmovun<mode>"
3723 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3724 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3725 UNSPEC_SQXTUN))]
3726 "TARGET_SIMD"
3727 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3728 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3729 )
3730
3731 ;; sqmovn and uqmovn
3732
3733 (define_insn "aarch64_<sur>qmovn<mode>"
3734 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3735 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3736 SUQMOVN))]
3737 "TARGET_SIMD"
3738 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3739 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3740 )
3741
3742 ;; <su>q<absneg>
3743
3744 (define_insn "aarch64_s<optab><mode>"
3745 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3746 (UNQOPS:VSDQ_I
3747 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3748 "TARGET_SIMD"
3749 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3750 [(set_attr "type" "neon_<optab><q>")]
3751 )
3752
3753 ;; sq<r>dmulh.
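;; sqdmulh returns sat ((2 * a * b) >> esize), i.e. the high half of
;; the doubled product; sqrdmulh also adds 1 << (esize - 1) before
;; shifting, rounding to nearest.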
3754
3755 (define_insn "aarch64_sq<r>dmulh<mode>"
3756 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3757 (unspec:VSDQ_HSI
3758 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3759 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3760 VQDMULH))]
3761 "TARGET_SIMD"
3762 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3763 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3764 )
3765
3766 ;; sq<r>dmulh_lane
3767
3768 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3769 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3770 (unspec:VDQHS
3771 [(match_operand:VDQHS 1 "register_operand" "w")
3772 (vec_select:<VEL>
3773 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3775 VQDMULH))]
3776 "TARGET_SIMD"
3777 "*
3778 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3779 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3780 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3781 )
3782
3783 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3784 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3785 (unspec:VDQHS
3786 [(match_operand:VDQHS 1 "register_operand" "w")
3787 (vec_select:<VEL>
3788 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3790 VQDMULH))]
3791 "TARGET_SIMD"
3792 "*
3793 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3794 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3795 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3796 )
3797
3798 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3799 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3800 (unspec:SD_HSI
3801 [(match_operand:SD_HSI 1 "register_operand" "w")
3802 (vec_select:<VEL>
3803 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3804 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3805 VQDMULH))]
3806 "TARGET_SIMD"
3807 "*
3808 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3809 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3810 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3811 )
3812
3813 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3814 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3815 (unspec:SD_HSI
3816 [(match_operand:SD_HSI 1 "register_operand" "w")
3817 (vec_select:<VEL>
3818 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3820 VQDMULH))]
3821 "TARGET_SIMD"
3822 "*
3823 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3824 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3825 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3826 )
3827
3828 ;; sqrdml[as]h.
3829
3830 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3831 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3832 (unspec:VSDQ_HSI
3833 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3834 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3835 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3836 SQRDMLH_AS))]
3837 "TARGET_SIMD_RDMA"
3838 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3839 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3840 )
3841
3842 ;; sqrdml[as]h_lane.
3843
3844 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3845 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3846 (unspec:VDQHS
3847 [(match_operand:VDQHS 1 "register_operand" "0")
3848 (match_operand:VDQHS 2 "register_operand" "w")
3849 (vec_select:<VEL>
3850 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3851 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3852 SQRDMLH_AS))]
3853 "TARGET_SIMD_RDMA"
3854 {
3855 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3856 return
3857 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3858 }
3859 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3860 )
3861
3862 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3863 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3864 (unspec:SD_HSI
3865 [(match_operand:SD_HSI 1 "register_operand" "0")
3866 (match_operand:SD_HSI 2 "register_operand" "w")
3867 (vec_select:<VEL>
3868 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3869 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3870 SQRDMLH_AS))]
3871 "TARGET_SIMD_RDMA"
3872 {
3873 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3874 return
3875 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3876 }
3877 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3878 )
3879
3880 ;; sqrdml[as]h_laneq.
3881
3882 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3883 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3884 (unspec:VDQHS
3885 [(match_operand:VDQHS 1 "register_operand" "0")
3886 (match_operand:VDQHS 2 "register_operand" "w")
3887 (vec_select:<VEL>
3888 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3889 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3890 SQRDMLH_AS))]
3891 "TARGET_SIMD_RDMA"
3892 {
3893 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3894 return
3895 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3896 }
3897 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3898 )
3899
3900 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3901 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3902 (unspec:SD_HSI
3903 [(match_operand:SD_HSI 1 "register_operand" "0")
3904 (match_operand:SD_HSI 2 "register_operand" "w")
3905 (vec_select:<VEL>
3906 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3907 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3908 SQRDMLH_AS))]
3909 "TARGET_SIMD_RDMA"
3910 {
3911 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3912 return
3913 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3914 }
3915 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3916 )
3917
3918 ;; vqdml[sa]l
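;; sqdmlal/sqdmlsl saturatingly add or subtract the doubled product:
;; acc +/- sat (2 * sext (a) * sext (b)).  The doubling is expressed as
;; a saturating left shift by one (ss_ashift ... (const_int 1)) so that
;; the saturation point is explicit in the RTL.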
3919
3920 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3922 (SBINQOPS:<VWIDE>
3923 (match_operand:<VWIDE> 1 "register_operand" "0")
3924 (ss_ashift:<VWIDE>
3925 (mult:<VWIDE>
3926 (sign_extend:<VWIDE>
3927 (match_operand:VSD_HSI 2 "register_operand" "w"))
3928 (sign_extend:<VWIDE>
3929 (match_operand:VSD_HSI 3 "register_operand" "w")))
3930 (const_int 1))))]
3931 "TARGET_SIMD"
3932 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3933 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3934 )
3935
3936 ;; vqdml[sa]l_lane
3937
3938 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3940 (SBINQOPS:<VWIDE>
3941 (match_operand:<VWIDE> 1 "register_operand" "0")
3942 (ss_ashift:<VWIDE>
3943 (mult:<VWIDE>
3944 (sign_extend:<VWIDE>
3945 (match_operand:VD_HSI 2 "register_operand" "w"))
3946 (sign_extend:<VWIDE>
3947 (vec_duplicate:VD_HSI
3948 (vec_select:<VEL>
3949 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3950 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3951 ))
3952 (const_int 1))))]
3953 "TARGET_SIMD"
3954 {
3955 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3956 return
3957 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3958 }
3959 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3960 )
3961
3962 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3963 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (SBINQOPS:<VWIDE>
3965 (match_operand:<VWIDE> 1 "register_operand" "0")
3966 (ss_ashift:<VWIDE>
3967 (mult:<VWIDE>
3968 (sign_extend:<VWIDE>
3969 (match_operand:VD_HSI 2 "register_operand" "w"))
3970 (sign_extend:<VWIDE>
3971 (vec_duplicate:VD_HSI
3972 (vec_select:<VEL>
3973 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3975 ))
3976 (const_int 1))))]
3977 "TARGET_SIMD"
3978 {
3979 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3980 return
3981 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3982 }
3983 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3984 )
3985
3986 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3987 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3988 (SBINQOPS:<VWIDE>
3989 (match_operand:<VWIDE> 1 "register_operand" "0")
3990 (ss_ashift:<VWIDE>
3991 (mult:<VWIDE>
3992 (sign_extend:<VWIDE>
3993 (match_operand:SD_HSI 2 "register_operand" "w"))
3994 (sign_extend:<VWIDE>
3995 (vec_select:<VEL>
3996 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3998 )
3999 (const_int 1))))]
4000 "TARGET_SIMD"
4001 {
4002 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4003 return
4004 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4005 }
4006 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4007 )
4008
4009 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4011 (SBINQOPS:<VWIDE>
4012 (match_operand:<VWIDE> 1 "register_operand" "0")
4013 (ss_ashift:<VWIDE>
4014 (mult:<VWIDE>
4015 (sign_extend:<VWIDE>
4016 (match_operand:SD_HSI 2 "register_operand" "w"))
4017 (sign_extend:<VWIDE>
4018 (vec_select:<VEL>
4019 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4020 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4021 )
4022 (const_int 1))))]
4023 "TARGET_SIMD"
4024 {
4025 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4026 return
4027 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4028 }
4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4030 )
4031
4032 ;; vqdml[sa]l_n
4033
4034 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4036 (SBINQOPS:<VWIDE>
4037 (match_operand:<VWIDE> 1 "register_operand" "0")
4038 (ss_ashift:<VWIDE>
4039 (mult:<VWIDE>
4040 (sign_extend:<VWIDE>
4041 (match_operand:VD_HSI 2 "register_operand" "w"))
4042 (sign_extend:<VWIDE>
4043 (vec_duplicate:VD_HSI
4044 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4045 (const_int 1))))]
4046 "TARGET_SIMD"
4047 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4048 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4049 )
4050
4051 ;; sqdml[as]l2
4052
4053 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4054 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (SBINQOPS:<VWIDE>
4056 (match_operand:<VWIDE> 1 "register_operand" "0")
4057 (ss_ashift:<VWIDE>
4058 (mult:<VWIDE>
4059 (sign_extend:<VWIDE>
4060 (vec_select:<VHALF>
4061 (match_operand:VQ_HSI 2 "register_operand" "w")
4062 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4063 (sign_extend:<VWIDE>
4064 (vec_select:<VHALF>
4065 (match_operand:VQ_HSI 3 "register_operand" "w")
4066 (match_dup 4))))
4067 (const_int 1))))]
4068 "TARGET_SIMD"
4069 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4070 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4071 )
4072
4073 (define_expand "aarch64_sqdmlal2<mode>"
4074 [(match_operand:<VWIDE> 0 "register_operand")
4075 (match_operand:<VWIDE> 1 "register_operand")
4076 (match_operand:VQ_HSI 2 "register_operand")
4077 (match_operand:VQ_HSI 3 "register_operand")]
4078 "TARGET_SIMD"
4079 {
4080 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4082 operands[2], operands[3], p));
4083 DONE;
4084 })
4085
4086 (define_expand "aarch64_sqdmlsl2<mode>"
4087 [(match_operand:<VWIDE> 0 "register_operand")
4088 (match_operand:<VWIDE> 1 "register_operand")
4089 (match_operand:VQ_HSI 2 "register_operand")
4090 (match_operand:VQ_HSI 3 "register_operand")]
4091 "TARGET_SIMD"
4092 {
4093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4094 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4095 operands[2], operands[3], p));
4096 DONE;
4097 })
4098
4099 ;; vqdml[sa]l2_lane
4100
4101 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4102 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4103 (SBINQOPS:<VWIDE>
4104 (match_operand:<VWIDE> 1 "register_operand" "0")
4105 (ss_ashift:<VWIDE>
4106 (mult:<VWIDE>
4107 (sign_extend:<VWIDE>
4108 (vec_select:<VHALF>
4109 (match_operand:VQ_HSI 2 "register_operand" "w")
4110 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4111 (sign_extend:<VWIDE>
4112 (vec_duplicate:<VHALF>
4113 (vec_select:<VEL>
4114 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4116 ))))
4117 (const_int 1))))]
4118 "TARGET_SIMD"
4119 {
4120 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4121 return
4122 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4123 }
4124 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4125 )
4126
4127 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4128 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4129 (SBINQOPS:<VWIDE>
4130 (match_operand:<VWIDE> 1 "register_operand" "0")
4131 (ss_ashift:<VWIDE>
4132 (mult:<VWIDE>
4133 (sign_extend:<VWIDE>
4134 (vec_select:<VHALF>
4135 (match_operand:VQ_HSI 2 "register_operand" "w")
4136 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4137 (sign_extend:<VWIDE>
4138 (vec_duplicate:<VHALF>
4139 (vec_select:<VEL>
4140 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4141 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4142 ))))
4143 (const_int 1))))]
4144 "TARGET_SIMD"
4145 {
4146 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4147 return
4148 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4149 }
4150 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4151 )
4152
4153 (define_expand "aarch64_sqdmlal2_lane<mode>"
4154 [(match_operand:<VWIDE> 0 "register_operand")
4155 (match_operand:<VWIDE> 1 "register_operand")
4156 (match_operand:VQ_HSI 2 "register_operand")
4157 (match_operand:<VCOND> 3 "register_operand")
4158 (match_operand:SI 4 "immediate_operand")]
4159 "TARGET_SIMD"
4160 {
4161 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4162 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4163 operands[2], operands[3],
4164 operands[4], p));
4165 DONE;
4166 })
4167
4168 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4169 [(match_operand:<VWIDE> 0 "register_operand")
4170 (match_operand:<VWIDE> 1 "register_operand")
4171 (match_operand:VQ_HSI 2 "register_operand")
4172 (match_operand:<VCONQ> 3 "register_operand")
4173 (match_operand:SI 4 "immediate_operand")]
4174 "TARGET_SIMD"
4175 {
4176 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4177 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4178 operands[2], operands[3],
4179 operands[4], p));
4180 DONE;
4181 })
4182
4183 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4184 [(match_operand:<VWIDE> 0 "register_operand")
4185 (match_operand:<VWIDE> 1 "register_operand")
4186 (match_operand:VQ_HSI 2 "register_operand")
4187 (match_operand:<VCOND> 3 "register_operand")
4188 (match_operand:SI 4 "immediate_operand")]
4189 "TARGET_SIMD"
4190 {
4191 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4192 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4193 operands[2], operands[3],
4194 operands[4], p));
4195 DONE;
4196 })
4197
4198 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4199 [(match_operand:<VWIDE> 0 "register_operand")
4200 (match_operand:<VWIDE> 1 "register_operand")
4201 (match_operand:VQ_HSI 2 "register_operand")
4202 (match_operand:<VCONQ> 3 "register_operand")
4203 (match_operand:SI 4 "immediate_operand")]
4204 "TARGET_SIMD"
4205 {
4206 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4207 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4208 operands[2], operands[3],
4209 operands[4], p));
4210 DONE;
4211 })
4212
4213 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4214 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4215 (SBINQOPS:<VWIDE>
4216 (match_operand:<VWIDE> 1 "register_operand" "0")
4217 (ss_ashift:<VWIDE>
4218 (mult:<VWIDE>
4219 (sign_extend:<VWIDE>
4220 (vec_select:<VHALF>
4221 (match_operand:VQ_HSI 2 "register_operand" "w")
4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223 (sign_extend:<VWIDE>
4224 (vec_duplicate:<VHALF>
4225 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4226 (const_int 1))))]
4227 "TARGET_SIMD"
4228 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4229 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_expand "aarch64_sqdmlal2_n<mode>"
4233 [(match_operand:<VWIDE> 0 "register_operand")
4234 (match_operand:<VWIDE> 1 "register_operand")
4235 (match_operand:VQ_HSI 2 "register_operand")
4236 (match_operand:<VEL> 3 "register_operand")]
4237 "TARGET_SIMD"
4238 {
4239 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4240 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4241 operands[2], operands[3],
4242 p));
4243 DONE;
4244 })
4245
4246 (define_expand "aarch64_sqdmlsl2_n<mode>"
4247 [(match_operand:<VWIDE> 0 "register_operand")
4248 (match_operand:<VWIDE> 1 "register_operand")
4249 (match_operand:VQ_HSI 2 "register_operand")
4250 (match_operand:<VEL> 3 "register_operand")]
4251 "TARGET_SIMD"
4252 {
4253 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4254 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4255 operands[2], operands[3],
4256 p));
4257 DONE;
4258 })
4259
4260 ;; vqdmull
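;; sqdmull is the multiply-only form: sat (2 * sext (a) * sext (b)) at
;; twice the input element width.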
4261
4262 (define_insn "aarch64_sqdmull<mode>"
4263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4264 (ss_ashift:<VWIDE>
4265 (mult:<VWIDE>
4266 (sign_extend:<VWIDE>
4267 (match_operand:VSD_HSI 1 "register_operand" "w"))
4268 (sign_extend:<VWIDE>
4269 (match_operand:VSD_HSI 2 "register_operand" "w")))
4270 (const_int 1)))]
4271 "TARGET_SIMD"
4272 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4273 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4274 )
4275
4276 ;; vqdmull_lane
4277
4278 (define_insn "aarch64_sqdmull_lane<mode>"
4279 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4280 (ss_ashift:<VWIDE>
4281 (mult:<VWIDE>
4282 (sign_extend:<VWIDE>
4283 (match_operand:VD_HSI 1 "register_operand" "w"))
4284 (sign_extend:<VWIDE>
4285 (vec_duplicate:VD_HSI
4286 (vec_select:<VEL>
4287 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4288 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4289 ))
4290 (const_int 1)))]
4291 "TARGET_SIMD"
4292 {
4293 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4294 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4295 }
4296 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4297 )
4298
4299 (define_insn "aarch64_sqdmull_laneq<mode>"
4300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4301 (ss_ashift:<VWIDE>
4302 (mult:<VWIDE>
4303 (sign_extend:<VWIDE>
4304 (match_operand:VD_HSI 1 "register_operand" "w"))
4305 (sign_extend:<VWIDE>
4306 (vec_duplicate:VD_HSI
4307 (vec_select:<VEL>
4308 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4309 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4310 ))
4311 (const_int 1)))]
4312 "TARGET_SIMD"
4313 {
4314 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4315 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4316 }
4317 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4318 )
4319
4320 (define_insn "aarch64_sqdmull_lane<mode>"
4321 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4322 (ss_ashift:<VWIDE>
4323 (mult:<VWIDE>
4324 (sign_extend:<VWIDE>
4325 (match_operand:SD_HSI 1 "register_operand" "w"))
4326 (sign_extend:<VWIDE>
4327 (vec_select:<VEL>
4328 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4330 ))
4331 (const_int 1)))]
4332 "TARGET_SIMD"
4333 {
4334 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4335 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4336 }
4337 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4338 )
4339
4340 (define_insn "aarch64_sqdmull_laneq<mode>"
4341 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4342 (ss_ashift:<VWIDE>
4343 (mult:<VWIDE>
4344 (sign_extend:<VWIDE>
4345 (match_operand:SD_HSI 1 "register_operand" "w"))
4346 (sign_extend:<VWIDE>
4347 (vec_select:<VEL>
4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4350 ))
4351 (const_int 1)))]
4352 "TARGET_SIMD"
4353 {
4354 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4355 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4356 }
4357 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4358 )
4359
4360 ;; vqdmull_n
4361
4362 (define_insn "aarch64_sqdmull_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364 (ss_ashift:<VWIDE>
4365 (mult:<VWIDE>
4366 (sign_extend:<VWIDE>
4367 (match_operand:VD_HSI 1 "register_operand" "w"))
4368 (sign_extend:<VWIDE>
4369 (vec_duplicate:VD_HSI
4370 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4371 )
4372 (const_int 1)))]
4373 "TARGET_SIMD"
4374 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4375 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4376 )
4377
4378 ;; vqdmull2
4379
4380
4381
4382 (define_insn "aarch64_sqdmull2<mode>_internal"
4383 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4384 (ss_ashift:<VWIDE>
4385 (mult:<VWIDE>
4386 (sign_extend:<VWIDE>
4387 (vec_select:<VHALF>
4388 (match_operand:VQ_HSI 1 "register_operand" "w")
4389 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4390 (sign_extend:<VWIDE>
4391 (vec_select:<VHALF>
4392 (match_operand:VQ_HSI 2 "register_operand" "w")
4393 (match_dup 3)))
4394 )
4395 (const_int 1)))]
4396 "TARGET_SIMD"
4397 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4398 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4399 )
4400
4401 (define_expand "aarch64_sqdmull2<mode>"
4402 [(match_operand:<VWIDE> 0 "register_operand")
4403 (match_operand:VQ_HSI 1 "register_operand")
4404 (match_operand:VQ_HSI 2 "register_operand")]
4405 "TARGET_SIMD"
4406 {
4407 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4408 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4409 operands[2], p));
4410 DONE;
4411 })
4412
4413 ;; vqdmull2_lane
4414
4415 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4416 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4417 (ss_ashift:<VWIDE>
4418 (mult:<VWIDE>
4419 (sign_extend:<VWIDE>
4420 (vec_select:<VHALF>
4421 (match_operand:VQ_HSI 1 "register_operand" "w")
4422 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4423 (sign_extend:<VWIDE>
4424 (vec_duplicate:<VHALF>
4425 (vec_select:<VEL>
4426 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4427 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4428 ))
4429 (const_int 1)))]
4430 "TARGET_SIMD"
4431 {
4432 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4433 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4434 }
4435 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4436 )
4437
4438 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440 (ss_ashift:<VWIDE>
4441 (mult:<VWIDE>
4442 (sign_extend:<VWIDE>
4443 (vec_select:<VHALF>
4444 (match_operand:VQ_HSI 1 "register_operand" "w")
4445 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4446 (sign_extend:<VWIDE>
4447 (vec_duplicate:<VHALF>
4448 (vec_select:<VEL>
4449 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4450 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4451 ))
4452 (const_int 1)))]
4453 "TARGET_SIMD"
4454 {
4455 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4456 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4457 }
4458 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4459 )
4460
4461 (define_expand "aarch64_sqdmull2_lane<mode>"
4462 [(match_operand:<VWIDE> 0 "register_operand")
4463 (match_operand:VQ_HSI 1 "register_operand")
4464 (match_operand:<VCOND> 2 "register_operand")
4465 (match_operand:SI 3 "immediate_operand")]
4466 "TARGET_SIMD"
4467 {
4468 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4469 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4470 operands[2], operands[3],
4471 p));
4472 DONE;
4473 })
4474
4475 (define_expand "aarch64_sqdmull2_laneq<mode>"
4476 [(match_operand:<VWIDE> 0 "register_operand")
4477 (match_operand:VQ_HSI 1 "register_operand")
4478 (match_operand:<VCONQ> 2 "register_operand")
4479 (match_operand:SI 3 "immediate_operand")]
4480 "TARGET_SIMD"
4481 {
4482 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4484 operands[2], operands[3],
4485 p));
4486 DONE;
4487 })
4488
4489 ;; vqdmull2_n
4490
4491 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4493 (ss_ashift:<VWIDE>
4494 (mult:<VWIDE>
4495 (sign_extend:<VWIDE>
4496 (vec_select:<VHALF>
4497 (match_operand:VQ_HSI 1 "register_operand" "w")
4498 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4499 (sign_extend:<VWIDE>
4500 (vec_duplicate:<VHALF>
4501 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4502 )
4503 (const_int 1)))]
4504 "TARGET_SIMD"
4505 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4506 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4507 )
4508
4509 (define_expand "aarch64_sqdmull2_n<mode>"
4510 [(match_operand:<VWIDE> 0 "register_operand")
4511 (match_operand:VQ_HSI 1 "register_operand")
4512 (match_operand:<VEL> 2 "register_operand")]
4513 "TARGET_SIMD"
4514 {
4515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4516 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4517 operands[2], p));
4518 DONE;
4519 })
4520
4521 ;; vshl
4522
4523 (define_insn "aarch64_<sur>shl<mode>"
4524 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4525 (unspec:VSDQ_I_DI
4526 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4527 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4528 VSHL))]
4529 "TARGET_SIMD"
4530 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4531 [(set_attr "type" "neon_shift_reg<q>")]
4532 )
4533
4534
4535 ;; vqshl
4536
4537 (define_insn "aarch64_<sur>q<r>shl<mode>"
4538 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4539 (unspec:VSDQ_I
4540 [(match_operand:VSDQ_I 1 "register_operand" "w")
4541 (match_operand:VSDQ_I 2 "register_operand" "w")]
4542 VQSHL))]
4543 "TARGET_SIMD"
4544 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4545 [(set_attr "type" "neon_sat_shift_reg<q>")]
4546 )
4547
4548 ;; vshll_n
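;; A shift by exactly the element width has its own mnemonic, SHLL; the
;; [us]shll immediate encodings only cover shifts of 0 to width - 1,
;; hence the special case in the output code below.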
4549
4550 (define_insn "aarch64_<sur>shll_n<mode>"
4551 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4552 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4553 (match_operand:SI 2
4554 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4555 VSHLL))]
4556 "TARGET_SIMD"
4557 {
4558 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4559 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4560 else
4561 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4562 }
4563 [(set_attr "type" "neon_shift_imm_long")]
4564 )
4565
4566 ;; vshll_high_n
4567
4568 (define_insn "aarch64_<sur>shll2_n<mode>"
4569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4570 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4571 (match_operand:SI 2 "immediate_operand" "i")]
4572 VSHLL))]
4573 "TARGET_SIMD"
4574 {
4575 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4576 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4577 else
4578 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4579 }
4580 [(set_attr "type" "neon_shift_imm_long")]
4581 )
4582
4583 ;; vrshr_n
4584
4585 (define_insn "aarch64_<sur>shr_n<mode>"
4586 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4588 (match_operand:SI 2
4589 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4590 VRSHR_N))]
4591 "TARGET_SIMD"
4592 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4593 [(set_attr "type" "neon_sat_shift_imm<q>")]
4594 )
4595
4596 ;; v(r)sra_n
4597
4598 (define_insn "aarch64_<sur>sra_n<mode>"
4599 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4600 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4601 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4602 (match_operand:SI 3
4603 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4604 VSRA))]
4605 "TARGET_SIMD"
4606 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4607 [(set_attr "type" "neon_shift_acc<q>")]
4608 )
4609
4610 ;; vs<lr>i_n
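;; sli/sri shift operand 2 and insert the result into the destination;
;; bits not covered by the shifted value are preserved from operand 1,
;; which is why operand 1 is tied to the output.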
4611
4612 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4613 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4614 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4615 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4616 (match_operand:SI 3
4617 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4618 VSLRI))]
4619 "TARGET_SIMD"
4620 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4621 [(set_attr "type" "neon_shift_imm<q>")]
4622 )
4623
4624 ;; vqshl(u)
4625
4626 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4627 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4628 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4629 (match_operand:SI 2
4630 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4631 VQSHL_N))]
4632 "TARGET_SIMD"
4633 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4634 [(set_attr "type" "neon_sat_shift_imm<q>")]
4635 )
4636
4637
4638 ;; vq(r)shr(u)n_n
4639
4640 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4641 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4642 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4643 (match_operand:SI 2
4644 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4645 VQSHRN_N))]
4646 "TARGET_SIMD"
4647 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4648 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4649 )
4650
4651
4652 ;; cm(eq|ge|gt|lt|le)
4653 ;; Note: the constraint accepts both Dz and Z because different
4654 ;; expanders have different ideas of what should be passed to this pattern.
4655
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4658 (neg:<V_INT_EQUIV>
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VDQ_I 1 "register_operand" "w,w")
4661 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4662 )))]
4663 "TARGET_SIMD"
4664 "@
4665 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4667 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4668 )
4669
4670 (define_insn_and_split "aarch64_cm<optab>di"
4671 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4672 (neg:DI
4673 (COMPARISONS:DI
4674 (match_operand:DI 1 "register_operand" "w,w,r")
4675 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4676 )))
4677 (clobber (reg:CC CC_REGNUM))]
4678 "TARGET_SIMD"
4679 "#"
4680 "&& reload_completed"
4681 [(set (match_operand:DI 0 "register_operand")
4682 (neg:DI
4683 (COMPARISONS:DI
4684 (match_operand:DI 1 "register_operand")
4685 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4686 )))]
4687 {
4688 /* If we are in the general purpose register file,
4689 we split into a sequence of comparison and store.  */
4690 if (GP_REGNUM_P (REGNO (operands[0]))
4691 && GP_REGNUM_P (REGNO (operands[1])))
4692 {
4693 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4694 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4695 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4696 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4697 DONE;
4698 }
4699 /* Otherwise, we expand to a similar pattern which does not
4700 clobber CC_REGNUM. */
4701 }
4702 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4703 )
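;; For example, an eq comparison allocated to general registers splits
;; into a compare and a negated cset, roughly:
;;   cmp   x1, x2
;;   csetm x0, eq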
4704
4705 (define_insn "*aarch64_cm<optab>di"
4706 [(set (match_operand:DI 0 "register_operand" "=w,w")
4707 (neg:DI
4708 (COMPARISONS:DI
4709 (match_operand:DI 1 "register_operand" "w,w")
4710 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4711 )))]
4712 "TARGET_SIMD && reload_completed"
4713 "@
4714 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4715 cm<optab>\t%d0, %d1, #0"
4716 [(set_attr "type" "neon_compare, neon_compare_zero")]
4717 )
4718
4719 ;; cm(hs|hi)
4720
4721 (define_insn "aarch64_cm<optab><mode>"
4722 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4723 (neg:<V_INT_EQUIV>
4724 (UCOMPARISONS:<V_INT_EQUIV>
4725 (match_operand:VDQ_I 1 "register_operand" "w")
4726 (match_operand:VDQ_I 2 "register_operand" "w")
4727 )))]
4728 "TARGET_SIMD"
4729 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4730 [(set_attr "type" "neon_compare<q>")]
4731 )
4732
4733 (define_insn_and_split "aarch64_cm<optab>di"
4734 [(set (match_operand:DI 0 "register_operand" "=w,r")
4735 (neg:DI
4736 (UCOMPARISONS:DI
4737 (match_operand:DI 1 "register_operand" "w,r")
4738 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4739 )))
4740 (clobber (reg:CC CC_REGNUM))]
4741 "TARGET_SIMD"
4742 "#"
4743 "&& reload_completed"
4744 [(set (match_operand:DI 0 "register_operand")
4745 (neg:DI
4746 (UCOMPARISONS:DI
4747 (match_operand:DI 1 "register_operand")
4748 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4749 )))]
4750 {
4751 /* If we are in the general purpose register file,
4752 we split into a sequence of comparison and store.  */
4753 if (GP_REGNUM_P (REGNO (operands[0]))
4754 && GP_REGNUM_P (REGNO (operands[1])))
4755 {
4756 machine_mode mode = CCmode;
4757 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4758 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4759 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4760 DONE;
4761 }
4762 /* Otherwise, we expand to a similar pattern which does not
4763 clobber CC_REGNUM. */
4764 }
4765 [(set_attr "type" "neon_compare,multiple")]
4766 )
4767
4768 (define_insn "*aarch64_cm<optab>di"
4769 [(set (match_operand:DI 0 "register_operand" "=w")
4770 (neg:DI
4771 (UCOMPARISONS:DI
4772 (match_operand:DI 1 "register_operand" "w")
4773 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4774 )))]
4775 "TARGET_SIMD && reload_completed"
4776 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4777 [(set_attr "type" "neon_compare")]
4778 )
4779
4780 ;; cmtst
4781
4782 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4783 ;; we don't have any insns using ne, and aarch64_vcond outputs
4784 ;; not (neg (eq (and x y) 0))
4785 ;; which is rewritten by simplify_rtx as
4786 ;; plus (eq (and x y) 0) -1.
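;; (In two's complement, not x = -x - 1, so
;; not (neg (eq (and x y) 0)) = (eq (and x y) 0) - 1,
;; which is exactly the plus ... -1 form matched below.)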
4787
4788 (define_insn "aarch64_cmtst<mode>"
4789 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4790 (plus:<V_INT_EQUIV>
4791 (eq:<V_INT_EQUIV>
4792 (and:VDQ_I
4793 (match_operand:VDQ_I 1 "register_operand" "w")
4794 (match_operand:VDQ_I 2 "register_operand" "w"))
4795 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4796 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4797 ]
4798 "TARGET_SIMD"
4799 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4800 [(set_attr "type" "neon_tst<q>")]
4801 )
4802
4803 (define_insn_and_split "aarch64_cmtstdi"
4804 [(set (match_operand:DI 0 "register_operand" "=w,r")
4805 (neg:DI
4806 (ne:DI
4807 (and:DI
4808 (match_operand:DI 1 "register_operand" "w,r")
4809 (match_operand:DI 2 "register_operand" "w,r"))
4810 (const_int 0))))
4811 (clobber (reg:CC CC_REGNUM))]
4812 "TARGET_SIMD"
4813 "#"
4814 "&& reload_completed"
4815 [(set (match_operand:DI 0 "register_operand")
4816 (neg:DI
4817 (ne:DI
4818 (and:DI
4819 (match_operand:DI 1 "register_operand")
4820 (match_operand:DI 2 "register_operand"))
4821 (const_int 0))))]
4822 {
4823 /* If we are in the general purpose register file,
4824 we split into a sequence of comparison and store.  */
4825 if (GP_REGNUM_P (REGNO (operands[0]))
4826 && GP_REGNUM_P (REGNO (operands[1])))
4827 {
4828 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4829 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4830 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4831 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4832 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4833 DONE;
4834 }
4835 /* Otherwise, we expand to a similar pattern which does not
4836 clobber CC_REGNUM. */
4837 }
4838 [(set_attr "type" "neon_tst,multiple")]
4839 )
4840
4841 (define_insn "*aarch64_cmtstdi"
4842 [(set (match_operand:DI 0 "register_operand" "=w")
4843 (neg:DI
4844 (ne:DI
4845 (and:DI
4846 (match_operand:DI 1 "register_operand" "w")
4847 (match_operand:DI 2 "register_operand" "w"))
4848 (const_int 0))))]
4849 "TARGET_SIMD"
4850 "cmtst\t%d0, %d1, %d2"
4851 [(set_attr "type" "neon_tst")]
4852 )
4853
4854 ;; fcm(eq|ge|gt|le|lt)
4855
4856 (define_insn "aarch64_cm<optab><mode>"
4857 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4858 (neg:<V_INT_EQUIV>
4859 (COMPARISONS:<V_INT_EQUIV>
4860 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4861 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4862 )))]
4863 "TARGET_SIMD"
4864 "@
4865 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4866 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4867 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4868 )
4869
4870 ;; fac(ge|gt)
4871 ;; Note that we can also handle what would be fac(le|lt) by swapping
4872 ;; the operands and generating fac(ge|gt).
4873
4874 (define_insn "aarch64_fac<optab><mode>"
4875 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4876 (neg:<V_INT_EQUIV>
4877 (FAC_COMPARISONS:<V_INT_EQUIV>
4878 (abs:VHSDF_HSDF
4879 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4880 (abs:VHSDF_HSDF
4881 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4882 )))]
4883 "TARGET_SIMD"
4884 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4885 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4886 )
4887
4888 ;; addp
4889
4890 (define_insn "aarch64_addp<mode>"
4891 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4892 (unspec:VD_BHSI
4893 [(match_operand:VD_BHSI 1 "register_operand" "w")
4894 (match_operand:VD_BHSI 2 "register_operand" "w")]
4895 UNSPEC_ADDP))]
4896 "TARGET_SIMD"
4897 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4898 [(set_attr "type" "neon_reduc_add<q>")]
4899 )
4900
4901 (define_insn "aarch64_addpdi"
4902 [(set (match_operand:DI 0 "register_operand" "=w")
4903 (unspec:DI
4904 [(match_operand:V2DI 1 "register_operand" "w")]
4905 UNSPEC_ADDP))]
4906 "TARGET_SIMD"
4907 "addp\t%d0, %1.2d"
4908 [(set_attr "type" "neon_reduc_add")]
4909 )
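
;; Hedged usage sketch for the two addp patterns above:
;;
;;   #include <arm_neon.h>
;;   int32x2_t pair_add (int32x2_t a, int32x2_t b) { return vpadd_s32 (a, b); }
;;   int64_t   pair_red (int64x2_t a)              { return vpaddd_s64 (a); }
;;
;; expected to assemble to "addp v0.2s, v0.2s, v1.2s" and "addp d0, v0.2d".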
4910
4911 ;; sqrt
4912
4913 (define_expand "sqrt<mode>2"
4914 [(set (match_operand:VHSDF 0 "register_operand")
4915 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4916 "TARGET_SIMD"
4917 {
4918 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4919 DONE;
4920 })
4921
4922 (define_insn "*sqrt<mode>2"
4923 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4924 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4925 "TARGET_SIMD"
4926 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4927 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4928 )
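
;; Illustration (a sketch, assuming the usual arm_neon.h mapping):
;;
;;   #include <arm_neon.h>
;;   float32x4_t root (float32x4_t x) { return vsqrtq_f32 (x); }
;;
;; normally emits "fsqrt v0.4s, v0.4s"; the expander above may instead
;; produce an frsqrte/frsqrts Newton-Raphson sequence when the
;; approximate-sqrt heuristics in aarch64_emit_approx_sqrt fire.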
4929
4930 ;; Patterns for vector struct loads and stores.
4931
4932 (define_insn "aarch64_simd_ld2<mode>"
4933 [(set (match_operand:OI 0 "register_operand" "=w")
4934 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 UNSPEC_LD2))]
4937 "TARGET_SIMD"
4938 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load2_2reg<q>")]
4940 )
4941
4942 (define_insn "aarch64_simd_ld2r<mode>"
4943 [(set (match_operand:OI 0 "register_operand" "=w")
4944 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4946 UNSPEC_LD2_DUP))]
4947 "TARGET_SIMD"
4948 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4949 [(set_attr "type" "neon_load2_all_lanes<q>")]
4950 )
4951
4952 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4953 [(set (match_operand:OI 0 "register_operand" "=w")
4954 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4955 (match_operand:OI 2 "register_operand" "0")
4956 (match_operand:SI 3 "immediate_operand" "i")
4957 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4958 UNSPEC_LD2_LANE))]
4959 "TARGET_SIMD"
4960 {
4961 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4962 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4963 }
4964 [(set_attr "type" "neon_load2_one_lane")]
4965 )
4966
4967 (define_expand "vec_load_lanesoi<mode>"
4968 [(set (match_operand:OI 0 "register_operand")
4969 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971 UNSPEC_LD2))]
4972 "TARGET_SIMD"
4973 {
4974 if (BYTES_BIG_ENDIAN)
4975 {
4976 rtx tmp = gen_reg_rtx (OImode);
4977 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4978 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4979 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4980 }
4981 else
4982 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4983 DONE;
4984 })
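
;; Hedged sketch of what this expander services: the vectorizer and the
;; vld2 intrinsics both load two-element structures, e.g.
;;
;;   #include <arm_neon.h>
;;   int32x4x2_t load2 (const int32_t *p) { return vld2q_s32 (p); }
;;
;; expected to emit "ld2 {v0.4s - v1.4s}, [x0]".  On big-endian targets
;; the TBL pass above rewrites the loaded registers so that lane numbering
;; still follows GCC's vector-extension (array) order.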
4985
4986 (define_insn "aarch64_simd_st2<mode>"
4987 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4988 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4989 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 UNSPEC_ST2))]
4991 "TARGET_SIMD"
4992 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4993 [(set_attr "type" "neon_store2_2reg<q>")]
4994 )
4995
4996 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4997 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4998 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4999 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5000 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5001 (match_operand:SI 2 "immediate_operand" "i")]
5002 UNSPEC_ST2_LANE))]
5003 "TARGET_SIMD"
5004 {
5005 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5006 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5007 }
5008 [(set_attr "type" "neon_store2_one_lane<q>")]
5009 )
5010
5011 (define_expand "vec_store_lanesoi<mode>"
5012 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5013 (unspec:OI [(match_operand:OI 1 "register_operand")
5014 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5015 UNSPEC_ST2))]
5016 "TARGET_SIMD"
5017 {
5018 if (BYTES_BIG_ENDIAN)
5019 {
5020 rtx tmp = gen_reg_rtx (OImode);
5021 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5022 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5023 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5024 }
5025 else
5026 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5027 DONE;
5028 })
5029
5030 (define_insn "aarch64_simd_ld3<mode>"
5031 [(set (match_operand:CI 0 "register_operand" "=w")
5032 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5034 UNSPEC_LD3))]
5035 "TARGET_SIMD"
5036 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037 [(set_attr "type" "neon_load3_3reg<q>")]
5038 )
5039
5040 (define_insn "aarch64_simd_ld3r<mode>"
5041 [(set (match_operand:CI 0 "register_operand" "=w")
5042 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5043 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5044 UNSPEC_LD3_DUP))]
5045 "TARGET_SIMD"
5046 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5047 [(set_attr "type" "neon_load3_all_lanes<q>")]
5048 )
5049
5050 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5051 [(set (match_operand:CI 0 "register_operand" "=w")
5052 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5053 (match_operand:CI 2 "register_operand" "0")
5054 (match_operand:SI 3 "immediate_operand" "i")
5055 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056 UNSPEC_LD3_LANE))]
5057 "TARGET_SIMD"
5058 {
5059 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5060 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5061 }
5062 [(set_attr "type" "neon_load3_one_lane")]
5063 )
5064
5065 (define_expand "vec_load_lanesci<mode>"
5066 [(set (match_operand:CI 0 "register_operand")
5067 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5068 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5069 UNSPEC_LD3))]
5070 "TARGET_SIMD"
5071 {
5072 if (BYTES_BIG_ENDIAN)
5073 {
5074 rtx tmp = gen_reg_rtx (CImode);
5075 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5076 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5077 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5078 }
5079 else
5080 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5081 DONE;
5082 })
5083
5084 (define_insn "aarch64_simd_st3<mode>"
5085 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5086 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5087 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088 UNSPEC_ST3))]
5089 "TARGET_SIMD"
5090 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5091 [(set_attr "type" "neon_store3_3reg<q>")]
5092 )
5093
5094 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5095 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5096 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5097 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5098 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5099 (match_operand:SI 2 "immediate_operand" "i")]
5100 UNSPEC_ST3_LANE))]
5101 "TARGET_SIMD"
5102 {
5103 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5104 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5105 }
5106 [(set_attr "type" "neon_store3_one_lane<q>")]
5107 )
5108
5109 (define_expand "vec_store_lanesci<mode>"
5110 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5111 (unspec:CI [(match_operand:CI 1 "register_operand")
5112 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5113 UNSPEC_ST3))]
5114 "TARGET_SIMD"
5115 {
5116 if (BYTES_BIG_ENDIAN)
5117 {
5118 rtx tmp = gen_reg_rtx (CImode);
5119 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5120 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5121 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5122 }
5123 else
5124 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5125 DONE;
5126 })
5127
5128 (define_insn "aarch64_simd_ld4<mode>"
5129 [(set (match_operand:XI 0 "register_operand" "=w")
5130 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5131 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132 UNSPEC_LD4))]
5133 "TARGET_SIMD"
5134 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5135 [(set_attr "type" "neon_load4_4reg<q>")]
5136 )
5137
5138 (define_insn "aarch64_simd_ld4r<mode>"
5139 [(set (match_operand:XI 0 "register_operand" "=w")
5140 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5141 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5142 UNSPEC_LD4_DUP))]
5143 "TARGET_SIMD"
5144 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5145 [(set_attr "type" "neon_load4_all_lanes<q>")]
5146 )
5147
5148 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5149 [(set (match_operand:XI 0 "register_operand" "=w")
5150 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5151 (match_operand:XI 2 "register_operand" "0")
5152 (match_operand:SI 3 "immediate_operand" "i")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154 UNSPEC_LD4_LANE))]
5155 "TARGET_SIMD"
5156 {
5157 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5158 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5159 }
5160 [(set_attr "type" "neon_load4_one_lane")]
5161 )
5162
5163 (define_expand "vec_load_lanesxi<mode>"
5164 [(set (match_operand:XI 0 "register_operand")
5165 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5166 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5167 UNSPEC_LD4))]
5168 "TARGET_SIMD"
5169 {
5170 if (BYTES_BIG_ENDIAN)
5171 {
5172 rtx tmp = gen_reg_rtx (XImode);
5173 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5174 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5175 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5176 }
5177 else
5178 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5179 DONE;
5180 })
5181
5182 (define_insn "aarch64_simd_st4<mode>"
5183 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5185 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5186 UNSPEC_ST4))]
5187 "TARGET_SIMD"
5188 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5189 [(set_attr "type" "neon_store4_4reg<q>")]
5190 )
5191
5192 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5193 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5194 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5195 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5196 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5197 (match_operand:SI 2 "immediate_operand" "i")]
5198 UNSPEC_ST4_LANE))]
5199 "TARGET_SIMD"
5200 {
5201 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5202 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5203 }
5204 [(set_attr "type" "neon_store4_one_lane<q>")]
5205 )
5206
5207 (define_expand "vec_store_lanesxi<mode>"
5208 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5209 (unspec:XI [(match_operand:XI 1 "register_operand")
5210 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5211 UNSPEC_ST4))]
5212 "TARGET_SIMD"
5213 {
5214 if (BYTES_BIG_ENDIAN)
5215 {
5216 rtx tmp = gen_reg_rtx (XImode);
5217 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5218 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5219 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5220 }
5221 else
5222 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5223 DONE;
5224 })
5225
5226 (define_insn_and_split "aarch64_rev_reglist<mode>"
5227 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5228 (unspec:VSTRUCT
5229 [(match_operand:VSTRUCT 1 "register_operand" "w")
5230 (match_operand:V16QI 2 "register_operand" "w")]
5231 UNSPEC_REV_REGLIST))]
5232 "TARGET_SIMD"
5233 "#"
5234 "&& reload_completed"
5235 [(const_int 0)]
5236 {
5237 int i;
5238 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5239 for (i = 0; i < nregs; i++)
5240 {
5241 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5242 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5243 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5244 }
5245 DONE;
5246 }
5247 [(set_attr "type" "neon_tbl1_q")
5248 (set_attr "length" "<insn_count>")]
5249 )
5250
5251 ;; Reload patterns for AdvSIMD register list operands.
5252
5253 (define_expand "mov<mode>"
5254 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5255 (match_operand:VSTRUCT 1 "general_operand"))]
5256 "TARGET_SIMD"
5257 {
5258 if (can_create_pseudo_p ())
5259 {
5260 if (GET_CODE (operands[0]) != REG)
5261 operands[1] = force_reg (<MODE>mode, operands[1]);
5262 }
5263 })
5264
5265
5266 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5267 [(match_operand:CI 0 "register_operand")
5268 (match_operand:DI 1 "register_operand")
5269 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270 "TARGET_SIMD"
5271 {
5272 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5273 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5274 DONE;
5275 })
5276
5277 (define_insn "aarch64_ld1_x3_<mode>"
5278 [(set (match_operand:CI 0 "register_operand" "=w")
5279 (unspec:CI
5280 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5281 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5282 "TARGET_SIMD"
5283 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5284 [(set_attr "type" "neon_load1_3reg<q>")]
5285 )
5286
5287 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5288 [(match_operand:DI 0 "register_operand")
5289 (match_operand:OI 1 "register_operand")
5290 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291 "TARGET_SIMD"
5292 {
5293 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5294 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5295 DONE;
5296 })
5297
5298 (define_insn "aarch64_st1_x2_<mode>"
5299 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5300 (unspec:OI
5301 [(match_operand:OI 1 "register_operand" "w")
5302 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5303 "TARGET_SIMD"
5304 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5305 [(set_attr "type" "neon_store1_2reg<q>")]
5306 )
5307
5308 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5309 [(match_operand:DI 0 "register_operand")
5310 (match_operand:CI 1 "register_operand")
5311 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5312 "TARGET_SIMD"
5313 {
5314 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5315 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5316 DONE;
5317 })
5318
5319 (define_insn "aarch64_st1_x3_<mode>"
5320 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5321 (unspec:CI
5322 [(match_operand:CI 1 "register_operand" "w")
5323 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5324 "TARGET_SIMD"
5325 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5326 [(set_attr "type" "neon_store1_3reg<q>")]
5327 )
5328
5329 (define_insn "*aarch64_mov<mode>"
5330 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5331 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5332 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5333 && (register_operand (operands[0], <MODE>mode)
5334 || register_operand (operands[1], <MODE>mode))"
5335 "@
5336 #
5337 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5338 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5339 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5340 neon_load<nregs>_<nregs>reg_q")
5341 (set_attr "length" "<insn_count>,4,4")]
5342 )
5343
5344 (define_insn "aarch64_be_ld1<mode>"
5345 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5346 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5347 "aarch64_simd_struct_operand" "Utv")]
5348 UNSPEC_LD1))]
5349 "TARGET_SIMD"
5350 "ld1\\t{%0<Vmtype>}, %1"
5351 [(set_attr "type" "neon_load1_1reg<q>")]
5352 )
5353
5354 (define_insn "aarch64_be_st1<mode>"
5355 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5356 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5357 UNSPEC_ST1))]
5358 "TARGET_SIMD"
5359 "st1\\t{%1<Vmtype>}, %0"
5360 [(set_attr "type" "neon_store1_1reg<q>")]
5361 )
5362
5363 (define_insn "*aarch64_be_movoi"
5364 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5365 (match_operand:OI 1 "general_operand" " w,w,m"))]
5366 "TARGET_SIMD && BYTES_BIG_ENDIAN
5367 && (register_operand (operands[0], OImode)
5368 || register_operand (operands[1], OImode))"
5369 "@
5370 #
5371 stp\\t%q1, %R1, %0
5372 ldp\\t%q0, %R0, %1"
5373 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5374 (set_attr "length" "8,4,4")]
5375 )
5376
5377 (define_insn "*aarch64_be_movci"
5378 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5379 (match_operand:CI 1 "general_operand" " w,w,o"))]
5380 "TARGET_SIMD && BYTES_BIG_ENDIAN
5381 && (register_operand (operands[0], CImode)
5382 || register_operand (operands[1], CImode))"
5383 "#"
5384 [(set_attr "type" "multiple")
5385 (set_attr "length" "12,4,4")]
5386 )
5387
5388 (define_insn "*aarch64_be_movxi"
5389 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5390 (match_operand:XI 1 "general_operand" " w,w,o"))]
5391 "TARGET_SIMD && BYTES_BIG_ENDIAN
5392 && (register_operand (operands[0], XImode)
5393 || register_operand (operands[1], XImode))"
5394 "#"
5395 [(set_attr "type" "multiple")
5396 (set_attr "length" "16,4,4")]
5397 )
5398
5399 (define_split
5400 [(set (match_operand:OI 0 "register_operand")
5401 (match_operand:OI 1 "register_operand"))]
5402 "TARGET_SIMD && reload_completed"
5403 [(const_int 0)]
5404 {
5405 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5406 DONE;
5407 })
5408
5409 (define_split
5410 [(set (match_operand:CI 0 "nonimmediate_operand")
5411 (match_operand:CI 1 "general_operand"))]
5412 "TARGET_SIMD && reload_completed"
5413 [(const_int 0)]
5414 {
5415 if (register_operand (operands[0], CImode)
5416 && register_operand (operands[1], CImode))
5417 {
5418 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5419 DONE;
5420 }
5421 else if (BYTES_BIG_ENDIAN)
5422 {
5423 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5424 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5425 emit_move_insn (gen_lowpart (V16QImode,
5426 simplify_gen_subreg (TImode, operands[0],
5427 CImode, 32)),
5428 gen_lowpart (V16QImode,
5429 simplify_gen_subreg (TImode, operands[1],
5430 CImode, 32)));
5431 DONE;
5432 }
5433 else
5434 FAIL;
5435 })
5436
5437 (define_split
5438 [(set (match_operand:XI 0 "nonimmediate_operand")
5439 (match_operand:XI 1 "general_operand"))]
5440 "TARGET_SIMD && reload_completed"
5441 [(const_int 0)]
5442 {
5443 if (register_operand (operands[0], XImode)
5444 && register_operand (operands[1], XImode))
5445 {
5446 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5447 DONE;
5448 }
5449 else if (BYTES_BIG_ENDIAN)
5450 {
5451 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5452 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5453 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5454 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5455 DONE;
5456 }
5457 else
5458 FAIL;
5459 })
5460
5461 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5462 [(match_operand:VSTRUCT 0 "register_operand")
5463 (match_operand:DI 1 "register_operand")
5464 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5465 "TARGET_SIMD"
5466 {
5467 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5468 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5469 * <VSTRUCT:nregs>);
5470
5471 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5472 mem));
5473 DONE;
5474 })
5475
5476 (define_insn "aarch64_ld2<mode>_dreg"
5477 [(set (match_operand:OI 0 "register_operand" "=w")
5478 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5479 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 UNSPEC_LD2_DREG))]
5481 "TARGET_SIMD"
5482 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5483 [(set_attr "type" "neon_load2_2reg<q>")]
5484 )
5485
5486 (define_insn "aarch64_ld2<mode>_dreg"
5487 [(set (match_operand:OI 0 "register_operand" "=w")
5488 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5489 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5490 UNSPEC_LD2_DREG))]
5491 "TARGET_SIMD"
5492 "ld1\\t{%S0.1d - %T0.1d}, %1"
5493 [(set_attr "type" "neon_load1_2reg<q>")]
5494 )
5495
5496 (define_insn "aarch64_ld3<mode>_dreg"
5497 [(set (match_operand:CI 0 "register_operand" "=w")
5498 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5499 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5500 UNSPEC_LD3_DREG))]
5501 "TARGET_SIMD"
5502 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5503 [(set_attr "type" "neon_load3_3reg<q>")]
5504 )
5505
5506 (define_insn "aarch64_ld3<mode>_dreg"
5507 [(set (match_operand:CI 0 "register_operand" "=w")
5508 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5509 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5510 UNSPEC_LD3_DREG))]
5511 "TARGET_SIMD"
5512 "ld1\\t{%S0.1d - %U0.1d}, %1"
5513 [(set_attr "type" "neon_load1_3reg<q>")]
5514 )
5515
5516 (define_insn "aarch64_ld4<mode>_dreg"
5517 [(set (match_operand:XI 0 "register_operand" "=w")
5518 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5519 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5520 UNSPEC_LD4_DREG))]
5521 "TARGET_SIMD"
5522 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5523 [(set_attr "type" "neon_load4_4reg<q>")]
5524 )
5525
5526 (define_insn "aarch64_ld4<mode>_dreg"
5527 [(set (match_operand:XI 0 "register_operand" "=w")
5528 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5529 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530 UNSPEC_LD4_DREG))]
5531 "TARGET_SIMD"
5532 "ld1\\t{%S0.1d - %V0.1d}, %1"
5533 [(set_attr "type" "neon_load1_4reg<q>")]
5534 )
5535
5536 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5537 [(match_operand:VSTRUCT 0 "register_operand")
5538 (match_operand:DI 1 "register_operand")
5539 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540 "TARGET_SIMD"
5541 {
5542 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5543 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5544
5545 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5546 DONE;
5547 })
5548
5549 (define_expand "aarch64_ld1<VALL_F16:mode>"
5550 [(match_operand:VALL_F16 0 "register_operand")
5551 (match_operand:DI 1 "register_operand")]
5552 "TARGET_SIMD"
5553 {
5554 machine_mode mode = <VALL_F16:MODE>mode;
5555 rtx mem = gen_rtx_MEM (mode, operands[1]);
5556
5557 if (BYTES_BIG_ENDIAN)
5558 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5559 else
5560 emit_move_insn (operands[0], mem);
5561 DONE;
5562 })
5563
5564 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5565 [(match_operand:VSTRUCT 0 "register_operand")
5566 (match_operand:DI 1 "register_operand")
5567 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568 "TARGET_SIMD"
5569 {
5570 machine_mode mode = <VSTRUCT:MODE>mode;
5571 rtx mem = gen_rtx_MEM (mode, operands[1]);
5572
5573 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5574 DONE;
5575 })
5576
5577 (define_expand "aarch64_ld1x2<VQ:mode>"
5578 [(match_operand:OI 0 "register_operand")
5579 (match_operand:DI 1 "register_operand")
5580 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 "TARGET_SIMD"
5582 {
5583 machine_mode mode = OImode;
5584 rtx mem = gen_rtx_MEM (mode, operands[1]);
5585
5586 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5587 DONE;
5588 })
5589
5590 (define_expand "aarch64_ld1x2<VDC:mode>"
5591 [(match_operand:OI 0 "register_operand")
5592 (match_operand:DI 1 "register_operand")
5593 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5594 "TARGET_SIMD"
5595 {
5596 machine_mode mode = OImode;
5597 rtx mem = gen_rtx_MEM (mode, operands[1]);
5598
5599 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5600 DONE;
5601 })
5602
5603
5604 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5605 [(match_operand:VSTRUCT 0 "register_operand")
5606 (match_operand:DI 1 "register_operand")
5607 (match_operand:VSTRUCT 2 "register_operand")
5608 (match_operand:SI 3 "immediate_operand")
5609 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5610 "TARGET_SIMD"
5611 {
5612 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5613 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5614 * <VSTRUCT:nregs>);
5615
5616 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5617 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5618 operands[0], mem, operands[2], operands[3]));
5619 DONE;
5620 })
5621
5622 ;; Expanders for builtins to extract vector registers from large
5623 ;; opaque integer modes.
5624
5625 ;; D-register list.
5626
5627 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5628 [(match_operand:VDC 0 "register_operand")
5629 (match_operand:VSTRUCT 1 "register_operand")
5630 (match_operand:SI 2 "immediate_operand")]
5631 "TARGET_SIMD"
5632 {
5633 int part = INTVAL (operands[2]);
5634 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5635 int offset = part * 16;
5636
5637 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5638 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5639 DONE;
5640 })
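
;; Hedged illustration of how arm_neon.h of this vintage consumes the
;; expander above when unpacking a D-register structure; the builtin
;; spelling below is derived from this pattern's name by the builtin
;; generator and is shown only as an example:
;;
;;   int32x2x2_t ret;
;;   __builtin_aarch64_simd_oi __o = /* result of an ld2 builtin */;
;;   ret.val[0] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 0);
;;   ret.val[1] = (int32x2_t) __builtin_aarch64_get_dregoiv2si (__o, 1);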
5641
5642 ;; Q-register list.
5643
5644 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5645 [(match_operand:VQ 0 "register_operand")
5646 (match_operand:VSTRUCT 1 "register_operand")
5647 (match_operand:SI 2 "immediate_operand")]
5648 "TARGET_SIMD"
5649 {
5650 int part = INTVAL (operands[2]);
5651 int offset = part * 16;
5652
5653 emit_move_insn (operands[0],
5654 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5655 DONE;
5656 })
5657
5660 ;; Permute instructions
5661
5662 ;; vec_perm support
5663
5664 (define_expand "vec_perm<mode>"
5665 [(match_operand:VB 0 "register_operand")
5666 (match_operand:VB 1 "register_operand")
5667 (match_operand:VB 2 "register_operand")
5668 (match_operand:VB 3 "register_operand")]
5669 "TARGET_SIMD"
5670 {
5671 aarch64_expand_vec_perm (operands[0], operands[1],
5672 operands[2], operands[3], <nunits>);
5673 DONE;
5674 })
5675
5676 (define_insn "aarch64_tbl1<mode>"
5677 [(set (match_operand:VB 0 "register_operand" "=w")
5678 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5679 (match_operand:VB 2 "register_operand" "w")]
5680 UNSPEC_TBL))]
5681 "TARGET_SIMD"
5682 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5683 [(set_attr "type" "neon_tbl1<q>")]
5684 )
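
;; Hedged usage sketch: aarch64_expand_vec_perm lowers general shuffles to
;; this single-register TBL when one source table suffices, and the qtbl
;; intrinsics map to it directly:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t permute (uint8x16_t tab, uint8x16_t idx)
;;   {
;;     return vqtbl1q_u8 (tab, idx);  /* expected: tbl v0.16b, {v0.16b}, v1.16b */
;;   }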
5685
5686 ;; Two source registers.
5687
5688 (define_insn "aarch64_tbl2v16qi"
5689 [(set (match_operand:V16QI 0 "register_operand" "=w")
5690 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5691 (match_operand:V16QI 2 "register_operand" "w")]
5692 UNSPEC_TBL))]
5693 "TARGET_SIMD"
5694 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5695 [(set_attr "type" "neon_tbl2_q")]
5696 )
5697
5698 (define_insn "aarch64_tbl3<mode>"
5699 [(set (match_operand:VB 0 "register_operand" "=w")
5700 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5701 (match_operand:VB 2 "register_operand" "w")]
5702 UNSPEC_TBL))]
5703 "TARGET_SIMD"
5704 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5705 [(set_attr "type" "neon_tbl3")]
5706 )
5707
5708 (define_insn "aarch64_tbx4<mode>"
5709 [(set (match_operand:VB 0 "register_operand" "=w")
5710 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5711 (match_operand:OI 2 "register_operand" "w")
5712 (match_operand:VB 3 "register_operand" "w")]
5713 UNSPEC_TBX))]
5714 "TARGET_SIMD"
5715 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5716 [(set_attr "type" "neon_tbl4")]
5717 )
5718
5719 ;; Three source registers.
5720
5721 (define_insn "aarch64_qtbl3<mode>"
5722 [(set (match_operand:VB 0 "register_operand" "=w")
5723 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5724 (match_operand:VB 2 "register_operand" "w")]
5725 UNSPEC_TBL))]
5726 "TARGET_SIMD"
5727 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5728 [(set_attr "type" "neon_tbl3")]
5729 )
5730
5731 (define_insn "aarch64_qtbx3<mode>"
5732 [(set (match_operand:VB 0 "register_operand" "=w")
5733 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5734 (match_operand:CI 2 "register_operand" "w")
5735 (match_operand:VB 3 "register_operand" "w")]
5736 UNSPEC_TBX))]
5737 "TARGET_SIMD"
5738 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5739 [(set_attr "type" "neon_tbl3")]
5740 )
5741
5742 ;; Four source registers.
5743
5744 (define_insn "aarch64_qtbl4<mode>"
5745 [(set (match_operand:VB 0 "register_operand" "=w")
5746 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5747 (match_operand:VB 2 "register_operand" "w")]
5748 UNSPEC_TBL))]
5749 "TARGET_SIMD"
5750 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5751 [(set_attr "type" "neon_tbl4")]
5752 )
5753
5754 (define_insn "aarch64_qtbx4<mode>"
5755 [(set (match_operand:VB 0 "register_operand" "=w")
5756 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5757 (match_operand:XI 2 "register_operand" "w")
5758 (match_operand:VB 3 "register_operand" "w")]
5759 UNSPEC_TBX))]
5760 "TARGET_SIMD"
5761 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5762 [(set_attr "type" "neon_tbl4")]
5763 )
5764
5765 (define_insn_and_split "aarch64_combinev16qi"
5766 [(set (match_operand:OI 0 "register_operand" "=w")
5767 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5768 (match_operand:V16QI 2 "register_operand" "w")]
5769 UNSPEC_CONCAT))]
5770 "TARGET_SIMD"
5771 "#"
5772 "&& reload_completed"
5773 [(const_int 0)]
5774 {
5775 aarch64_split_combinev16qi (operands);
5776 DONE;
5777 }
5778 [(set_attr "type" "multiple")]
5779 )
5780
5781 ;; This instruction's pattern is generated directly by
5782 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5783 ;; need corresponding changes there.
5784 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5785 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5786 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5787 (match_operand:VALL_F16 2 "register_operand" "w")]
5788 PERMUTE))]
5789 "TARGET_SIMD"
5790 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5791 [(set_attr "type" "neon_permute<q>")]
5792 )
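
;; A hedged sketch of a shuffle aarch64_expand_vec_perm_const recognises
;; and routes to this pattern:
;;
;;   #include <arm_neon.h>
;;   int32x4_t zip_lo (int32x4_t a, int32x4_t b) { return vzip1q_s32 (a, b); }
;;
;; expected to emit "zip1 v0.4s, v0.4s, v1.4s"; uzp1/uzp2 and trn1/trn2
;; fall out of the same PERMUTE iterator.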
5793
5794 ;; This instruction's pattern is generated directly by
5795 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5796 ;; need corresponding changes there. Note that the immediate (third)
5797 ;; operand is a lane index not a byte index.
5798 (define_insn "aarch64_ext<mode>"
5799 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5800 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5801 (match_operand:VALL_F16 2 "register_operand" "w")
5802 (match_operand:SI 3 "immediate_operand" "i")]
5803 UNSPEC_EXT))]
5804 "TARGET_SIMD"
5805 {
5806 operands[3] = GEN_INT (INTVAL (operands[3])
5807 * GET_MODE_UNIT_SIZE (<MODE>mode));
5808 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5809 }
5810 [(set_attr "type" "neon_ext<q>")]
5811 )
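
;; Hedged illustration of the lane-to-byte scaling above:
;;
;;   #include <arm_neon.h>
;;   uint32x4_t rot (uint32x4_t a, uint32x4_t b) { return vextq_u32 (a, b, 1); }
;;
;; The lane index 1 is multiplied by the 4-byte element size, so we expect
;; "ext v0.16b, v0.16b, v1.16b, #4".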
5812
5813 ;; This instruction's pattern is generated directly by
5814 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5815 ;; need corresponding changes there.
5816 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5817 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5818 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5819 REVERSE))]
5820 "TARGET_SIMD"
5821 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5822 [(set_attr "type" "neon_rev<q>")]
5823 )
5824
5825 (define_insn "aarch64_st2<mode>_dreg"
5826 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5827 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5828 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 UNSPEC_ST2))]
5830 "TARGET_SIMD"
5831 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5832 [(set_attr "type" "neon_store2_2reg")]
5833 )
5834
5835 (define_insn "aarch64_st2<mode>_dreg"
5836 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5837 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5838 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5839 UNSPEC_ST2))]
5840 "TARGET_SIMD"
5841 "st1\\t{%S1.1d - %T1.1d}, %0"
5842 [(set_attr "type" "neon_store1_2reg")]
5843 )
5844
5845 (define_insn "aarch64_st3<mode>_dreg"
5846 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5847 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5848 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5849 UNSPEC_ST3))]
5850 "TARGET_SIMD"
5851 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5852 [(set_attr "type" "neon_store3_3reg")]
5853 )
5854
5855 (define_insn "aarch64_st3<mode>_dreg"
5856 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5857 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5858 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5859 UNSPEC_ST3))]
5860 "TARGET_SIMD"
5861 "st1\\t{%S1.1d - %U1.1d}, %0"
5862 [(set_attr "type" "neon_store1_3reg")]
5863 )
5864
5865 (define_insn "aarch64_st4<mode>_dreg"
5866 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5867 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5868 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5869 UNSPEC_ST4))]
5870 "TARGET_SIMD"
5871 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5872 [(set_attr "type" "neon_store4_4reg")]
5873 )
5874
5875 (define_insn "aarch64_st4<mode>_dreg"
5876 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5877 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5878 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5879 UNSPEC_ST4))]
5880 "TARGET_SIMD"
5881 "st1\\t{%S1.1d - %V1.1d}, %0"
5882 [(set_attr "type" "neon_store1_4reg")]
5883 )
5884
5885 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5886 [(match_operand:DI 0 "register_operand")
5887 (match_operand:VSTRUCT 1 "register_operand")
5888 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5889 "TARGET_SIMD"
5890 {
5891 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5892 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5893
5894 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5895 DONE;
5896 })
5897
5898 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5899 [(match_operand:DI 0 "register_operand")
5900 (match_operand:VSTRUCT 1 "register_operand")
5901 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5902 "TARGET_SIMD"
5903 {
5904 machine_mode mode = <VSTRUCT:MODE>mode;
5905 rtx mem = gen_rtx_MEM (mode, operands[0]);
5906
5907 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5908 DONE;
5909 })
5910
5911 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5912 [(match_operand:DI 0 "register_operand")
5913 (match_operand:VSTRUCT 1 "register_operand")
5914 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5915 (match_operand:SI 2 "immediate_operand")]
5916 "TARGET_SIMD"
5917 {
5918 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5919 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5920 * <VSTRUCT:nregs>);
5921
5922 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5923 mem, operands[1], operands[2]));
5924 DONE;
5925 })
5926
5927 (define_expand "aarch64_st1<VALL_F16:mode>"
5928 [(match_operand:DI 0 "register_operand")
5929 (match_operand:VALL_F16 1 "register_operand")]
5930 "TARGET_SIMD"
5931 {
5932 machine_mode mode = <VALL_F16:MODE>mode;
5933 rtx mem = gen_rtx_MEM (mode, operands[0]);
5934
5935 if (BYTES_BIG_ENDIAN)
5936 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5937 else
5938 emit_move_insn (mem, operands[1]);
5939 DONE;
5940 })
5941
5942 ;; Expander for builtins to insert vector registers into large
5943 ;; opaque integer modes.
5944
5945 ;; Q-register list. We don't need a D-reg inserter, as arm_neon.h
5946 ;; zero-extends the D-regs and inserts the resulting Q-regs.
5947
5948 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5949 [(match_operand:VSTRUCT 0 "register_operand")
5950 (match_operand:VSTRUCT 1 "register_operand")
5951 (match_operand:VQ 2 "register_operand")
5952 (match_operand:SI 3 "immediate_operand")]
5953 "TARGET_SIMD"
5954 {
5955 int part = INTVAL (operands[3]);
5956 int offset = part * 16;
5957
5958 emit_move_insn (operands[0], operands[1]);
5959 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5960 operands[2]);
5961 DONE;
5962 })
5963
5964 ;; Standard pattern name vec_init<mode><Vel>.
5965
5966 (define_expand "vec_init<mode><Vel>"
5967 [(match_operand:VALL_F16 0 "register_operand")
5968 (match_operand 1 "" "")]
5969 "TARGET_SIMD"
5970 {
5971 aarch64_expand_vector_init (operands[0], operands[1]);
5972 DONE;
5973 })
5974
5975 (define_expand "vec_init<mode><Vhalf>"
5976 [(match_operand:VQ_NO2E 0 "register_operand")
5977 (match_operand 1 "" "")]
5978 "TARGET_SIMD"
5979 {
5980 aarch64_expand_vector_init (operands[0], operands[1]);
5981 DONE;
5982 })
5983
5984 (define_insn "*aarch64_simd_ld1r<mode>"
5985 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5986 (vec_duplicate:VALL_F16
5987 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5988 "TARGET_SIMD"
5989 "ld1r\\t{%0.<Vtype>}, %1"
5990 [(set_attr "type" "neon_load1_all_lanes")]
5991 )
5992
5993 (define_insn "aarch64_simd_ld1<mode>_x2"
5994 [(set (match_operand:OI 0 "register_operand" "=w")
5995 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5996 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5997 UNSPEC_LD1))]
5998 "TARGET_SIMD"
5999 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6000 [(set_attr "type" "neon_load1_2reg<q>")]
6001 )
6002
6003 (define_insn "aarch64_simd_ld1<mode>_x2"
6004 [(set (match_operand:OI 0 "register_operand" "=w")
6005 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6006 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6007 UNSPEC_LD1))]
6008 "TARGET_SIMD"
6009 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6010 [(set_attr "type" "neon_load1_2reg<q>")]
6011 )
6012
6013
6014 (define_insn "@aarch64_frecpe<mode>"
6015 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6016 (unspec:VHSDF_HSDF
6017 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6018 UNSPEC_FRECPE))]
6019 "TARGET_SIMD"
6020 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6021 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6022 )
6023
6024 (define_insn "aarch64_frecpx<mode>"
6025 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6026 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6027 UNSPEC_FRECPX))]
6028 "TARGET_SIMD"
6029 "frecpx\t%<s>0, %<s>1"
6030 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6031 )
6032
6033 (define_insn "@aarch64_frecps<mode>"
6034 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6035 (unspec:VHSDF_HSDF
6036 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6037 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6038 UNSPEC_FRECPS))]
6039 "TARGET_SIMD"
6040 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6041 [(set_attr "type" "neon_fp_recps_<stype><q>")]
6042 )
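
;; Hedged sketch of how frecpe/frecps combine into a Newton-Raphson
;; reciprocal (each frecps step computes the correction factor 2 - d*x):
;;
;;   #include <arm_neon.h>
;;   float32x4_t approx_recip (float32x4_t d)
;;   {
;;     float32x4_t x = vrecpeq_f32 (d);        /* frecpe: initial estimate */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* frecps + fmul: refine */
;;     x = vmulq_f32 (x, vrecpsq_f32 (d, x));  /* second refinement step */
;;     return x;
;;   }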
6043
6044 (define_insn "aarch64_urecpe<mode>"
6045 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6046 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6047 UNSPEC_URECPE))]
6048 "TARGET_SIMD"
6049 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6050 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6051
6052 ;; Standard pattern name vec_extract<mode><Vel>.
6053
6054 (define_expand "vec_extract<mode><Vel>"
6055 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6056 (match_operand:VALL_F16 1 "register_operand")
6057 (match_operand:SI 2 "immediate_operand")]
6058 "TARGET_SIMD"
6059 {
6060 emit_insn
6061 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6062 DONE;
6063 })
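
;; Hedged illustration: the lane-read intrinsics funnel through this
;; expander, e.g.
;;
;;   #include <arm_neon.h>
;;   int32_t lane2 (int32x4_t v) { return vgetq_lane_s32 (v, 2); }
;;
;; which we would expect to become "umov w0, v0.s[2]" (or a single str
;; when the destination is memory).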
6064
6065 ;; aes
6066
6067 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6068 [(set (match_operand:V16QI 0 "register_operand" "=w")
6069 (unspec:V16QI
6070 [(xor:V16QI
6071 (match_operand:V16QI 1 "register_operand" "%0")
6072 (match_operand:V16QI 2 "register_operand" "w"))]
6073 CRYPTO_AES))]
6074 "TARGET_SIMD && TARGET_AES"
6075 "aes<aes_op>\\t%0.16b, %2.16b"
6076 [(set_attr "type" "crypto_aese")]
6077 )
6078
6079 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6080 [(set (match_operand:V16QI 0 "register_operand" "=w")
6081 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6082 CRYPTO_AESMC))]
6083 "TARGET_SIMD && TARGET_AES"
6084 "aes<aesmc_op>\\t%0.16b, %1.16b"
6085 [(set_attr "type" "crypto_aesmc")]
6086 )
6087
6088 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6089 ;; and enforce the register dependency without scheduling or register
6090 ;; allocation messing up the order or introducing moves in between.
6091 ;; Mash the two together during combine.
6092
6093 (define_insn "*aarch64_crypto_aese_fused"
6094 [(set (match_operand:V16QI 0 "register_operand" "=w")
6095 (unspec:V16QI
6096 [(unspec:V16QI
6097 [(xor:V16QI
6098 (match_operand:V16QI 1 "register_operand" "%0")
6099 (match_operand:V16QI 2 "register_operand" "w"))]
6100 UNSPEC_AESE)]
6101 UNSPEC_AESMC))]
6102 "TARGET_SIMD && TARGET_AES
6103 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6104 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6105 [(set_attr "type" "crypto_aese")
6106 (set_attr "length" "8")]
6107 )
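
;; Hedged example of the source shape that combine mashes into the fused
;; pattern above:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t round (uint8x16_t state, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (state, key));
;;   }
;;
;; With AARCH64_FUSE_AES_AESMC active this is kept as the back-to-back
;; "aese ; aesmc" pair that fused cores execute cheaply.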
6108
6109 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6110 ;; and enforce the register dependency without scheduling or register
6111 ;; allocation messing up the order or introducing moves in between.
6112 ;; Mash the two together during combine.
6113
6114 (define_insn "*aarch64_crypto_aesd_fused"
6115 [(set (match_operand:V16QI 0 "register_operand" "=w")
6116 (unspec:V16QI
6117 [(unspec:V16QI
6118 [(xor:V16QI
6119 (match_operand:V16QI 1 "register_operand" "%0")
6120 (match_operand:V16QI 2 "register_operand" "w"))]
6121 UNSPEC_AESD)]
6122 UNSPEC_AESIMC))]
6123 "TARGET_SIMD && TARGET_AES
6124 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6125 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6126 [(set_attr "type" "crypto_aese")
6127 (set_attr "length" "8")]
6128 )
6129
6130 ;; sha1
6131
6132 (define_insn "aarch64_crypto_sha1hsi"
6133 [(set (match_operand:SI 0 "register_operand" "=w")
6134 (unspec:SI [(match_operand:SI 1
6135 "register_operand" "w")]
6136 UNSPEC_SHA1H))]
6137 "TARGET_SIMD && TARGET_SHA2"
6138 "sha1h\\t%s0, %s1"
6139 [(set_attr "type" "crypto_sha1_fast")]
6140 )
6141
6142 (define_insn "aarch64_crypto_sha1hv4si"
6143 [(set (match_operand:SI 0 "register_operand" "=w")
6144 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6145 (parallel [(const_int 0)]))]
6146 UNSPEC_SHA1H))]
6147 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6148 "sha1h\\t%s0, %s1"
6149 [(set_attr "type" "crypto_sha1_fast")]
6150 )
6151
6152 (define_insn "aarch64_be_crypto_sha1hv4si"
6153 [(set (match_operand:SI 0 "register_operand" "=w")
6154 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6155 (parallel [(const_int 3)]))]
6156 UNSPEC_SHA1H))]
6157 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6158 "sha1h\\t%s0, %s1"
6159 [(set_attr "type" "crypto_sha1_fast")]
6160 )
6161
6162 (define_insn "aarch64_crypto_sha1su1v4si"
6163 [(set (match_operand:V4SI 0 "register_operand" "=w")
6164 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6165 (match_operand:V4SI 2 "register_operand" "w")]
6166 UNSPEC_SHA1SU1))]
6167 "TARGET_SIMD && TARGET_SHA2"
6168 "sha1su1\\t%0.4s, %2.4s"
6169 [(set_attr "type" "crypto_sha1_fast")]
6170 )
6171
6172 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6173 [(set (match_operand:V4SI 0 "register_operand" "=w")
6174 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6175 (match_operand:SI 2 "register_operand" "w")
6176 (match_operand:V4SI 3 "register_operand" "w")]
6177 CRYPTO_SHA1))]
6178 "TARGET_SIMD && TARGET_SHA2"
6179 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6180 [(set_attr "type" "crypto_sha1_slow")]
6181 )
6182
6183 (define_insn "aarch64_crypto_sha1su0v4si"
6184 [(set (match_operand:V4SI 0 "register_operand" "=w")
6185 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6186 (match_operand:V4SI 2 "register_operand" "w")
6187 (match_operand:V4SI 3 "register_operand" "w")]
6188 UNSPEC_SHA1SU0))]
6189 "TARGET_SIMD && TARGET_SHA2"
6190 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6191 [(set_attr "type" "crypto_sha1_xor")]
6192 )
6193
6194 ;; sha256
6195
6196 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6197 [(set (match_operand:V4SI 0 "register_operand" "=w")
6198 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6199 (match_operand:V4SI 2 "register_operand" "w")
6200 (match_operand:V4SI 3 "register_operand" "w")]
6201 CRYPTO_SHA256))]
6202 "TARGET_SIMD && TARGET_SHA2"
6203 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6204 [(set_attr "type" "crypto_sha256_slow")]
6205 )
6206
6207 (define_insn "aarch64_crypto_sha256su0v4si"
6208 [(set (match_operand:V4SI 0 "register_operand" "=w")
6209 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6210 (match_operand:V4SI 2 "register_operand" "w")]
6211 UNSPEC_SHA256SU0))]
6212 "TARGET_SIMD && TARGET_SHA2"
6213 "sha256su0\\t%0.4s, %2.4s"
6214 [(set_attr "type" "crypto_sha256_fast")]
6215 )
6216
6217 (define_insn "aarch64_crypto_sha256su1v4si"
6218 [(set (match_operand:V4SI 0 "register_operand" "=w")
6219 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6220 (match_operand:V4SI 2 "register_operand" "w")
6221 (match_operand:V4SI 3 "register_operand" "w")]
6222 UNSPEC_SHA256SU1))]
6223 "TARGET_SIMD && TARGET_SHA2"
6224 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6225 [(set_attr "type" "crypto_sha256_slow")]
6226 )
6227
6228 ;; sha512
6229
6230 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6231 [(set (match_operand:V2DI 0 "register_operand" "=w")
6232 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6233 (match_operand:V2DI 2 "register_operand" "w")
6234 (match_operand:V2DI 3 "register_operand" "w")]
6235 CRYPTO_SHA512))]
6236 "TARGET_SIMD && TARGET_SHA3"
6237 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6238 [(set_attr "type" "crypto_sha512")]
6239 )
6240
6241 (define_insn "aarch64_crypto_sha512su0qv2di"
6242 [(set (match_operand:V2DI 0 "register_operand" "=w")
6243 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6244 (match_operand:V2DI 2 "register_operand" "w")]
6245 UNSPEC_SHA512SU0))]
6246 "TARGET_SIMD && TARGET_SHA3"
6247 "sha512su0\\t%0.2d, %2.2d"
6248 [(set_attr "type" "crypto_sha512")]
6249 )
6250
6251 (define_insn "aarch64_crypto_sha512su1qv2di"
6252 [(set (match_operand:V2DI 0 "register_operand" "=w")
6253 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6254 (match_operand:V2DI 2 "register_operand" "w")
6255 (match_operand:V2DI 3 "register_operand" "w")]
6256 UNSPEC_SHA512SU1))]
6257 "TARGET_SIMD && TARGET_SHA3"
6258 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6259 [(set_attr "type" "crypto_sha512")]
6260 )
6261
6262 ;; sha3
6263
6264 (define_insn "eor3q<mode>4"
6265 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6266 (xor:VQ_I
6267 (xor:VQ_I
6268 (match_operand:VQ_I 2 "register_operand" "w")
6269 (match_operand:VQ_I 3 "register_operand" "w"))
6270 (match_operand:VQ_I 1 "register_operand" "w")))]
6271 "TARGET_SIMD && TARGET_SHA3"
6272 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6273 [(set_attr "type" "crypto_sha3")]
6274 )
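
;; Hedged usage sketch (assuming the ACLE SHA3 intrinsics are available
;; under +sha3):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t xor3 (uint8x16_t a, uint8x16_t b, uint8x16_t c)
;;   {
;;     return veor3q_u8 (a, b, c);  /* expected: eor3 v0.16b, v0.16b, v1.16b, v2.16b */
;;   }
;;
;; A plain (a ^ b) ^ c in C should also match this pattern via combine.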
6275
6276 (define_insn "aarch64_rax1qv2di"
6277 [(set (match_operand:V2DI 0 "register_operand" "=w")
6278 (xor:V2DI
6279 (rotate:V2DI
6280 (match_operand:V2DI 2 "register_operand" "w")
6281 (const_int 1))
6282 (match_operand:V2DI 1 "register_operand" "w")))]
6283 "TARGET_SIMD && TARGET_SHA3"
6284 "rax1\\t%0.2d, %1.2d, %2.2d"
6285 [(set_attr "type" "crypto_sha3")]
6286 )
6287
6288 (define_insn "aarch64_xarqv2di"
6289 [(set (match_operand:V2DI 0 "register_operand" "=w")
6290 (rotatert:V2DI
6291 (xor:V2DI
6292 (match_operand:V2DI 1 "register_operand" "%w")
6293 (match_operand:V2DI 2 "register_operand" "w"))
6294 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6295 "TARGET_SIMD && TARGET_SHA3"
6296 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6297 [(set_attr "type" "crypto_sha3")]
6298 )
6299
6300 (define_insn "bcaxq<mode>4"
6301 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6302 (xor:VQ_I
6303 (and:VQ_I
6304 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6305 (match_operand:VQ_I 2 "register_operand" "w"))
6306 (match_operand:VQ_I 1 "register_operand" "w")))]
6307 "TARGET_SIMD && TARGET_SHA3"
6308 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6309 [(set_attr "type" "crypto_sha3")]
6310 )
6311
6312 ;; SM3
6313
6314 (define_insn "aarch64_sm3ss1qv4si"
6315 [(set (match_operand:V4SI 0 "register_operand" "=w")
6316 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6317 (match_operand:V4SI 2 "register_operand" "w")
6318 (match_operand:V4SI 3 "register_operand" "w")]
6319 UNSPEC_SM3SS1))]
6320 "TARGET_SIMD && TARGET_SM4"
6321 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6322 [(set_attr "type" "crypto_sm3")]
6323 )
6324
6325
6326 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6327 [(set (match_operand:V4SI 0 "register_operand" "=w")
6328 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6329 (match_operand:V4SI 2 "register_operand" "w")
6330 (match_operand:V4SI 3 "register_operand" "w")
6331 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6332 CRYPTO_SM3TT))]
6333 "TARGET_SIMD && TARGET_SM4"
6334 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6335 [(set_attr "type" "crypto_sm3")]
6336 )
6337
6338 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6339 [(set (match_operand:V4SI 0 "register_operand" "=w")
6340 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6341 (match_operand:V4SI 2 "register_operand" "w")
6342 (match_operand:V4SI 3 "register_operand" "w")]
6343 CRYPTO_SM3PART))]
6344 "TARGET_SIMD && TARGET_SM4"
6345 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6346 [(set_attr "type" "crypto_sm3")]
6347 )
6348
6349 ;; SM4
6350
6351 (define_insn "aarch64_sm4eqv4si"
6352 [(set (match_operand:V4SI 0 "register_operand" "=w")
6353 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6354 (match_operand:V4SI 2 "register_operand" "w")]
6355 UNSPEC_SM4E))]
6356 "TARGET_SIMD && TARGET_SM4"
6357 "sm4e\\t%0.4s, %2.4s"
6358 [(set_attr "type" "crypto_sm4")]
6359 )
6360
6361 (define_insn "aarch64_sm4ekeyqv4si"
6362 [(set (match_operand:V4SI 0 "register_operand" "=w")
6363 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6364 (match_operand:V4SI 2 "register_operand" "w")]
6365 UNSPEC_SM4EKEY))]
6366 "TARGET_SIMD && TARGET_SM4"
6367 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6368 [(set_attr "type" "crypto_sm4")]
6369 )
6370
6371 ;; fp16fml
6372
6373 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6374 [(set (match_operand:VDQSF 0 "register_operand")
6375 (unspec:VDQSF
6376 [(match_operand:VDQSF 1 "register_operand")
6377 (match_operand:<VFMLA_W> 2 "register_operand")
6378 (match_operand:<VFMLA_W> 3 "register_operand")]
6379 VFMLA16_LOW))]
6380 "TARGET_F16FML"
6381 {
6382 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6383 <nunits> * 2, false);
6384 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6385 <nunits> * 2, false);
6386
6387 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6388 operands[1],
6389 operands[2],
6390 operands[3],
6391 p1, p2));
6392 DONE;
6394 })
6395
6396 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6397 [(set (match_operand:VDQSF 0 "register_operand")
6398 (unspec:VDQSF
6399 [(match_operand:VDQSF 1 "register_operand")
6400 (match_operand:<VFMLA_W> 2 "register_operand")
6401 (match_operand:<VFMLA_W> 3 "register_operand")]
6402 VFMLA16_HIGH))]
6403 "TARGET_F16FML"
6404 {
6405 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6406 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6407
6408 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6409 operands[1],
6410 operands[2],
6411 operands[3],
6412 p1, p2));
6413 DONE;
6414 })
6415
6416 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6417 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6418 (fma:VDQSF
6419 (float_extend:VDQSF
6420 (vec_select:<VFMLA_SEL_W>
6421 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6422 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6423 (float_extend:VDQSF
6424 (vec_select:<VFMLA_SEL_W>
6425 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6426 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6427 (match_operand:VDQSF 1 "register_operand" "0")))]
6428 "TARGET_F16FML"
6429 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6430 [(set_attr "type" "neon_fp_mul_s")]
6431 )
6432
6433 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6434 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6435 (fma:VDQSF
6436 (float_extend:VDQSF
6437 (neg:<VFMLA_SEL_W>
6438 (vec_select:<VFMLA_SEL_W>
6439 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6440 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6441 (float_extend:VDQSF
6442 (vec_select:<VFMLA_SEL_W>
6443 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6444 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6445 (match_operand:VDQSF 1 "register_operand" "0")))]
6446 "TARGET_F16FML"
6447 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6448 [(set_attr "type" "neon_fp_mul_s")]
6449 )
6450
6451 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6452 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6453 (fma:VDQSF
6454 (float_extend:VDQSF
6455 (vec_select:<VFMLA_SEL_W>
6456 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6457 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6458 (float_extend:VDQSF
6459 (vec_select:<VFMLA_SEL_W>
6460 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6461 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6462 (match_operand:VDQSF 1 "register_operand" "0")))]
6463 "TARGET_F16FML"
6464 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6465 [(set_attr "type" "neon_fp_mul_s")]
6466 )
6467
6468 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6469 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6470 (fma:VDQSF
6471 (float_extend:VDQSF
6472 (neg:<VFMLA_SEL_W>
6473 (vec_select:<VFMLA_SEL_W>
6474 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6475 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6476 (float_extend:VDQSF
6477 (vec_select:<VFMLA_SEL_W>
6478 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6479 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6480 (match_operand:VDQSF 1 "register_operand" "0")))]
6481 "TARGET_F16FML"
6482 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6483 [(set_attr "type" "neon_fp_mul_s")]
6484 )
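
;; A hedged asm-level summary of the four patterns above:
;;
;;   fmlal  v0.2s, v1.2h, v2.2h  ; v0.s[i] += widen (v1.h[i]) * widen (v2.h[i]), i = 0..1
;;   fmlsl  v0.2s, v1.2h, v2.2h  ; as fmlal, but the products are subtracted
;;   fmlal2 v0.2s, v1.2h, v2.2h  ; reads the high halves v1.h[2..3], v2.h[2..3]
;;   fmlsl2 v0.2s, v1.2h, v2.2h  ; high-half variant of fmlsl
;;
;; i.e. the lo/hi vect_par_cnst halves select which f16 elements widen.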
6485
6486 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6487 [(set (match_operand:V2SF 0 "register_operand")
6488 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6489 (match_operand:V4HF 2 "register_operand")
6490 (match_operand:V4HF 3 "register_operand")
6491 (match_operand:SI 4 "aarch64_imm2")]
6492 VFMLA16_LOW))]
6493 "TARGET_F16FML"
6494 {
6495 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6496 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6497
6498 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6499 operands[1],
6500 operands[2],
6501 operands[3],
6502 p1, lane));
6503 DONE;
6504 }
6505 )
6506
6507 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6508 [(set (match_operand:V2SF 0 "register_operand")
6509 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6510 (match_operand:V4HF 2 "register_operand")
6511 (match_operand:V4HF 3 "register_operand")
6512 (match_operand:SI 4 "aarch64_imm2")]
6513 VFMLA16_HIGH))]
6514 "TARGET_F16FML"
6515 {
6516 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6517 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6518
6519 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6520 operands[1],
6521 operands[2],
6522 operands[3],
6523 p1, lane));
6524 DONE;
6525 })
6526
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

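;; A C sketch of the 64-bit lane forms above (hypothetical helper;
;; intrinsic names assumed per ACLE).  The "x" constraint restricts the
;; indexed operand to V0-V15, as required for 16-bit indexed multiplies:
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   lane_mla (float32x2_t acc, float16x4_t a, float16x4_t b)
;;   {
;;     /* fmlal v0.2s, v1.2h, v2.h[3]: low half of A times element 3
;;        of B, widened to f32 and accumulated.  */
;;     return vfmlal_lane_low_f16 (acc, a, b, 3);
;;   }
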
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

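;; A C sketch of the 128-bit laneq forms above (hypothetical helper;
;; intrinsic names assumed per ACLE); the lane index now ranges over all
;; eight h elements:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   laneq_mls (float32x4_t acc, float16x8_t a, float16x8_t b)
;;   {
;;     /* fmlsl2 v0.4s, v1.4h, v2.h[7]: high half of A times element 7
;;        of B, widened and subtracted from the accumulator.  */
;;     return vfmlslq_laneq_high_f16 (acc, a, b, 7);
;;   }
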
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
                      (match_operand:V4HF 2 "register_operand")
                      (match_operand:V8HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_lane_imm3")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (vec_select:V2HF
           (match_operand:V4HF 2 "register_operand" "w")
           (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
         (float_extend:V2SF
          (neg:V2HF
           (vec_select:V2HF
            (match_operand:V4HF 2 "register_operand" "w")
            (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V2SF
          (vec_duplicate:V2HF
           (vec_select:HF
            (match_operand:V8HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
         (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

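;; A C sketch of the mixed-width laneq forms above (hypothetical helper;
;; intrinsic names assumed per ACLE): 64-bit data vector, lane taken
;; from a 128-bit vector:
;;
;;   #include <arm_neon.h>
;;
;;   float32x2_t
;;   laneq_mla (float32x2_t acc, float16x4_t a, float16x8_t b)
;;   {
;;     /* fmlal v0.2s, v1.2h, v2.h[5].  */
;;     return vfmlal_laneq_low_f16 (acc, a, b, 5);
;;   }
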
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
                      (match_operand:V8HF 2 "register_operand")
                      (match_operand:V4HF 3 "register_operand")
                      (match_operand:SI 4 "aarch64_imm2")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (vec_select:V4HF
           (match_operand:V8HF 2 "register_operand" "w")
           (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
         (float_extend:V4SF
          (neg:V4HF
           (vec_select:V4HF
            (match_operand:V8HF 2 "register_operand" "w")
            (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
         (float_extend:V4SF
          (vec_duplicate:V4HF
           (vec_select:HF
            (match_operand:V4HF 3 "register_operand" "x")
            (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
         (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

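;; A C sketch of the opposite mixed-width combination (hypothetical
;; helper; intrinsic names assumed per ACLE): 128-bit data vector, lane
;; taken from a 64-bit vector, so the index is limited to 0-3:
;;
;;   #include <arm_neon.h>
;;
;;   float32x4_t
;;   q_lane_mls (float32x4_t acc, float16x8_t a, float16x4_t b)
;;   {
;;     /* fmlsl v0.4s, v1.4h, v2.h[2].  */
;;     return vfmlslq_lane_low_f16 (acc, a, b, 2);
;;   }
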
;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
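
;; These two patterns back the 64x64->128 carry-less (polynomial)
;; multiplies from the +aes crypto extension, used e.g. for GHASH in
;; AES-GCM.  A minimal C sketch (hypothetical helper functions; the
;; intrinsics are the ACLE vmull_p64/vmull_high_p64):
;;
;;   #include <arm_neon.h>
;;
;;   poly128_t
;;   clmul_low (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);        /* pmull  v0.1q, v1.1d, v2.1d */
;;   }
;;
;;   poly128_t
;;   clmul_high (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);   /* pmull2 v0.1q, v1.2d, v2.2d */
;;   }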