gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 423 ;; fact that their usage need to guarantee that the source vectors are
 424 ;; contiguous.  It would be wrong to describe the operation without being able
 425 ;; to describe the permute that is also required, but even if that is done
 426 ;; the permute would have been created as a LOAD_LANES which means the values
 427 ;; in the registers are in the wrong order.
 428 (define_insn "aarch64_fcadd<rot><mode>"
 429   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 430         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 431                        (match_operand:VHSDF 2 "register_operand" "w")]
 432                        FCADD))]
 433   "TARGET_COMPLEX"
 434   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 435   [(set_attr "type" "neon_fcadd")]
 436 )
 437
 438 (define_insn "aarch64_fcmla<rot><mode>"
 439   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 440         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 441                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 442                                    (match_operand:VHSDF 3 "register_operand" "w")]
 443                                    FCMLA)))]
 444   "TARGET_COMPLEX"
 445   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 446   [(set_attr "type" "neon_fcmla")]
 447 )
 448
 449
 450 (define_insn "aarch64_fcmla_lane<rot><mode>"
 451   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 452         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 453                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 454                                    (match_operand:VHSDF 3 "register_operand" "w")
 455                                    (match_operand:SI 4 "const_int_operand" "n")]
 456                                    FCMLA)))]
 457   "TARGET_COMPLEX"
 458 {
 459   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 460   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 461 }
 462   [(set_attr "type" "neon_fcmla")]
 463 )
 464
 465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 466   [(set (match_operand:V4HF 0 "register_operand" "=w")
 467         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 468                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 469                                  (match_operand:V8HF 3 "register_operand" "w")
 470                                  (match_operand:SI 4 "const_int_operand" "n")]
 471                                  FCMLA)))]
 472   "TARGET_COMPLEX"
 473 {
 474   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 475   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 476 }
 477   [(set_attr "type" "neon_fcmla")]
 478 )
 479
 480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 481   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 482         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 483                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 484                                      (match_operand:<VHALF> 3 "register_operand" "w")
 485                                      (match_operand:SI 4 "const_int_operand" "n")]
 486                                      FCMLA)))]
 487   "TARGET_COMPLEX"
 488 {
 489   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 490   operands[4]
 491     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 492   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 493 }
 494   [(set_attr "type" "neon_fcmla")]
 495 )
 496
 497 ;; These instructions map to the __builtins for the Dot Product operations.
 498 (define_insn "aarch64_<sur>dot<vsi2qi>"
 499   [(set (match_operand:VS 0 "register_operand" "=w")
 500         (plus:VS (match_operand:VS 1 "register_operand" "0")
 501                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 502                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 503                 DOTPROD)))]
 504   "TARGET_DOTPROD"
 505   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 506   [(set_attr "type" "neon_dot<q>")]
 507 )
 508
 509 ;; These expands map to the Dot Product optab the vectorizer checks for.
 510 ;; The auto-vectorizer expects a dot product builtin that also does an
 511 ;; accumulation into the provided register.
 512 ;; Given the following pattern
 513 ;;
 514 ;; for (i=0; i<len; i++) {
 515 ;;     c = a[i] * b[i];
 516 ;;     r += c;
 517 ;; }
 518 ;; return result;
 519 ;;
 520 ;; This can be auto-vectorized to
 521 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 522 ;;
 523 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 526 ;; ...
 527 ;;
 528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 529 (define_expand "<sur>dot_prod<vsi2qi>"
 530   [(set (match_operand:VS 0 "register_operand")
 531         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 532                             (match_operand:<VSI2QI> 2 "register_operand")]
 533                  DOTPROD)
 534                 (match_operand:VS 3 "register_operand")))]
 535   "TARGET_DOTPROD"
 536 {
 537   emit_insn (
 538     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 539                                     operands[2]));
 540   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 541   DONE;
 542 })
 543
 544 ;; These instructions map to the __builtins for the Dot Product
 545 ;; indexed operations.
 546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 547   [(set (match_operand:VS 0 "register_operand" "=w")
 548         (plus:VS (match_operand:VS 1 "register_operand" "0")
 549                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 550                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 551                             (match_operand:SI 4 "immediate_operand" "i")]
 552                 DOTPROD)))]
 553   "TARGET_DOTPROD"
 554   {
 555     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 556     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 557   }
 558   [(set_attr "type" "neon_dot<q>")]
 559 )
 560
 561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 562   [(set (match_operand:VS 0 "register_operand" "=w")
 563         (plus:VS (match_operand:VS 1 "register_operand" "0")
 564                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 565                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 566                             (match_operand:SI 4 "immediate_operand" "i")]
 567                 DOTPROD)))]
 568   "TARGET_DOTPROD"
 569   {
 570     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 571     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 572   }
 573   [(set_attr "type" "neon_dot<q>")]
 574 )
 575
 576 (define_expand "copysign<mode>3"
 577   [(match_operand:VHSDF 0 "register_operand")
 578    (match_operand:VHSDF 1 "register_operand")
 579    (match_operand:VHSDF 2 "register_operand")]
 580   "TARGET_FLOAT && TARGET_SIMD"
 581 {
 582   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 583   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 584
 585   emit_move_insn (v_bitmask,
 586                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 587                                                      HOST_WIDE_INT_M1U << bits));
 588   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 589                                          operands[2], operands[1]));
 590   DONE;
 591 }
 592 )
 593
 594 (define_insn "*aarch64_mul3_elt<mode>"
 595  [(set (match_operand:VMUL 0 "register_operand" "=w")
 596     (mult:VMUL
 597       (vec_duplicate:VMUL
 598           (vec_select:<VEL>
 599             (match_operand:VMUL 1 "register_operand" "<h_con>")
 600             (parallel [(match_operand:SI 2 "immediate_operand")])))
 601       (match_operand:VMUL 3 "register_operand" "w")))]
 602   "TARGET_SIMD"
 603   {
 604     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 605     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 606   }
 607   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 608 )
 609
 610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 611   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 612      (mult:VMUL_CHANGE_NLANES
 613        (vec_duplicate:VMUL_CHANGE_NLANES
 614           (vec_select:<VEL>
 615             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 616             (parallel [(match_operand:SI 2 "immediate_operand")])))
 617       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 618   "TARGET_SIMD"
 619   {
 620     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 621     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 622   }
 623   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 624 )
 625
 626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 627  [(set (match_operand:VMUL 0 "register_operand" "=w")
 628     (mult:VMUL
 629       (vec_duplicate:VMUL
 630             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 631       (match_operand:VMUL 2 "register_operand" "w")))]
 632   "TARGET_SIMD"
 633   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 634   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 635 )
 636
 637 (define_insn "@aarch64_rsqrte<mode>"
 638   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 639         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 640                      UNSPEC_RSQRTE))]
 641   "TARGET_SIMD"
 642   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 643   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 644
 645 (define_insn "@aarch64_rsqrts<mode>"
 646   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 647         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 648                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 649          UNSPEC_RSQRTS))]
 650   "TARGET_SIMD"
 651   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 652   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 653
 654 (define_expand "rsqrt<mode>2"
 655   [(set (match_operand:VALLF 0 "register_operand" "=w")
 656         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 657                      UNSPEC_RSQRT))]
 658   "TARGET_SIMD"
 659 {
 660   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 661   DONE;
 662 })
 663
 664 (define_insn "*aarch64_mul3_elt_to_64v2df"
 665   [(set (match_operand:DF 0 "register_operand" "=w")
 666      (mult:DF
 667        (vec_select:DF
 668          (match_operand:V2DF 1 "register_operand" "w")
 669          (parallel [(match_operand:SI 2 "immediate_operand")]))
 670        (match_operand:DF 3 "register_operand" "w")))]
 671   "TARGET_SIMD"
 672   {
 673     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 674     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 675   }
 676   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 677 )
 678
 679 (define_insn "neg<mode>2"
 680   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 681         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 682   "TARGET_SIMD"
 683   "neg\t%0.<Vtype>, %1.<Vtype>"
 684   [(set_attr "type" "neon_neg<q>")]
 685 )
 686
 687 (define_insn "abs<mode>2"
 688   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 689         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 690   "TARGET_SIMD"
 691   "abs\t%0.<Vtype>, %1.<Vtype>"
 692   [(set_attr "type" "neon_abs<q>")]
 693 )
 694
 695 ;; The intrinsic version of integer ABS must not be allowed to
 696 ;; combine with any operation with an integerated ABS step, such
 697 ;; as SABD.
 698 (define_insn "aarch64_abs<mode>"
 699   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 700           (unspec:VSDQ_I_DI
 701             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 702            UNSPEC_ABS))]
 703   "TARGET_SIMD"
 704   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 705   [(set_attr "type" "neon_abs<q>")]
 706 )
 707
 708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 709 ;; This isn't accurate as ABS treats always its input as a signed value.
 710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 713 (define_insn "aarch64_<su>abd<mode>_3"
 714   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 715         (minus:VDQ_BHSI
 716           (USMAX:VDQ_BHSI
 717             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 718             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 719           (<max_opp>:VDQ_BHSI
 720             (match_dup 1)
 721             (match_dup 2))))]
 722   "TARGET_SIMD"
 723   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 724   [(set_attr "type" "neon_abd<q>")]
 725 )
 726
 727 (define_insn "aarch64_<sur>abdl2<mode>_3"
 728   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 729         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 730                           (match_operand:VDQV_S 2 "register_operand" "w")]
 731         ABDL2))]
 732   "TARGET_SIMD"
 733   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 734   [(set_attr "type" "neon_abd<q>")]
 735 )
 736
 737 (define_insn "aarch64_<sur>abal<mode>_4"
 738   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 739         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 740                           (match_operand:VDQV_S 2 "register_operand" "w")
 741                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 742         ABAL))]
 743   "TARGET_SIMD"
 744   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 745   [(set_attr "type" "neon_arith_acc<q>")]
 746 )
 747
 748 (define_insn "aarch64_<sur>adalp<mode>_3"
 749   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 750         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 751                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 752         ADALP))]
 753   "TARGET_SIMD"
 754   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 755   [(set_attr "type" "neon_reduc_add<q>")]
 756 )
 757
 758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 759 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 760 ;; reduction of the difference into a V4SI vector and accumulate that into
 761 ;; operand 3 before copying that into the result operand 0.
 762 ;; Perform that with a sequence of:
 763 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 764 ;; UABAL        tmp.8h, op1.16b, op2.16b
 765 ;; UADALP       op3.4s, tmp.8h
 766 ;; MOV          op0, op3 // should be eliminated in later passes.
 767 ;;
 768 ;; For TARGET_DOTPROD we do:
 769 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 770 ;; UABD tmp2.16b, op1.16b, op2.16b
 771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 772 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 773 ;;
 774 ;; The signed version just uses the signed variants of the above instructions
 775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 776 ;; unsigned.
 777
 778 (define_expand "<sur>sadv16qi"
 779   [(use (match_operand:V4SI 0 "register_operand"))
 780    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 781                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 782    (use (match_operand:V4SI 3 "register_operand"))]
 783   "TARGET_SIMD"
 784   {
 785     if (TARGET_DOTPROD)
 786       {
 787         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 788         rtx abd = gen_reg_rtx (V16QImode);
 789         emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
 790         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
 791                                           abd, ones));
 792         DONE;
 793       }
 794     rtx reduc = gen_reg_rtx (V8HImode);
 795     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 796                                                operands[2]));
 797     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 798                                               operands[2], reduc));
 799     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 800                                               operands[3]));
 801     emit_move_insn (operands[0], operands[3]);
 802     DONE;
 803   }
 804 )
 805
 806 (define_insn "aba<mode>_3"
 807   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 808         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 809                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 810                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 811                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 812   "TARGET_SIMD"
 813   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 814   [(set_attr "type" "neon_arith_acc<q>")]
 815 )
 816
 817 (define_insn "fabd<mode>3"
 818   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 819         (abs:VHSDF_HSDF
 820           (minus:VHSDF_HSDF
 821             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 822             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 823   "TARGET_SIMD"
 824   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 825   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 826 )
 827
 828 ;; For AND (vector, register) and BIC (vector, immediate)
 829 (define_insn "and<mode>3"
 830   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 831         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 832                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 833   "TARGET_SIMD"
 834   {
 835     switch (which_alternative)
 836       {
 837       case 0:
 838         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 839       case 1:
 840         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 841                                                   AARCH64_CHECK_BIC);
 842       default:
 843         gcc_unreachable ();
 844       }
 845   }
 846   [(set_attr "type" "neon_logic<q>")]
 847 )
 848
 849 ;; For ORR (vector, register) and ORR (vector, immediate)
 850 (define_insn "ior<mode>3"
 851   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 852         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 853                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 854   "TARGET_SIMD"
 855   {
 856     switch (which_alternative)
 857       {
 858       case 0:
 859         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 860       case 1:
 861         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 862                                                   AARCH64_CHECK_ORR);
 863       default:
 864         gcc_unreachable ();
 865       }
 866   }
 867   [(set_attr "type" "neon_logic<q>")]
 868 )
 869
 870 (define_insn "xor<mode>3"
 871   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 872         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 873                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 874   "TARGET_SIMD"
 875   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 876   [(set_attr "type" "neon_logic<q>")]
 877 )
 878
 879 (define_insn "one_cmpl<mode>2"
 880   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 881         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 882   "TARGET_SIMD"
 883   "not\t%0.<Vbtype>, %1.<Vbtype>"
 884   [(set_attr "type" "neon_logic<q>")]
 885 )
 886
 887 (define_insn "aarch64_simd_vec_set<mode>"
 888   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 889         (vec_merge:VALL_F16
 890             (vec_duplicate:VALL_F16
 891                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 892             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 893             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 894   "TARGET_SIMD"
 895   {
 896    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 897    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 898    switch (which_alternative)
 899      {
 900      case 0:
 901         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 902      case 1:
 903         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 904      case 2:
 905         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 906      default:
 907         gcc_unreachable ();
 908      }
 909   }
 910   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 911 )
 912
 913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 914   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 915         (vec_merge:VALL_F16
 916             (vec_duplicate:VALL_F16
 917               (vec_select:<VEL>
 918                 (match_operand:VALL_F16 3 "register_operand" "w")
 919                 (parallel
 920                   [(match_operand:SI 4 "immediate_operand" "i")])))
 921             (match_operand:VALL_F16 1 "register_operand" "0")
 922             (match_operand:SI 2 "immediate_operand" "i")))]
 923   "TARGET_SIMD"
 924   {
 925     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 926     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 927     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 928
 929     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 930   }
 931   [(set_attr "type" "neon_ins<q>")]
 932 )
 933
 934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 935   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 936         (vec_merge:VALL_F16_NO_V2Q
 937             (vec_duplicate:VALL_F16_NO_V2Q
 938               (vec_select:<VEL>
 939                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 940                 (parallel
 941                   [(match_operand:SI 4 "immediate_operand" "i")])))
 942             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 943             (match_operand:SI 2 "immediate_operand" "i")))]
 944   "TARGET_SIMD"
 945   {
 946     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 947     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 948     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 949                                            INTVAL (operands[4]));
 950
 951     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 952   }
 953   [(set_attr "type" "neon_ins<q>")]
 954 )
 955
 956 (define_expand "signbit<mode>2"
 957   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
 958    (use (match_operand:VDQSF 1 "register_operand"))]
 959   "TARGET_SIMD"
 960 {
 961   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
 962   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 963                                                         shift_amount);
 964   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
 965
 966   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
 967                                                  shift_vector));
 968   DONE;
 969 })
 970
 971 (define_insn "aarch64_simd_lshr<mode>"
 972  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 973        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 974                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 975  "TARGET_SIMD"
 976  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 977   [(set_attr "type" "neon_shift_imm<q>")]
 978 )
 979
 980 (define_insn "aarch64_simd_ashr<mode>"
 981  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 982        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 983                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 984  "TARGET_SIMD"
 985  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 986   [(set_attr "type" "neon_shift_imm<q>")]
 987 )
 988
 989 (define_insn "aarch64_simd_imm_shl<mode>"
 990  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 991        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 992                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 993  "TARGET_SIMD"
 994   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 995   [(set_attr "type" "neon_shift_imm<q>")]
 996 )
 997
 998 (define_insn "aarch64_simd_reg_sshl<mode>"
 999  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1000        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1001                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1002  "TARGET_SIMD"
1003  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1004   [(set_attr "type" "neon_shift_reg<q>")]
1005 )
1006
1007 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1008  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1009        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1010                     (match_operand:VDQ_I 2 "register_operand" "w")]
1011                    UNSPEC_ASHIFT_UNSIGNED))]
1012  "TARGET_SIMD"
1013  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1014   [(set_attr "type" "neon_shift_reg<q>")]
1015 )
1016
1017 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1018  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1019        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1020                     (match_operand:VDQ_I 2 "register_operand" "w")]
1021                    UNSPEC_ASHIFT_SIGNED))]
1022  "TARGET_SIMD"
1023  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024   [(set_attr "type" "neon_shift_reg<q>")]
1025 )
1026
1027 (define_expand "ashl<mode>3"
1028   [(match_operand:VDQ_I 0 "register_operand" "")
1029    (match_operand:VDQ_I 1 "register_operand" "")
1030    (match_operand:SI  2 "general_operand" "")]
1031  "TARGET_SIMD"
1032 {
1033   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1034   int shift_amount;
1035
1036   if (CONST_INT_P (operands[2]))
1037     {
1038       shift_amount = INTVAL (operands[2]);
1039       if (shift_amount >= 0 && shift_amount < bit_width)
1040         {
1041           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1042                                                        shift_amount);
1043           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1044                                                      operands[1],
1045                                                      tmp));
1046           DONE;
1047         }
1048       else
1049         {
1050           operands[2] = force_reg (SImode, operands[2]);
1051         }
1052     }
1053   else if (MEM_P (operands[2]))
1054     {
1055       operands[2] = force_reg (SImode, operands[2]);
1056     }
1057
1058   if (REG_P (operands[2]))
1059     {
1060       rtx tmp = gen_reg_rtx (<MODE>mode);
1061       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1062                                              convert_to_mode (<VEL>mode,
1063                                                               operands[2],
1064                                                               0)));
1065       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1066                                                   tmp));
1067       DONE;
1068     }
1069   else
1070     FAIL;
1071 }
1072 )
1073
1074 (define_expand "lshr<mode>3"
1075   [(match_operand:VDQ_I 0 "register_operand" "")
1076    (match_operand:VDQ_I 1 "register_operand" "")
1077    (match_operand:SI  2 "general_operand" "")]
1078  "TARGET_SIMD"
1079 {
1080   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1081   int shift_amount;
1082
1083   if (CONST_INT_P (operands[2]))
1084     {
1085       shift_amount = INTVAL (operands[2]);
1086       if (shift_amount > 0 && shift_amount <= bit_width)
1087         {
1088           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1089                                                        shift_amount);
1090           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1091                                                   operands[1],
1092                                                   tmp));
1093           DONE;
1094         }
1095       else
1096         operands[2] = force_reg (SImode, operands[2]);
1097     }
1098   else if (MEM_P (operands[2]))
1099     {
1100       operands[2] = force_reg (SImode, operands[2]);
1101     }
1102
1103   if (REG_P (operands[2]))
1104     {
1105       rtx tmp = gen_reg_rtx (SImode);
1106       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1107       emit_insn (gen_negsi2 (tmp, operands[2]));
1108       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1109                                              convert_to_mode (<VEL>mode,
1110                                                               tmp, 0)));
1111       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1112                                                           operands[1],
1113                                                           tmp1));
1114       DONE;
1115     }
1116   else
1117     FAIL;
1118 }
1119 )
1120
1121 (define_expand "ashr<mode>3"
1122   [(match_operand:VDQ_I 0 "register_operand" "")
1123    (match_operand:VDQ_I 1 "register_operand" "")
1124    (match_operand:SI  2 "general_operand" "")]
1125  "TARGET_SIMD"
1126 {
1127   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1128   int shift_amount;
1129
1130   if (CONST_INT_P (operands[2]))
1131     {
1132       shift_amount = INTVAL (operands[2]);
1133       if (shift_amount > 0 && shift_amount <= bit_width)
1134         {
1135           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1136                                                        shift_amount);
1137           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1138                                                   operands[1],
1139                                                   tmp));
1140           DONE;
1141         }
1142       else
1143         operands[2] = force_reg (SImode, operands[2]);
1144     }
1145   else if (MEM_P (operands[2]))
1146     {
1147       operands[2] = force_reg (SImode, operands[2]);
1148     }
1149
1150   if (REG_P (operands[2]))
1151     {
1152       rtx tmp = gen_reg_rtx (SImode);
1153       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1154       emit_insn (gen_negsi2 (tmp, operands[2]));
1155       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1156                                              convert_to_mode (<VEL>mode,
1157                                                               tmp, 0)));
1158       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1159                                                         operands[1],
1160                                                         tmp1));
1161       DONE;
1162     }
1163   else
1164     FAIL;
1165 }
1166 )
1167
1168 (define_expand "vashl<mode>3"
1169  [(match_operand:VDQ_I 0 "register_operand" "")
1170   (match_operand:VDQ_I 1 "register_operand" "")
1171   (match_operand:VDQ_I 2 "register_operand" "")]
1172  "TARGET_SIMD"
1173 {
1174   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1175                                               operands[2]));
1176   DONE;
1177 })
1178
1179 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1180 ;; Negating individual lanes most certainly offsets the
1181 ;; gain from vectorization.
1182 (define_expand "vashr<mode>3"
1183  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1184   (match_operand:VDQ_BHSI 1 "register_operand" "")
1185   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1186  "TARGET_SIMD"
1187 {
1188   rtx neg = gen_reg_rtx (<MODE>mode);
1189   emit (gen_neg<mode>2 (neg, operands[2]));
1190   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1191                                                     neg));
1192   DONE;
1193 })
1194
1195 ;; DI vector shift
1196 (define_expand "aarch64_ashr_simddi"
1197   [(match_operand:DI 0 "register_operand" "=w")
1198    (match_operand:DI 1 "register_operand" "w")
1199    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1200   "TARGET_SIMD"
1201   {
1202     /* An arithmetic shift right by 64 fills the result with copies of the sign
1203        bit, just like asr by 63 - however the standard pattern does not handle
1204        a shift by 64.  */
1205     if (INTVAL (operands[2]) == 64)
1206       operands[2] = GEN_INT (63);
1207     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1208     DONE;
1209   }
1210 )
1211
1212 (define_expand "vlshr<mode>3"
1213  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1214   (match_operand:VDQ_BHSI 1 "register_operand" "")
1215   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1216  "TARGET_SIMD"
1217 {
1218   rtx neg = gen_reg_rtx (<MODE>mode);
1219   emit (gen_neg<mode>2 (neg, operands[2]));
1220   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1221                                                       neg));
1222   DONE;
1223 })
1224
1225 (define_expand "aarch64_lshr_simddi"
1226   [(match_operand:DI 0 "register_operand" "=w")
1227    (match_operand:DI 1 "register_operand" "w")
1228    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1229   "TARGET_SIMD"
1230   {
1231     if (INTVAL (operands[2]) == 64)
1232       emit_move_insn (operands[0], const0_rtx);
1233     else
1234       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1235     DONE;
1236   }
1237 )
1238
1239 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1240 (define_insn "vec_shr_<mode>"
1241   [(set (match_operand:VD 0 "register_operand" "=w")
1242         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1243                     (match_operand:SI 2 "immediate_operand" "i")]
1244                    UNSPEC_VEC_SHR))]
1245   "TARGET_SIMD"
1246   {
1247     if (BYTES_BIG_ENDIAN)
1248       return "shl %d0, %d1, %2";
1249     else
1250       return "ushr %d0, %d1, %2";
1251   }
1252   [(set_attr "type" "neon_shift_imm")]
1253 )
1254
1255 (define_expand "vec_set<mode>"
1256   [(match_operand:VALL_F16 0 "register_operand" "+w")
1257    (match_operand:<VEL> 1 "register_operand" "w")
1258    (match_operand:SI 2 "immediate_operand" "")]
1259   "TARGET_SIMD"
1260   {
1261     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1262     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1263                                           GEN_INT (elem), operands[0]));
1264     DONE;
1265   }
1266 )
1267
1268
1269 (define_insn "aarch64_mla<mode>"
1270  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1271        (plus:VDQ_BHSI (mult:VDQ_BHSI
1272                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1273                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1274                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1275  "TARGET_SIMD"
1276  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1277   [(set_attr "type" "neon_mla_<Vetype><q>")]
1278 )
1279
1280 (define_insn "*aarch64_mla_elt<mode>"
1281  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1282        (plus:VDQHS
1283          (mult:VDQHS
1284            (vec_duplicate:VDQHS
1285               (vec_select:<VEL>
1286                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1287                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1288            (match_operand:VDQHS 3 "register_operand" "w"))
1289          (match_operand:VDQHS 4 "register_operand" "0")))]
1290  "TARGET_SIMD"
1291   {
1292     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1293     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1294   }
1295   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1296 )
1297
1298 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1299  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1300        (plus:VDQHS
1301          (mult:VDQHS
1302            (vec_duplicate:VDQHS
1303               (vec_select:<VEL>
1304                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1305                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1306            (match_operand:VDQHS 3 "register_operand" "w"))
1307          (match_operand:VDQHS 4 "register_operand" "0")))]
1308  "TARGET_SIMD"
1309   {
1310     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1311     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1312   }
1313   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1314 )
1315
1316 (define_insn "*aarch64_mla_elt_merge<mode>"
1317   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1318         (plus:VDQHS
1319           (mult:VDQHS (vec_duplicate:VDQHS
1320                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1321                 (match_operand:VDQHS 2 "register_operand" "w"))
1322           (match_operand:VDQHS 3 "register_operand" "0")))]
1323  "TARGET_SIMD"
1324  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1325   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
1328 (define_insn "aarch64_mls<mode>"
1329  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1330        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1331                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1332                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1333  "TARGET_SIMD"
1334  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1335   [(set_attr "type" "neon_mla_<Vetype><q>")]
1336 )
1337
1338 (define_insn "*aarch64_mls_elt<mode>"
1339  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1340        (minus:VDQHS
1341          (match_operand:VDQHS 4 "register_operand" "0")
1342          (mult:VDQHS
1343            (vec_duplicate:VDQHS
1344               (vec_select:<VEL>
1345                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1346                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1347            (match_operand:VDQHS 3 "register_operand" "w"))))]
1348  "TARGET_SIMD"
1349   {
1350     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1351     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1352   }
1353   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1354 )
1355
1356 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1357  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1358        (minus:VDQHS
1359          (match_operand:VDQHS 4 "register_operand" "0")
1360          (mult:VDQHS
1361            (vec_duplicate:VDQHS
1362               (vec_select:<VEL>
1363                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1364                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1365            (match_operand:VDQHS 3 "register_operand" "w"))))]
1366  "TARGET_SIMD"
1367   {
1368     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1369     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1370   }
1371   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1372 )
1373
1374 (define_insn "*aarch64_mls_elt_merge<mode>"
1375   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1376         (minus:VDQHS
1377           (match_operand:VDQHS 1 "register_operand" "0")
1378           (mult:VDQHS (vec_duplicate:VDQHS
1379                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1380                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1381   "TARGET_SIMD"
1382   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1383   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 ;; Max/Min operations.
1387 (define_insn "<su><maxmin><mode>3"
1388  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1389        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1390                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1391  "TARGET_SIMD"
1392  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1393   [(set_attr "type" "neon_minmax<q>")]
1394 )
1395
1396 (define_expand "<su><maxmin>v2di3"
1397  [(set (match_operand:V2DI 0 "register_operand" "")
1398        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1399                     (match_operand:V2DI 2 "register_operand" "")))]
1400  "TARGET_SIMD"
1401 {
1402   enum rtx_code cmp_operator;
1403   rtx cmp_fmt;
1404
1405   switch (<CODE>)
1406     {
1407     case UMIN:
1408       cmp_operator = LTU;
1409       break;
1410     case SMIN:
1411       cmp_operator = LT;
1412       break;
1413     case UMAX:
1414       cmp_operator = GTU;
1415       break;
1416     case SMAX:
1417       cmp_operator = GT;
1418       break;
1419     default:
1420       gcc_unreachable ();
1421     }
1422
1423   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1424   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1425               operands[2], cmp_fmt, operands[1], operands[2]));
1426   DONE;
1427 })
1428
1429 ;; Pairwise Integer Max/Min operations.
1430 (define_insn "aarch64_<maxmin_uns>p<mode>"
1431  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1432        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1433                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1434                         MAXMINV))]
1435  "TARGET_SIMD"
1436  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437   [(set_attr "type" "neon_minmax<q>")]
1438 )
1439
1440 ;; Pairwise FP Max/Min operations.
1441 (define_insn "aarch64_<maxmin_uns>p<mode>"
1442  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1443        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1444                       (match_operand:VHSDF 2 "register_operand" "w")]
1445                       FMAXMINV))]
1446  "TARGET_SIMD"
1447  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1448   [(set_attr "type" "neon_minmax<q>")]
1449 )
1450
1451 ;; vec_concat gives a new vector with the low elements from operand 1, and
1452 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1453 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1454 ;; What that means, is that the RTL descriptions of the below patterns
1455 ;; need to change depending on endianness.
1456
1457 ;; Move to the low architectural bits of the register.
1458 ;; On little-endian this is { operand, zeroes }
1459 ;; On big-endian this is { zeroes, operand }
1460
1461 (define_insn "move_lo_quad_internal_<mode>"
1462   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1463         (vec_concat:VQ_NO2E
1464           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1465           (vec_duplicate:<VHALF> (const_int 0))))]
1466   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1467   "@
1468    dup\\t%d0, %1.d[0]
1469    fmov\\t%d0, %1
1470    dup\\t%d0, %1"
1471   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1472    (set_attr "length" "4")
1473    (set_attr "arch" "simd,fp,simd")]
1474 )
1475
1476 (define_insn "move_lo_quad_internal_<mode>"
1477   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1478         (vec_concat:VQ_2E
1479           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1480           (const_int 0)))]
1481   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1482   "@
1483    dup\\t%d0, %1.d[0]
1484    fmov\\t%d0, %1
1485    dup\\t%d0, %1"
1486   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1487    (set_attr "length" "4")
1488    (set_attr "arch" "simd,fp,simd")]
1489 )
1490
1491 (define_insn "move_lo_quad_internal_be_<mode>"
1492   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1493         (vec_concat:VQ_NO2E
1494           (vec_duplicate:<VHALF> (const_int 0))
1495           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1496   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1497   "@
1498    dup\\t%d0, %1.d[0]
1499    fmov\\t%d0, %1
1500    dup\\t%d0, %1"
1501   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1502    (set_attr "length" "4")
1503    (set_attr "arch" "simd,fp,simd")]
1504 )
1505
1506 (define_insn "move_lo_quad_internal_be_<mode>"
1507   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1508         (vec_concat:VQ_2E
1509           (const_int 0)
1510           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1511   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1512   "@
1513    dup\\t%d0, %1.d[0]
1514    fmov\\t%d0, %1
1515    dup\\t%d0, %1"
1516   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1517    (set_attr "length" "4")
1518    (set_attr "arch" "simd,fp,simd")]
1519 )
1520
1521 (define_expand "move_lo_quad_<mode>"
1522   [(match_operand:VQ 0 "register_operand")
1523    (match_operand:VQ 1 "register_operand")]
1524   "TARGET_SIMD"
1525 {
1526   if (BYTES_BIG_ENDIAN)
1527     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1528   else
1529     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1530   DONE;
1531 }
1532 )
1533
1534 ;; Move operand1 to the high architectural bits of the register, keeping
1535 ;; the low architectural bits of operand2.
1536 ;; For little-endian this is { operand2, operand1 }
1537 ;; For big-endian this is { operand1, operand2 }
1538
1539 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1540   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1541         (vec_concat:VQ
1542           (vec_select:<VHALF>
1543                 (match_dup 0)
1544                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1545           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1546   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1547   "@
1548    ins\\t%0.d[1], %1.d[0]
1549    ins\\t%0.d[1], %1"
1550   [(set_attr "type" "neon_ins")]
1551 )
1552
1553 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1554   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1555         (vec_concat:VQ
1556           (match_operand:<VHALF> 1 "register_operand" "w,r")
1557           (vec_select:<VHALF>
1558                 (match_dup 0)
1559                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1560   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1561   "@
1562    ins\\t%0.d[1], %1.d[0]
1563    ins\\t%0.d[1], %1"
1564   [(set_attr "type" "neon_ins")]
1565 )
1566
1567 (define_expand "move_hi_quad_<mode>"
1568  [(match_operand:VQ 0 "register_operand" "")
1569   (match_operand:<VHALF> 1 "register_operand" "")]
1570  "TARGET_SIMD"
1571 {
1572   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1573   if (BYTES_BIG_ENDIAN)
1574     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1575                     operands[1], p));
1576   else
1577     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1578                     operands[1], p));
1579   DONE;
1580 })
1581
1582 ;; Narrowing operations.
1583
1584 ;; For doubles.
1585 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1586  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1587        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1588  "TARGET_SIMD"
1589  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1590   [(set_attr "type" "neon_shift_imm_narrow_q")]
1591 )
1592
1593 (define_expand "vec_pack_trunc_<mode>"
1594  [(match_operand:<VNARROWD> 0 "register_operand" "")
1595   (match_operand:VDN 1 "register_operand" "")
1596   (match_operand:VDN 2 "register_operand" "")]
1597  "TARGET_SIMD"
1598 {
1599   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1602
1603   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1606   DONE;
1607 })
1608
1609 ;; For quads.
1610
1611 (define_insn "vec_pack_trunc_<mode>"
1612  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613        (vec_concat:<VNARROWQ2>
1614          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1616  "TARGET_SIMD"
1617  {
1618    if (BYTES_BIG_ENDIAN)
1619      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1620    else
1621      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1622  }
1623   [(set_attr "type" "multiple")
1624    (set_attr "length" "8")]
1625 )
1626
1627 ;; Widening operations.
1628
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632                                (match_operand:VQW 1 "register_operand" "w")
1633                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1634                             )))]
1635   "TARGET_SIMD"
1636   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637   [(set_attr "type" "neon_shift_imm_long")]
1638 )
1639
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643                                (match_operand:VQW 1 "register_operand" "w")
1644                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1645                             )))]
1646   "TARGET_SIMD"
1647   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648   [(set_attr "type" "neon_shift_imm_long")]
1649 )
1650
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652   [(match_operand:<VWIDE> 0 "register_operand" "")
1653    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1654   "TARGET_SIMD"
1655   {
1656     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1658                                                           operands[1], p));
1659     DONE;
1660   }
1661 )
1662
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664   [(match_operand:<VWIDE> 0 "register_operand" "")
1665    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1666   "TARGET_SIMD"
1667   {
1668     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1670                                                           operands[1], p));
1671     DONE;
1672   }
1673 )
1674
1675 ;; Widening arithmetic.
1676
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1679         (plus:<VWIDE>
1680           (mult:<VWIDE>
1681               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682                  (match_operand:VQW 2 "register_operand" "w")
1683                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685                  (match_operand:VQW 4 "register_operand" "w")
1686                  (match_dup 3))))
1687           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1688   "TARGET_SIMD"
1689   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690   [(set_attr "type" "neon_mla_<Vetype>_long")]
1691 )
1692
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1695         (plus:<VWIDE>
1696           (mult:<VWIDE>
1697               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698                  (match_operand:VQW 2 "register_operand" "w")
1699                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701                  (match_operand:VQW 4 "register_operand" "w")
1702                  (match_dup 3))))
1703           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1704   "TARGET_SIMD"
1705   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706   [(set_attr "type" "neon_mla_<Vetype>_long")]
1707 )
1708
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1711         (minus:<VWIDE>
1712           (match_operand:<VWIDE> 1 "register_operand" "0")
1713           (mult:<VWIDE>
1714               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715                  (match_operand:VQW 2 "register_operand" "w")
1716                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718                  (match_operand:VQW 4 "register_operand" "w")
1719                  (match_dup 3))))))]
1720   "TARGET_SIMD"
1721   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722   [(set_attr "type" "neon_mla_<Vetype>_long")]
1723 )
1724
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1727         (minus:<VWIDE>
1728           (match_operand:<VWIDE> 1 "register_operand" "0")
1729           (mult:<VWIDE>
1730               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731                  (match_operand:VQW 2 "register_operand" "w")
1732                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734                  (match_operand:VQW 4 "register_operand" "w")
1735                  (match_dup 3))))))]
1736   "TARGET_SIMD"
1737   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738   [(set_attr "type" "neon_mla_<Vetype>_long")]
1739 )
1740
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1743         (plus:<VWIDE>
1744           (mult:<VWIDE>
1745             (ANY_EXTEND:<VWIDE>
1746               (match_operand:VD_BHSI 1 "register_operand" "w"))
1747             (ANY_EXTEND:<VWIDE>
1748               (match_operand:VD_BHSI 2 "register_operand" "w")))
1749           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1750   "TARGET_SIMD"
1751   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752   [(set_attr "type" "neon_mla_<Vetype>_long")]
1753 )
1754
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1757         (minus:<VWIDE>
1758           (match_operand:<VWIDE> 1 "register_operand" "0")
1759           (mult:<VWIDE>
1760             (ANY_EXTEND:<VWIDE>
1761               (match_operand:VD_BHSI 2 "register_operand" "w"))
1762             (ANY_EXTEND:<VWIDE>
1763               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1764   "TARGET_SIMD"
1765   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766   [(set_attr "type" "neon_mla_<Vetype>_long")]
1767 )
1768
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772                            (match_operand:VQW 1 "register_operand" "w")
1773                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775                            (match_operand:VQW 2 "register_operand" "w")
1776                            (match_dup 3)))))]
1777   "TARGET_SIMD"
1778   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779   [(set_attr "type" "neon_mul_<Vetype>_long")]
1780 )
1781
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783   [(match_operand:<VWIDE> 0 "register_operand" "")
1784    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1785    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1786  "TARGET_SIMD"
1787  {
1788    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1790                                                        operands[1],
1791                                                        operands[2], p));
1792    DONE;
1793  }
1794 )
1795
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799                             (match_operand:VQW 1 "register_operand" "w")
1800                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802                             (match_operand:VQW 2 "register_operand" "w")
1803                             (match_dup 3)))))]
1804   "TARGET_SIMD"
1805   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806   [(set_attr "type" "neon_mul_<Vetype>_long")]
1807 )
1808
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810   [(match_operand:<VWIDE> 0 "register_operand" "")
1811    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1812    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1813  "TARGET_SIMD"
1814  {
1815    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1817                                                        operands[1],
1818                                                        operands[2], p));
1819    DONE;
1820
1821  }
1822 )
1823
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1829 ;;
1830 ;; Floating-point operations can raise an exception.  Vectorizing such
1831 ;; operations are safe because of reasons explained below.
1832 ;;
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature.  In the
1835 ;; event of a floating-point exception being raised by vectorised
1836 ;; code then:
1837 ;; 1.  If trapped floating-point exceptions are available, then a trap
1838 ;;     will be taken when any lane raises an enabled exception.  A trap
1839 ;;     handler may determine which lane raised the exception.
1840 ;; 2.  Alternatively a sticky exception flag is set in the
1841 ;;     floating-point status register (FPSR).  Software may explicitly
1842 ;;     test the exception flags, in which case the tests will either
1843 ;;     prevent vectorisation, allowing precise identification of the
1844 ;;     failing operation, or if tested outside of vectorisable regions
1845 ;;     then the specific operation and lane are not of interest.
1846
1847 ;; FP arithmetic operations.
1848
1849 (define_insn "add<mode>3"
1850  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852                    (match_operand:VHSDF 2 "register_operand" "w")))]
1853  "TARGET_SIMD"
1854  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1856 )
1857
1858 (define_insn "sub<mode>3"
1859  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861                     (match_operand:VHSDF 2 "register_operand" "w")))]
1862  "TARGET_SIMD"
1863  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1865 )
1866
1867 (define_insn "mul<mode>3"
1868  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870                    (match_operand:VHSDF 2 "register_operand" "w")))]
1871  "TARGET_SIMD"
1872  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1874 )
1875
1876 (define_expand "div<mode>3"
1877  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1878        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1879                   (match_operand:VHSDF 2 "register_operand" "w")))]
1880  "TARGET_SIMD"
1881 {
1882   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1883     DONE;
1884
1885   operands[1] = force_reg (<MODE>mode, operands[1]);
1886 })
1887
1888 (define_insn "*div<mode>3"
1889  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891                  (match_operand:VHSDF 2 "register_operand" "w")))]
1892  "TARGET_SIMD"
1893  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894   [(set_attr "type" "neon_fp_div_<stype><q>")]
1895 )
1896
1897 (define_insn "neg<mode>2"
1898  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1900  "TARGET_SIMD"
1901  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1903 )
1904
1905 (define_insn "abs<mode>2"
1906  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1908  "TARGET_SIMD"
1909  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1911 )
1912
1913 (define_insn "fma<mode>4"
1914   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916                   (match_operand:VHSDF 2 "register_operand" "w")
1917                   (match_operand:VHSDF 3 "register_operand" "0")))]
1918   "TARGET_SIMD"
1919  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1921 )
1922
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924   [(set (match_operand:VDQF 0 "register_operand" "=w")
1925     (fma:VDQF
1926       (vec_duplicate:VDQF
1927         (vec_select:<VEL>
1928           (match_operand:VDQF 1 "register_operand" "<h_con>")
1929           (parallel [(match_operand:SI 2 "immediate_operand")])))
1930       (match_operand:VDQF 3 "register_operand" "w")
1931       (match_operand:VDQF 4 "register_operand" "0")))]
1932   "TARGET_SIMD"
1933   {
1934     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1936   }
1937   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1938 )
1939
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1942     (fma:VDQSF
1943       (vec_duplicate:VDQSF
1944         (vec_select:<VEL>
1945           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946           (parallel [(match_operand:SI 2 "immediate_operand")])))
1947       (match_operand:VDQSF 3 "register_operand" "w")
1948       (match_operand:VDQSF 4 "register_operand" "0")))]
1949   "TARGET_SIMD"
1950   {
1951     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1953   }
1954   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1955 )
1956
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958   [(set (match_operand:VMUL 0 "register_operand" "=w")
1959     (fma:VMUL
1960       (vec_duplicate:VMUL
1961           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962       (match_operand:VMUL 2 "register_operand" "w")
1963       (match_operand:VMUL 3 "register_operand" "0")))]
1964   "TARGET_SIMD"
1965   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970   [(set (match_operand:DF 0 "register_operand" "=w")
1971     (fma:DF
1972         (vec_select:DF
1973           (match_operand:V2DF 1 "register_operand" "w")
1974           (parallel [(match_operand:SI 2 "immediate_operand")]))
1975       (match_operand:DF 3 "register_operand" "w")
1976       (match_operand:DF 4 "register_operand" "0")))]
1977   "TARGET_SIMD"
1978   {
1979     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1981   }
1982   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1983 )
1984
1985 (define_insn "fnma<mode>4"
1986   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987         (fma:VHSDF
1988           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989           (match_operand:VHSDF 2 "register_operand" "w")
1990           (match_operand:VHSDF 3 "register_operand" "0")))]
1991   "TARGET_SIMD"
1992   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1994 )
1995
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997   [(set (match_operand:VDQF 0 "register_operand" "=w")
1998     (fma:VDQF
1999       (neg:VDQF
2000         (match_operand:VDQF 3 "register_operand" "w"))
2001       (vec_duplicate:VDQF
2002         (vec_select:<VEL>
2003           (match_operand:VDQF 1 "register_operand" "<h_con>")
2004           (parallel [(match_operand:SI 2 "immediate_operand")])))
2005       (match_operand:VDQF 4 "register_operand" "0")))]
2006   "TARGET_SIMD"
2007   {
2008     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2010   }
2011   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2012 )
2013
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2016     (fma:VDQSF
2017       (neg:VDQSF
2018         (match_operand:VDQSF 3 "register_operand" "w"))
2019       (vec_duplicate:VDQSF
2020         (vec_select:<VEL>
2021           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022           (parallel [(match_operand:SI 2 "immediate_operand")])))
2023       (match_operand:VDQSF 4 "register_operand" "0")))]
2024   "TARGET_SIMD"
2025   {
2026     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2028   }
2029   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2030 )
2031
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033   [(set (match_operand:VMUL 0 "register_operand" "=w")
2034     (fma:VMUL
2035       (neg:VMUL
2036         (match_operand:VMUL 2 "register_operand" "w"))
2037       (vec_duplicate:VMUL
2038         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039       (match_operand:VMUL 3 "register_operand" "0")))]
2040   "TARGET_SIMD"
2041   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2043 )
2044
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046   [(set (match_operand:DF 0 "register_operand" "=w")
2047     (fma:DF
2048       (vec_select:DF
2049         (match_operand:V2DF 1 "register_operand" "w")
2050         (parallel [(match_operand:SI 2 "immediate_operand")]))
2051       (neg:DF
2052         (match_operand:DF 3 "register_operand" "w"))
2053       (match_operand:DF 4 "register_operand" "0")))]
2054   "TARGET_SIMD"
2055   {
2056     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2058   }
2059   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2060 )
2061
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2064 (define_insn "<frint_pattern><mode>2"
2065   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2067                        FRINT))]
2068   "TARGET_SIMD"
2069   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070   [(set_attr "type" "neon_fp_round_<stype><q>")]
2071 )
2072
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078                                [(match_operand:VHSDF 1 "register_operand" "w")]
2079                                FCVT)))]
2080   "TARGET_SIMD"
2081   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2083 )
2084
2085 ;; HF Scalar variants of related SIMD instructions.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087   [(set (match_operand:HI 0 "register_operand" "=w")
2088         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2089                       FCVT)))]
2090   "TARGET_SIMD_F16INST"
2091   "fcvt<frint_suffix><su>\t%h0, %h1"
2092   [(set_attr "type" "neon_fp_to_int_s")]
2093 )
2094
2095 (define_insn "<optab>_trunchfhi2"
2096   [(set (match_operand:HI 0 "register_operand" "=w")
2097         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098   "TARGET_SIMD_F16INST"
2099   "fcvtz<su>\t%h0, %h1"
2100   [(set_attr "type" "neon_fp_to_int_s")]
2101 )
2102
2103 (define_insn "<optab>hihf2"
2104   [(set (match_operand:HF 0 "register_operand" "=w")
2105         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106   "TARGET_SIMD_F16INST"
2107   "<su_optab>cvtf\t%h0, %h1"
2108   [(set_attr "type" "neon_int_to_fp_s")]
2109 )
2110
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2114                                [(mult:VDQF
2115          (match_operand:VDQF 1 "register_operand" "w")
2116          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2117                                UNSPEC_FRINTZ)))]
2118   "TARGET_SIMD
2119    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2121   {
2122     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2123     char buf[64];
2124     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125     output_asm_insn (buf, operands);
2126     return "";
2127   }
2128   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2129 )
2130
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134                                [(match_operand:VHSDF 1 "register_operand")]
2135                                 UNSPEC_FRINTZ)))]
2136   "TARGET_SIMD"
2137   {})
2138
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142                                [(match_operand:VHSDF 1 "register_operand")]
2143                                 UNSPEC_FRINTZ)))]
2144   "TARGET_SIMD"
2145   {})
2146
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148   [(set (match_operand:VHSDF 0 "register_operand")
2149         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2150                        UNSPEC_FRINTZ))]
2151   "TARGET_SIMD"
2152   {})
2153
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2156         (FLOATUORS:VHSDF
2157           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2158   "TARGET_SIMD"
2159   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2161 )
2162
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2166
2167 ;; Float widening operations.
2168
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171         (float_extend:<VWIDE> (vec_select:<VHALF>
2172                                (match_operand:VQ_HSF 1 "register_operand" "w")
2173                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2174                             )))]
2175   "TARGET_SIMD"
2176   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177   [(set_attr "type" "neon_fp_cvt_widen_s")]
2178 )
2179
2180 ;; Convert between fixed-point and floating-point (vector modes)
2181
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184         (unspec:<VHSDF:FCVT_TARGET>
2185           [(match_operand:VHSDF 1 "register_operand" "w")
2186            (match_operand:SI 2 "immediate_operand" "i")]
2187          FCVT_F2FIXED))]
2188   "TARGET_SIMD"
2189   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2191 )
2192
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195         (unspec:<VDQ_HSDI:FCVT_TARGET>
2196           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197            (match_operand:SI 2 "immediate_operand" "i")]
2198          FCVT_FIXED2F))]
2199   "TARGET_SIMD"
2200   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2202 )
2203
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns their behavior is as required.
2212
2213 (define_expand "vec_unpacks_lo_<mode>"
2214   [(match_operand:<VWIDE> 0 "register_operand" "")
2215    (match_operand:VQ_HSF 1 "register_operand" "")]
2216   "TARGET_SIMD"
2217   {
2218     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2220                                                        operands[1], p));
2221     DONE;
2222   }
2223 )
2224
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227         (float_extend:<VWIDE> (vec_select:<VHALF>
2228                                (match_operand:VQ_HSF 1 "register_operand" "w")
2229                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2230                             )))]
2231   "TARGET_SIMD"
2232   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233   [(set_attr "type" "neon_fp_cvt_widen_s")]
2234 )
2235
2236 (define_expand "vec_unpacks_hi_<mode>"
2237   [(match_operand:<VWIDE> 0 "register_operand" "")
2238    (match_operand:VQ_HSF 1 "register_operand" "")]
2239   "TARGET_SIMD"
2240   {
2241     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2243                                                        operands[1], p));
2244     DONE;
2245   }
2246 )
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249         (float_extend:<VWIDE>
2250           (match_operand:VDF 1 "register_operand" "w")))]
2251   "TARGET_SIMD"
2252   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253   [(set_attr "type" "neon_fp_cvt_widen_s")]
2254 )
2255
2256 ;; Float narrowing operations.
2257
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259   [(set (match_operand:VDF 0 "register_operand" "=w")
2260       (float_truncate:VDF
2261         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2262   "TARGET_SIMD"
2263   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2265 )
2266
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2269     (vec_concat:<VDBL>
2270       (match_operand:VDF 1 "register_operand" "0")
2271       (float_truncate:VDF
2272         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2276 )
2277
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2280     (vec_concat:<VDBL>
2281       (float_truncate:VDF
2282         (match_operand:<VWIDE> 2 "register_operand" "w"))
2283       (match_operand:VDF 1 "register_operand" "0")))]
2284   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2287 )
2288
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290   [(match_operand:<VDBL> 0 "register_operand" "=w")
2291    (match_operand:VDF 1 "register_operand" "0")
2292    (match_operand:<VWIDE> 2 "register_operand" "w")]
2293   "TARGET_SIMD"
2294 {
2295   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298   emit_insn (gen (operands[0], operands[1], operands[2]));
2299   DONE;
2300 }
2301 )
2302
2303 (define_expand "vec_pack_trunc_v2df"
2304   [(set (match_operand:V4SF 0 "register_operand")
2305       (vec_concat:V4SF
2306         (float_truncate:V2SF
2307             (match_operand:V2DF 1 "register_operand"))
2308         (float_truncate:V2SF
2309             (match_operand:V2DF 2 "register_operand"))
2310           ))]
2311   "TARGET_SIMD"
2312   {
2313     rtx tmp = gen_reg_rtx (V2SFmode);
2314     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2316
2317     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319                                                    tmp, operands[hi]));
2320     DONE;
2321   }
2322 )
2323
2324 (define_expand "vec_pack_trunc_df"
2325   [(set (match_operand:V2SF 0 "register_operand")
2326       (vec_concat:V2SF
2327         (float_truncate:SF
2328             (match_operand:DF 1 "register_operand"))
2329         (float_truncate:SF
2330             (match_operand:DF 2 "register_operand"))
2331           ))]
2332   "TARGET_SIMD"
2333   {
2334     rtx tmp = gen_reg_rtx (V2SFmode);
2335     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2337
2338     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2341     DONE;
2342   }
2343 )
2344
2345 ;; FP Max/Min
2346 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2347 ;; expression like:
2348 ;;      a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2351 ;; -ffast-math.
2352 ;;
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2358 ;; NaNs.
2359
2360 (define_insn "<su><maxmin><mode>3"
2361   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363                        (match_operand:VHSDF 2 "register_operand" "w")))]
2364   "TARGET_SIMD"
2365   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2367 )
2368
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375                       (match_operand:VHSDF 2 "register_operand" "w")]
2376                       FMAXMIN_UNS))]
2377   "TARGET_SIMD"
2378   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2380 )
2381
2382 ;; 'across lanes' add.
2383
2384 (define_expand "reduc_plus_scal_<mode>"
2385   [(match_operand:<VEL> 0 "register_operand" "=w")
2386    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2387                UNSPEC_ADDV)]
2388   "TARGET_SIMD"
2389   {
2390     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391     rtx scratch = gen_reg_rtx (<MODE>mode);
2392     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2394     DONE;
2395   }
2396 )
2397
2398 (define_insn "aarch64_faddp<mode>"
2399  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401                       (match_operand:VHSDF 2 "register_operand" "w")]
2402         UNSPEC_FADDV))]
2403  "TARGET_SIMD"
2404  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2406 )
2407
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409  [(set (match_operand:VDQV 0 "register_operand" "=w")
2410        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2411                     UNSPEC_ADDV))]
2412  "TARGET_SIMD"
2413  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414   [(set_attr "type" "neon_reduc_add<q>")]
2415 )
2416
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418  [(set (match_operand:V2SI 0 "register_operand" "=w")
2419        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2420                     UNSPEC_ADDV))]
2421  "TARGET_SIMD"
2422  "addp\\t%0.2s, %1.2s, %1.2s"
2423   [(set_attr "type" "neon_reduc_add")]
2424 )
2425
2426 (define_insn "reduc_plus_scal_<mode>"
2427  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2429                    UNSPEC_FADDV))]
2430  "TARGET_SIMD"
2431  "faddp\\t%<Vetype>0, %1.<Vtype>"
2432   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2433 )
2434
2435 (define_expand "reduc_plus_scal_v4sf"
2436  [(set (match_operand:SF 0 "register_operand")
2437        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2438                     UNSPEC_FADDV))]
2439  "TARGET_SIMD"
2440 {
2441   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442   rtx scratch = gen_reg_rtx (V4SFmode);
2443   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2446   DONE;
2447 })
2448
2449 (define_insn "clrsb<mode>2"
2450   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2452   "TARGET_SIMD"
2453   "cls\\t%0.<Vtype>, %1.<Vtype>"
2454   [(set_attr "type" "neon_cls<q>")]
2455 )
2456
2457 (define_insn "clz<mode>2"
2458  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2460  "TARGET_SIMD"
2461  "clz\\t%0.<Vtype>, %1.<Vtype>"
2462   [(set_attr "type" "neon_cls<q>")]
2463 )
2464
2465 (define_insn "popcount<mode>2"
2466   [(set (match_operand:VB 0 "register_operand" "=w")
2467         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2468   "TARGET_SIMD"
2469   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470   [(set_attr "type" "neon_cnt<q>")]
2471 )
2472
2473 ;; 'across lanes' max and min ops.
2474
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478   [(match_operand:<VEL> 0 "register_operand")
2479    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2480                   FMAXMINV)]
2481   "TARGET_SIMD"
2482   {
2483     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484     rtx scratch = gen_reg_rtx (<MODE>mode);
2485     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2486                                                               operands[1]));
2487     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2488     DONE;
2489   }
2490 )
2491
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494   [(match_operand:<VEL> 0 "register_operand")
2495    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2496                     MAXMINV)]
2497   "TARGET_SIMD"
2498   {
2499     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500     rtx scratch = gen_reg_rtx (<MODE>mode);
2501     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2502                                                               operands[1]));
2503     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2504     DONE;
2505   }
2506 )
2507
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2511                     MAXMINV))]
2512  "TARGET_SIMD"
2513  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514   [(set_attr "type" "neon_reduc_minmax<q>")]
2515 )
2516
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518  [(set (match_operand:V2SI 0 "register_operand" "=w")
2519        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2520                     MAXMINV))]
2521  "TARGET_SIMD"
2522  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523   [(set_attr "type" "neon_reduc_minmax")]
2524 )
2525
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2529                       FMAXMINV))]
2530  "TARGET_SIMD"
2531  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2533 )
2534
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2536 ;; allocation.
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2538 ;; to select.
2539 ;;
2540 ;; Thus our BSL is of the form:
2541 ;;   op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2543 ;;
2544 ;;   if (op0 = mask)
2545 ;;     bsl mask, op1, op2
2546 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2547 ;;     bit op0, op2, mask
2548 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2549 ;;     bif op0, op1, mask
2550 ;;
2551 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
2554
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2557         (xor:VDQ_I
2558            (and:VDQ_I
2559              (xor:VDQ_I
2560                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563           (match_dup:<V_INT_EQUIV> 3)
2564         ))]
2565   "TARGET_SIMD"
2566   "@
2567   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570   [(set_attr "type" "neon_bsl<q>")]
2571 )
2572
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first.  The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2578
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2581         (xor:VDQ_I
2582            (and:VDQ_I
2583              (xor:VDQ_I
2584                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587           (match_dup:<V_INT_EQUIV> 2)))]
2588   "TARGET_SIMD"
2589   "@
2590   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593   [(set_attr "type" "neon_bsl<q>")]
2594 )
2595
2596 ;; DImode is special, we want to avoid computing operations which are
2597 ;; more naturally computed in general purpose registers in the vector
2598 ;; registers.  If we do that, we need to move all three operands from general
2599 ;; purpose registers to vector registers, then back again.  However, we
2600 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2601 ;; optimizations based on the component operations of a BSL.
2602 ;;
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
2605
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2608         (xor:DI
2609            (and:DI
2610              (xor:DI
2611                (match_operand:DI 3 "register_operand" "w,0,w,r")
2612                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2614           (match_dup:DI 3)
2615         ))]
2616   "TARGET_SIMD"
2617   "@
2618   bsl\\t%0.8b, %2.8b, %3.8b
2619   bit\\t%0.8b, %2.8b, %1.8b
2620   bif\\t%0.8b, %3.8b, %1.8b
2621   #"
2622   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2624 {
2625   /* Split back to individual operations.  If we're before reload, and
2626      able to create a temporary register, do so.  If we're after reload,
2627      we've got an early-clobber destination register, so use that.
2628      Otherwise, we can't create pseudos and we can't yet guarantee that
2629      operands[0] is safe to write, so FAIL to split.  */
2630
2631   rtx scratch;
2632   if (reload_completed)
2633     scratch = operands[0];
2634   else if (can_create_pseudo_p ())
2635     scratch = gen_reg_rtx (DImode);
2636   else
2637     FAIL;
2638
2639   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2642   DONE;
2643 }
2644   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645    (set_attr "length" "4,4,4,12")]
2646 )
2647
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2650         (xor:DI
2651            (and:DI
2652              (xor:DI
2653                (match_operand:DI 3 "register_operand" "w,w,0,r")
2654                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2656           (match_dup:DI 2)
2657         ))]
2658   "TARGET_SIMD"
2659   "@
2660   bsl\\t%0.8b, %3.8b, %2.8b
2661   bit\\t%0.8b, %3.8b, %1.8b
2662   bif\\t%0.8b, %2.8b, %1.8b
2663   #"
2664   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2666 {
2667   /* Split back to individual operations.  If we're before reload, and
2668      able to create a temporary register, do so.  If we're after reload,
2669      we've got an early-clobber destination register, so use that.
2670      Otherwise, we can't create pseudos and we can't yet guarantee that
2671      operands[0] is safe to write, so FAIL to split.  */
2672
2673   rtx scratch;
2674   if (reload_completed)
2675     scratch = operands[0];
2676   else if (can_create_pseudo_p ())
2677     scratch = gen_reg_rtx (DImode);
2678   else
2679     FAIL;
2680
2681   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2684   DONE;
2685 }
2686   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687    (set_attr "length" "4,4,4,12")]
2688 )
2689
2690 (define_expand "aarch64_simd_bsl<mode>"
2691   [(match_operand:VALLDIF 0 "register_operand")
2692    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2693    (match_operand:VALLDIF 2 "register_operand")
2694    (match_operand:VALLDIF 3 "register_operand")]
2695  "TARGET_SIMD"
2696 {
2697   /* We can't alias operands together if they have different modes.  */
2698   rtx tmp = operands[0];
2699   if (FLOAT_MODE_P (<MODE>mode))
2700     {
2701       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2702       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2703       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2704     }
2705   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2706   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2707                                                          operands[1],
2708                                                          operands[2],
2709                                                          operands[3]));
2710   if (tmp != operands[0])
2711     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2712
2713   DONE;
2714 })
2715
2716 (define_expand "vcond_mask_<mode><v_int_equiv>"
2717   [(match_operand:VALLDI 0 "register_operand")
2718    (match_operand:VALLDI 1 "nonmemory_operand")
2719    (match_operand:VALLDI 2 "nonmemory_operand")
2720    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2721   "TARGET_SIMD"
2722 {
2723   /* If we have (a = (P) ? -1 : 0);
2724      Then we can simply move the generated mask (result must be int).  */
2725   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2726       && operands[2] == CONST0_RTX (<MODE>mode))
2727     emit_move_insn (operands[0], operands[3]);
2728   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2729   else if (operands[1] == CONST0_RTX (<MODE>mode)
2730            && operands[2] == CONSTM1_RTX (<MODE>mode))
2731     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2732   else
2733     {
2734       if (!REG_P (operands[1]))
2735         operands[1] = force_reg (<MODE>mode, operands[1]);
2736       if (!REG_P (operands[2]))
2737         operands[2] = force_reg (<MODE>mode, operands[2]);
2738       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2739                                              operands[1], operands[2]));
2740     }
2741
2742   DONE;
2743 })
2744
2745 ;; Patterns comparing two vectors to produce a mask.
2746
2747 (define_expand "vec_cmp<mode><mode>"
2748   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2749           (match_operator 1 "comparison_operator"
2750             [(match_operand:VSDQ_I_DI 2 "register_operand")
2751              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2752   "TARGET_SIMD"
2753 {
2754   rtx mask = operands[0];
2755   enum rtx_code code = GET_CODE (operands[1]);
2756
2757   switch (code)
2758     {
2759     case NE:
2760     case LE:
2761     case LT:
2762     case GE:
2763     case GT:
2764     case EQ:
2765       if (operands[3] == CONST0_RTX (<MODE>mode))
2766         break;
2767
2768       /* Fall through.  */
2769     default:
2770       if (!REG_P (operands[3]))
2771         operands[3] = force_reg (<MODE>mode, operands[3]);
2772
2773       break;
2774     }
2775
2776   switch (code)
2777     {
2778     case LT:
2779       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2780       break;
2781
2782     case GE:
2783       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2784       break;
2785
2786     case LE:
2787       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2788       break;
2789
2790     case GT:
2791       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2792       break;
2793
2794     case LTU:
2795       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2796       break;
2797
2798     case GEU:
2799       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2800       break;
2801
2802     case LEU:
2803       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2804       break;
2805
2806     case GTU:
2807       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2808       break;
2809
2810     case NE:
2811       /* Handle NE as !EQ.  */
2812       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2813       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2814       break;
2815
2816     case EQ:
2817       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2818       break;
2819
2820     default:
2821       gcc_unreachable ();
2822     }
2823
2824   DONE;
2825 })
2826
2827 (define_expand "vec_cmp<mode><v_int_equiv>"
2828   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2829         (match_operator 1 "comparison_operator"
2830             [(match_operand:VDQF 2 "register_operand")
2831              (match_operand:VDQF 3 "nonmemory_operand")]))]
2832   "TARGET_SIMD"
2833 {
2834   int use_zero_form = 0;
2835   enum rtx_code code = GET_CODE (operands[1]);
2836   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2837
2838   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2839
2840   switch (code)
2841     {
2842     case LE:
2843     case LT:
2844     case GE:
2845     case GT:
2846     case EQ:
2847       if (operands[3] == CONST0_RTX (<MODE>mode))
2848         {
2849           use_zero_form = 1;
2850           break;
2851         }
2852       /* Fall through.  */
2853     default:
2854       if (!REG_P (operands[3]))
2855         operands[3] = force_reg (<MODE>mode, operands[3]);
2856
2857       break;
2858     }
2859
2860   switch (code)
2861     {
2862     case LT:
2863       if (use_zero_form)
2864         {
2865           comparison = gen_aarch64_cmlt<mode>;
2866           break;
2867         }
2868       /* Fall through.  */
2869     case UNLT:
2870       std::swap (operands[2], operands[3]);
2871       /* Fall through.  */
2872     case UNGT:
2873     case GT:
2874       comparison = gen_aarch64_cmgt<mode>;
2875       break;
2876     case LE:
2877       if (use_zero_form)
2878         {
2879           comparison = gen_aarch64_cmle<mode>;
2880           break;
2881         }
2882       /* Fall through.  */
2883     case UNLE:
2884       std::swap (operands[2], operands[3]);
2885       /* Fall through.  */
2886     case UNGE:
2887     case GE:
2888       comparison = gen_aarch64_cmge<mode>;
2889       break;
2890     case NE:
2891     case EQ:
2892       comparison = gen_aarch64_cmeq<mode>;
2893       break;
2894     case UNEQ:
2895     case ORDERED:
2896     case UNORDERED:
2897     case LTGT:
2898       break;
2899     default:
2900       gcc_unreachable ();
2901     }
2902
2903   switch (code)
2904     {
2905     case UNGE:
2906     case UNGT:
2907     case UNLE:
2908     case UNLT:
2909       {
2910         /* All of the above must not raise any FP exceptions.  Thus we first
2911            check each operand for NaNs and force any elements containing NaN to
2912            zero before using them in the compare.
2913            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2914                                      (cm<cc> (isnan (a) ? 0.0 : a,
2915                                               isnan (b) ? 0.0 : b))
2916            We use the following transformations for doing the comparisions:
2917            a UNGE b -> a GE b
2918            a UNGT b -> a GT b
2919            a UNLE b -> b GE a
2920            a UNLT b -> b GT a.  */
2921
2922         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2923         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2924         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2925         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2926         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2927         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2928         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2929                                           lowpart_subreg (<V_INT_EQUIV>mode,
2930                                                           operands[2],
2931                                                           <MODE>mode)));
2932         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2933                                           lowpart_subreg (<V_INT_EQUIV>mode,
2934                                                           operands[3],
2935                                                           <MODE>mode)));
2936         gcc_assert (comparison != NULL);
2937         emit_insn (comparison (operands[0],
2938                                lowpart_subreg (<MODE>mode,
2939                                                tmp0, <V_INT_EQUIV>mode),
2940                                lowpart_subreg (<MODE>mode,
2941                                                tmp1, <V_INT_EQUIV>mode)));
2942         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2943       }
2944       break;
2945
2946     case LT:
2947     case LE:
2948     case GT:
2949     case GE:
2950     case EQ:
2951     case NE:
2952       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2953          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2954          a GE b -> a GE b
2955          a GT b -> a GT b
2956          a LE b -> b GE a
2957          a LT b -> b GT a
2958          a EQ b -> a EQ b
2959          a NE b -> ~(a EQ b)  */
2960       gcc_assert (comparison != NULL);
2961       emit_insn (comparison (operands[0], operands[2], operands[3]));
2962       if (code == NE)
2963         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2964       break;
2965
2966     case LTGT:
2967       /* LTGT is not guranteed to not generate a FP exception.  So let's
2968          go the faster way : ((a > b) || (b > a)).  */
2969       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2970                                          operands[2], operands[3]));
2971       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2972       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2973       break;
2974
2975     case ORDERED:
2976     case UNORDERED:
2977     case UNEQ:
2978       /* cmeq (a, a) & cmeq (b, b).  */
2979       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2980                                          operands[2], operands[2]));
2981       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2982       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2983
2984       if (code == UNORDERED)
2985         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2986       else if (code == UNEQ)
2987         {
2988           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2989           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2990         }
2991       break;
2992
2993     default:
2994       gcc_unreachable ();
2995     }
2996
2997   DONE;
2998 })
2999
3000 (define_expand "vec_cmpu<mode><mode>"
3001   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3002           (match_operator 1 "comparison_operator"
3003             [(match_operand:VSDQ_I_DI 2 "register_operand")
3004              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3005   "TARGET_SIMD"
3006 {
3007   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3008                                       operands[2], operands[3]));
3009   DONE;
3010 })
3011
3012 (define_expand "vcond<mode><mode>"
3013   [(set (match_operand:VALLDI 0 "register_operand")
3014         (if_then_else:VALLDI
3015           (match_operator 3 "comparison_operator"
3016             [(match_operand:VALLDI 4 "register_operand")
3017              (match_operand:VALLDI 5 "nonmemory_operand")])
3018           (match_operand:VALLDI 1 "nonmemory_operand")
3019           (match_operand:VALLDI 2 "nonmemory_operand")))]
3020   "TARGET_SIMD"
3021 {
3022   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3023   enum rtx_code code = GET_CODE (operands[3]);
3024
3025   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3026      it as well as switch operands 1/2 in order to avoid the additional
3027      NOT instruction.  */
3028   if (code == NE)
3029     {
3030       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3031                                     operands[4], operands[5]);
3032       std::swap (operands[1], operands[2]);
3033     }
3034   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3035                                              operands[4], operands[5]));
3036   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3037                                                  operands[2], mask));
3038
3039   DONE;
3040 })
3041
3042 (define_expand "vcond<v_cmp_mixed><mode>"
3043   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3044         (if_then_else:<V_cmp_mixed>
3045           (match_operator 3 "comparison_operator"
3046             [(match_operand:VDQF_COND 4 "register_operand")
3047              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3048           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3049           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3050   "TARGET_SIMD"
3051 {
3052   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3053   enum rtx_code code = GET_CODE (operands[3]);
3054
3055   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3056      it as well as switch operands 1/2 in order to avoid the additional
3057      NOT instruction.  */
3058   if (code == NE)
3059     {
3060       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3061                                     operands[4], operands[5]);
3062       std::swap (operands[1], operands[2]);
3063     }
3064   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3065                                              operands[4], operands[5]));
3066   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3067                                                 operands[0], operands[1],
3068                                                 operands[2], mask));
3069
3070   DONE;
3071 })
3072
3073 (define_expand "vcondu<mode><mode>"
3074   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3075         (if_then_else:VSDQ_I_DI
3076           (match_operator 3 "comparison_operator"
3077             [(match_operand:VSDQ_I_DI 4 "register_operand")
3078              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3079           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3080           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3081   "TARGET_SIMD"
3082 {
3083   rtx mask = gen_reg_rtx (<MODE>mode);
3084   enum rtx_code code = GET_CODE (operands[3]);
3085
3086   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3087      it as well as switch operands 1/2 in order to avoid the additional
3088      NOT instruction.  */
3089   if (code == NE)
3090     {
3091       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3092                                     operands[4], operands[5]);
3093       std::swap (operands[1], operands[2]);
3094     }
3095   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3096                                       operands[4], operands[5]));
3097   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3098                                                  operands[2], mask));
3099   DONE;
3100 })
3101
3102 (define_expand "vcondu<mode><v_cmp_mixed>"
3103   [(set (match_operand:VDQF 0 "register_operand")
3104         (if_then_else:VDQF
3105           (match_operator 3 "comparison_operator"
3106             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3107              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3108           (match_operand:VDQF 1 "nonmemory_operand")
3109           (match_operand:VDQF 2 "nonmemory_operand")))]
3110   "TARGET_SIMD"
3111 {
3112   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3113   enum rtx_code code = GET_CODE (operands[3]);
3114
3115   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3116      it as well as switch operands 1/2 in order to avoid the additional
3117      NOT instruction.  */
3118   if (code == NE)
3119     {
3120       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3121                                     operands[4], operands[5]);
3122       std::swap (operands[1], operands[2]);
3123     }
3124   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3125                                                   mask, operands[3],
3126                                                   operands[4], operands[5]));
3127   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3128                                                  operands[2], mask));
3129   DONE;
3130 })
3131
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3133
3134 ;; Lane extraction with sign extension to general purpose register.
3135 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3136   [(set (match_operand:GPI 0 "register_operand" "=r")
3137         (sign_extend:GPI
3138           (vec_select:<VEL>
3139             (match_operand:VDQQH 1 "register_operand" "w")
3140             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3141   "TARGET_SIMD"
3142   {
3143     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3144     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3145   }
3146   [(set_attr "type" "neon_to_gp<q>")]
3147 )
3148
3149 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3150   [(set (match_operand:GPI 0 "register_operand" "=r")
3151         (zero_extend:GPI
3152           (vec_select:<VEL>
3153             (match_operand:VDQQH 1 "register_operand" "w")
3154             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3155   "TARGET_SIMD"
3156   {
3157     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3158                                            INTVAL (operands[2]));
3159     return "umov\\t%w0, %1.<Vetype>[%2]";
3160   }
3161   [(set_attr "type" "neon_to_gp<q>")]
3162 )
3163
3164 ;; Lane extraction of a value, neither sign nor zero extension
3165 ;; is guaranteed so upper bits should be considered undefined.
3166 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3167 (define_insn "aarch64_get_lane<mode>"
3168   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3169         (vec_select:<VEL>
3170           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3171           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3172   "TARGET_SIMD"
3173   {
3174     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3175     switch (which_alternative)
3176       {
3177         case 0:
3178           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3179         case 1:
3180           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3181         case 2:
3182           return "st1\\t{%1.<Vetype>}[%2], %0";
3183         default:
3184           gcc_unreachable ();
3185       }
3186   }
3187   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3188 )
3189
3190 (define_insn "load_pair_lanes<mode>"
3191   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3192         (vec_concat:<VDBL>
3193            (match_operand:VDC 1 "memory_operand" "Utq")
3194            (match_operand:VDC 2 "memory_operand" "m")))]
3195   "TARGET_SIMD && !STRICT_ALIGNMENT
3196    && rtx_equal_p (XEXP (operands[2], 0),
3197                    plus_constant (Pmode,
3198                                   XEXP (operands[1], 0),
3199                                   GET_MODE_SIZE (<MODE>mode)))"
3200   "ldr\\t%q0, %1"
3201   [(set_attr "type" "neon_load1_1reg_q")]
3202 )
3203
3204 (define_insn "store_pair_lanes<mode>"
3205   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3206         (vec_concat:<VDBL>
3207            (match_operand:VDC 1 "register_operand" "w, r")
3208            (match_operand:VDC 2 "register_operand" "w, r")))]
3209   "TARGET_SIMD"
3210   "@
3211    stp\\t%d1, %d2, %y0
3212    stp\\t%x1, %x2, %y0"
3213   [(set_attr "type" "neon_stp, store_16")]
3214 )
3215
3216 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3217 ;; dest vector.
3218
3219 (define_insn "@aarch64_combinez<mode>"
3220   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3221         (vec_concat:<VDBL>
3222           (match_operand:VDC 1 "general_operand" "w,?r,m")
3223           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3224   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3225   "@
3226    mov\\t%0.8b, %1.8b
3227    fmov\t%d0, %1
3228    ldr\\t%d0, %1"
3229   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3230    (set_attr "arch" "simd,fp,simd")]
3231 )
3232
3233 (define_insn "@aarch64_combinez_be<mode>"
3234   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3235         (vec_concat:<VDBL>
3236           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3237           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3238   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3239   "@
3240    mov\\t%0.8b, %1.8b
3241    fmov\t%d0, %1
3242    ldr\\t%d0, %1"
3243   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3244    (set_attr "arch" "simd,fp,simd")]
3245 )
3246
3247 (define_expand "aarch64_combine<mode>"
3248   [(match_operand:<VDBL> 0 "register_operand")
3249    (match_operand:VDC 1 "register_operand")
3250    (match_operand:VDC 2 "register_operand")]
3251   "TARGET_SIMD"
3252 {
3253   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3254
3255   DONE;
3256 }
3257 )
3258
3259 (define_expand "@aarch64_simd_combine<mode>"
3260   [(match_operand:<VDBL> 0 "register_operand")
3261    (match_operand:VDC 1 "register_operand")
3262    (match_operand:VDC 2 "register_operand")]
3263   "TARGET_SIMD"
3264   {
3265     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3266     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3267     DONE;
3268   }
3269 [(set_attr "type" "multiple")]
3270 )
3271
3272 ;; <su><addsub>l<q>.
3273
3274 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3275  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3276        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3277                            (match_operand:VQW 1 "register_operand" "w")
3278                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3279                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3280                            (match_operand:VQW 2 "register_operand" "w")
3281                            (match_dup 3)))))]
3282   "TARGET_SIMD"
3283   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3284   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3285 )
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3288  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290                            (match_operand:VQW 1 "register_operand" "w")
3291                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3292                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293                            (match_operand:VQW 2 "register_operand" "w")
3294                            (match_dup 3)))))]
3295   "TARGET_SIMD"
3296   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3297   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3298 )
3299
3300
3301 (define_expand "aarch64_saddl2<mode>"
3302   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3303    (match_operand:VQW 1 "register_operand" "w")
3304    (match_operand:VQW 2 "register_operand" "w")]
3305   "TARGET_SIMD"
3306 {
3307   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3308   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3309                                                   operands[2], p));
3310   DONE;
3311 })
3312
3313 (define_expand "aarch64_uaddl2<mode>"
3314   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3315    (match_operand:VQW 1 "register_operand" "w")
3316    (match_operand:VQW 2 "register_operand" "w")]
3317   "TARGET_SIMD"
3318 {
3319   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3320   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3321                                                   operands[2], p));
3322   DONE;
3323 })
3324
3325 (define_expand "aarch64_ssubl2<mode>"
3326   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3327    (match_operand:VQW 1 "register_operand" "w")
3328    (match_operand:VQW 2 "register_operand" "w")]
3329   "TARGET_SIMD"
3330 {
3331   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3332   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3333                                                 operands[2], p));
3334   DONE;
3335 })
3336
3337 (define_expand "aarch64_usubl2<mode>"
3338   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3339    (match_operand:VQW 1 "register_operand" "w")
3340    (match_operand:VQW 2 "register_operand" "w")]
3341   "TARGET_SIMD"
3342 {
3343   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3344   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3345                                                 operands[2], p));
3346   DONE;
3347 })
3348
3349 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3350  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3351        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3352                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3353                        (ANY_EXTEND:<VWIDE>
3354                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3355   "TARGET_SIMD"
3356   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3357   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3358 )
3359
3360 ;; <su><addsub>w<q>.
3361
3362 (define_expand "widen_ssum<mode>3"
3363   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3364         (plus:<VDBLW> (sign_extend:<VDBLW>
3365                         (match_operand:VQW 1 "register_operand" ""))
3366                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3367   "TARGET_SIMD"
3368   {
3369     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3370     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3371
3372     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3373                                                 operands[1], p));
3374     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3375     DONE;
3376   }
3377 )
3378
3379 (define_expand "widen_ssum<mode>3"
3380   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3381         (plus:<VWIDE> (sign_extend:<VWIDE>
3382                         (match_operand:VD_BHSI 1 "register_operand" ""))
3383                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3384   "TARGET_SIMD"
3385 {
3386   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3387   DONE;
3388 })
3389
3390 (define_expand "widen_usum<mode>3"
3391   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3392         (plus:<VDBLW> (zero_extend:<VDBLW>
3393                         (match_operand:VQW 1 "register_operand" ""))
3394                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3395   "TARGET_SIMD"
3396   {
3397     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3398     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3399
3400     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3401                                                  operands[1], p));
3402     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3403     DONE;
3404   }
3405 )
3406
3407 (define_expand "widen_usum<mode>3"
3408   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3409         (plus:<VWIDE> (zero_extend:<VWIDE>
3410                         (match_operand:VD_BHSI 1 "register_operand" ""))
3411                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3412   "TARGET_SIMD"
3413 {
3414   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3415   DONE;
3416 })
3417
3418 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3419   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3420         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3421           (ANY_EXTEND:<VWIDE>
3422             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3423   "TARGET_SIMD"
3424   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3425   [(set_attr "type" "neon_sub_widen")]
3426 )
3427
3428 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3429   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3430         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3431           (ANY_EXTEND:<VWIDE>
3432             (vec_select:<VHALF>
3433               (match_operand:VQW 2 "register_operand" "w")
3434               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3435   "TARGET_SIMD"
3436   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3437   [(set_attr "type" "neon_sub_widen")]
3438 )
3439
3440 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3441   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3442         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3443           (ANY_EXTEND:<VWIDE>
3444             (vec_select:<VHALF>
3445               (match_operand:VQW 2 "register_operand" "w")
3446               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3447   "TARGET_SIMD"
3448   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3449   [(set_attr "type" "neon_sub_widen")]
3450 )
3451
3452 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3453   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3454         (plus:<VWIDE>
3455           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3456           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3457   "TARGET_SIMD"
3458   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3459   [(set_attr "type" "neon_add_widen")]
3460 )
3461
3462 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3463   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3464         (plus:<VWIDE>
3465           (ANY_EXTEND:<VWIDE>
3466             (vec_select:<VHALF>
3467               (match_operand:VQW 2 "register_operand" "w")
3468               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3469           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3470   "TARGET_SIMD"
3471   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3472   [(set_attr "type" "neon_add_widen")]
3473 )
3474
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3476   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3477         (plus:<VWIDE>
3478           (ANY_EXTEND:<VWIDE>
3479             (vec_select:<VHALF>
3480               (match_operand:VQW 2 "register_operand" "w")
3481               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3482           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3483   "TARGET_SIMD"
3484   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3485   [(set_attr "type" "neon_add_widen")]
3486 )
3487
3488 (define_expand "aarch64_saddw2<mode>"
3489   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3490    (match_operand:<VWIDE> 1 "register_operand" "w")
3491    (match_operand:VQW 2 "register_operand" "w")]
3492   "TARGET_SIMD"
3493 {
3494   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3495   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3496                                                 operands[2], p));
3497   DONE;
3498 })
3499
3500 (define_expand "aarch64_uaddw2<mode>"
3501   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3502    (match_operand:<VWIDE> 1 "register_operand" "w")
3503    (match_operand:VQW 2 "register_operand" "w")]
3504   "TARGET_SIMD"
3505 {
3506   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3507   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3508                                                 operands[2], p));
3509   DONE;
3510 })
3511
3512
3513 (define_expand "aarch64_ssubw2<mode>"
3514   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3515    (match_operand:<VWIDE> 1 "register_operand" "w")
3516    (match_operand:VQW 2 "register_operand" "w")]
3517   "TARGET_SIMD"
3518 {
3519   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3521                                                 operands[2], p));
3522   DONE;
3523 })
3524
3525 (define_expand "aarch64_usubw2<mode>"
3526   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3527    (match_operand:<VWIDE> 1 "register_operand" "w")
3528    (match_operand:VQW 2 "register_operand" "w")]
3529   "TARGET_SIMD"
3530 {
3531   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3532   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3533                                                 operands[2], p));
3534   DONE;
3535 })
3536
3537 ;; <su><r>h<addsub>.
3538
3539 (define_expand "<u>avg<mode>3_floor"
3540   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3541         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3542                           (match_operand:VDQ_BHSI 2 "register_operand")]
3543                          HADD))]
3544   "TARGET_SIMD"
3545 )
3546
3547 (define_expand "<u>avg<mode>3_ceil"
3548   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3549         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3550                           (match_operand:VDQ_BHSI 2 "register_operand")]
3551                          RHADD))]
3552   "TARGET_SIMD"
3553 )
3554
3555 (define_insn "aarch64_<sur>h<addsub><mode>"
3556   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3557         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3558                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3559                      HADDSUB))]
3560   "TARGET_SIMD"
3561   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3562   [(set_attr "type" "neon_<addsub>_halve<q>")]
3563 )
3564
3565 ;; <r><addsub>hn<q>.
3566
3567 (define_insn "aarch64_<sur><addsub>hn<mode>"
3568   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3569         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3570                             (match_operand:VQN 2 "register_operand" "w")]
3571                            ADDSUBHN))]
3572   "TARGET_SIMD"
3573   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3574   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3575 )
3576
3577 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3578   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3579         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3580                              (match_operand:VQN 2 "register_operand" "w")
3581                              (match_operand:VQN 3 "register_operand" "w")]
3582                             ADDSUBHN2))]
3583   "TARGET_SIMD"
3584   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3585   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3586 )
3587
3588 ;; pmul.
3589
3590 (define_insn "aarch64_pmul<mode>"
3591   [(set (match_operand:VB 0 "register_operand" "=w")
3592         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3593                     (match_operand:VB 2 "register_operand" "w")]
3594                    UNSPEC_PMUL))]
3595  "TARGET_SIMD"
3596  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3597   [(set_attr "type" "neon_mul_<Vetype><q>")]
3598 )
3599
3600 ;; fmulx.
3601
3602 (define_insn "aarch64_fmulx<mode>"
3603   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3604         (unspec:VHSDF_HSDF
3605           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3606            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3607            UNSPEC_FMULX))]
3608  "TARGET_SIMD"
3609  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3610  [(set_attr "type" "neon_fp_mul_<stype>")]
3611 )
3612
3613 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3614
3615 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3616   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3617         (unspec:VDQSF
3618          [(match_operand:VDQSF 1 "register_operand" "w")
3619           (vec_duplicate:VDQSF
3620            (vec_select:<VEL>
3621             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3622             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3623          UNSPEC_FMULX))]
3624   "TARGET_SIMD"
3625   {
3626     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3627     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3628   }
3629   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3630 )
3631
3632 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3633
3634 (define_insn "*aarch64_mulx_elt<mode>"
3635   [(set (match_operand:VDQF 0 "register_operand" "=w")
3636         (unspec:VDQF
3637          [(match_operand:VDQF 1 "register_operand" "w")
3638           (vec_duplicate:VDQF
3639            (vec_select:<VEL>
3640             (match_operand:VDQF 2 "register_operand" "w")
3641             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3642          UNSPEC_FMULX))]
3643   "TARGET_SIMD"
3644   {
3645     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3646     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3647   }
3648   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3649 )
3650
3651 ;; vmulxq_lane
3652
3653 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3654   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3655         (unspec:VHSDF
3656          [(match_operand:VHSDF 1 "register_operand" "w")
3657           (vec_duplicate:VHSDF
3658             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3659          UNSPEC_FMULX))]
3660   "TARGET_SIMD"
3661   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3662   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3663 )
3664
3665 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3666 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3667 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3668
3669 (define_insn "*aarch64_vgetfmulx<mode>"
3670   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3671         (unspec:<VEL>
3672          [(match_operand:<VEL> 1 "register_operand" "w")
3673           (vec_select:<VEL>
3674            (match_operand:VDQF 2 "register_operand" "w")
3675             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3676          UNSPEC_FMULX))]
3677   "TARGET_SIMD"
3678   {
3679     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3680     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3681   }
3682   [(set_attr "type" "fmul<Vetype>")]
3683 )
3684 ;; <su>q<addsub>
3685
3686 (define_insn "aarch64_<su_optab><optab><mode>"
3687   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3688         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3689                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3690   "TARGET_SIMD"
3691   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3692   [(set_attr "type" "neon_<optab><q>")]
3693 )
3694
3695 ;; suqadd and usqadd
3696
3697 (define_insn "aarch64_<sur>qadd<mode>"
3698   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3699         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3700                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3701                        USSUQADD))]
3702   "TARGET_SIMD"
3703   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3704   [(set_attr "type" "neon_qadd<q>")]
3705 )
3706
3707 ;; sqmovun
3708
3709 (define_insn "aarch64_sqmovun<mode>"
3710   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3711         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3712                             UNSPEC_SQXTUN))]
3713    "TARGET_SIMD"
3714    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3715    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3716 )
3717
3718 ;; sqmovn and uqmovn
3719
3720 (define_insn "aarch64_<sur>qmovn<mode>"
3721   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3722         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3723                             SUQMOVN))]
3724   "TARGET_SIMD"
3725   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3726    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3727 )
3728
3729 ;; <su>q<absneg>
3730
3731 (define_insn "aarch64_s<optab><mode>"
3732   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3733         (UNQOPS:VSDQ_I
3734           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3735   "TARGET_SIMD"
3736   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3737   [(set_attr "type" "neon_<optab><q>")]
3738 )
3739
3740 ;; sq<r>dmulh.
3741
3742 (define_insn "aarch64_sq<r>dmulh<mode>"
3743   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3744         (unspec:VSDQ_HSI
3745           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3746            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3747          VQDMULH))]
3748   "TARGET_SIMD"
3749   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3750   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3751 )
3752
3753 ;; sq<r>dmulh_lane
3754
3755 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3756   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3757         (unspec:VDQHS
3758           [(match_operand:VDQHS 1 "register_operand" "w")
3759            (vec_select:<VEL>
3760              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3761              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3762          VQDMULH))]
3763   "TARGET_SIMD"
3764   "*
3765    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3766    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3767   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3768 )
3769
3770 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3771   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3772         (unspec:VDQHS
3773           [(match_operand:VDQHS 1 "register_operand" "w")
3774            (vec_select:<VEL>
3775              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3776              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3777          VQDMULH))]
3778   "TARGET_SIMD"
3779   "*
3780    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3781    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3782   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3783 )
3784
3785 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3786   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3787         (unspec:SD_HSI
3788           [(match_operand:SD_HSI 1 "register_operand" "w")
3789            (vec_select:<VEL>
3790              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3791              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3792          VQDMULH))]
3793   "TARGET_SIMD"
3794   "*
3795    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3796    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3797   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3798 )
3799
3800 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3801   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3802         (unspec:SD_HSI
3803           [(match_operand:SD_HSI 1 "register_operand" "w")
3804            (vec_select:<VEL>
3805              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3806              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3807          VQDMULH))]
3808   "TARGET_SIMD"
3809   "*
3810    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3811    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3812   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3813 )
3814
3815 ;; sqrdml[as]h.
3816
3817 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3818   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3819         (unspec:VSDQ_HSI
3820           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3821            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3822            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3823           SQRDMLH_AS))]
3824    "TARGET_SIMD_RDMA"
3825    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3826    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3827 )
3828
3829 ;; sqrdml[as]h_lane.
3830
3831 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3832   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3833         (unspec:VDQHS
3834           [(match_operand:VDQHS 1 "register_operand" "0")
3835            (match_operand:VDQHS 2 "register_operand" "w")
3836            (vec_select:<VEL>
3837              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3838              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3839           SQRDMLH_AS))]
3840    "TARGET_SIMD_RDMA"
3841    {
3842      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3843      return
3844       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3845    }
3846    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3847 )
3848
3849 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3850   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3851         (unspec:SD_HSI
3852           [(match_operand:SD_HSI 1 "register_operand" "0")
3853            (match_operand:SD_HSI 2 "register_operand" "w")
3854            (vec_select:<VEL>
3855              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3856              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3857           SQRDMLH_AS))]
3858    "TARGET_SIMD_RDMA"
3859    {
3860      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3861      return
3862       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3863    }
3864    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3865 )
3866
3867 ;; sqrdml[as]h_laneq.
3868
3869 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3870   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3871         (unspec:VDQHS
3872           [(match_operand:VDQHS 1 "register_operand" "0")
3873            (match_operand:VDQHS 2 "register_operand" "w")
3874            (vec_select:<VEL>
3875              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3876              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3877           SQRDMLH_AS))]
3878    "TARGET_SIMD_RDMA"
3879    {
3880      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3881      return
3882       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3883    }
3884    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3885 )
3886
3887 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3888   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3889         (unspec:SD_HSI
3890           [(match_operand:SD_HSI 1 "register_operand" "0")
3891            (match_operand:SD_HSI 2 "register_operand" "w")
3892            (vec_select:<VEL>
3893              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3894              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3895           SQRDMLH_AS))]
3896    "TARGET_SIMD_RDMA"
3897    {
3898      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3899      return
3900       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3901    }
3902    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3903 )
3904
3905 ;; vqdml[sa]l
3906
3907 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3908   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3909         (SBINQOPS:<VWIDE>
3910           (match_operand:<VWIDE> 1 "register_operand" "0")
3911           (ss_ashift:<VWIDE>
3912               (mult:<VWIDE>
3913                 (sign_extend:<VWIDE>
3914                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3915                 (sign_extend:<VWIDE>
3916                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3917               (const_int 1))))]
3918   "TARGET_SIMD"
3919   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3920   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3921 )
3922
3923 ;; vqdml[sa]l_lane
3924
3925 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3926   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3927         (SBINQOPS:<VWIDE>
3928           (match_operand:<VWIDE> 1 "register_operand" "0")
3929           (ss_ashift:<VWIDE>
3930             (mult:<VWIDE>
3931               (sign_extend:<VWIDE>
3932                 (match_operand:VD_HSI 2 "register_operand" "w"))
3933               (sign_extend:<VWIDE>
3934                 (vec_duplicate:VD_HSI
3935                   (vec_select:<VEL>
3936                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3937                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3938               ))
3939             (const_int 1))))]
3940   "TARGET_SIMD"
3941   {
3942     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3943     return
3944       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3945   }
3946   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3947 )
3948
3949 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3950   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3951         (SBINQOPS:<VWIDE>
3952           (match_operand:<VWIDE> 1 "register_operand" "0")
3953           (ss_ashift:<VWIDE>
3954             (mult:<VWIDE>
3955               (sign_extend:<VWIDE>
3956                 (match_operand:VD_HSI 2 "register_operand" "w"))
3957               (sign_extend:<VWIDE>
3958                 (vec_duplicate:VD_HSI
3959                   (vec_select:<VEL>
3960                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3961                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3962               ))
3963             (const_int 1))))]
3964   "TARGET_SIMD"
3965   {
3966     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3967     return
3968       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3969   }
3970   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3971 )
3972
3973 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3974   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3975         (SBINQOPS:<VWIDE>
3976           (match_operand:<VWIDE> 1 "register_operand" "0")
3977           (ss_ashift:<VWIDE>
3978             (mult:<VWIDE>
3979               (sign_extend:<VWIDE>
3980                 (match_operand:SD_HSI 2 "register_operand" "w"))
3981               (sign_extend:<VWIDE>
3982                 (vec_select:<VEL>
3983                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3984                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3985               )
3986             (const_int 1))))]
3987   "TARGET_SIMD"
3988   {
3989     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3990     return
3991       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3992   }
3993   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3994 )
3995
3996 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3997   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3998         (SBINQOPS:<VWIDE>
3999           (match_operand:<VWIDE> 1 "register_operand" "0")
4000           (ss_ashift:<VWIDE>
4001             (mult:<VWIDE>
4002               (sign_extend:<VWIDE>
4003                 (match_operand:SD_HSI 2 "register_operand" "w"))
4004               (sign_extend:<VWIDE>
4005                 (vec_select:<VEL>
4006                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4007                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4008               )
4009             (const_int 1))))]
4010   "TARGET_SIMD"
4011   {
4012     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4013     return
4014       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4015   }
4016   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4017 )
4018
4019 ;; vqdml[sa]l_n
4020
4021 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4022   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4023         (SBINQOPS:<VWIDE>
4024           (match_operand:<VWIDE> 1 "register_operand" "0")
4025           (ss_ashift:<VWIDE>
4026               (mult:<VWIDE>
4027                 (sign_extend:<VWIDE>
4028                       (match_operand:VD_HSI 2 "register_operand" "w"))
4029                 (sign_extend:<VWIDE>
4030                   (vec_duplicate:VD_HSI
4031                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4032               (const_int 1))))]
4033   "TARGET_SIMD"
4034   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4035   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4036 )
4037
4038 ;; sqdml[as]l2
4039
4040 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4041   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4042         (SBINQOPS:<VWIDE>
4043          (match_operand:<VWIDE> 1 "register_operand" "0")
4044          (ss_ashift:<VWIDE>
4045              (mult:<VWIDE>
4046                (sign_extend:<VWIDE>
4047                  (vec_select:<VHALF>
4048                      (match_operand:VQ_HSI 2 "register_operand" "w")
4049                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4050                (sign_extend:<VWIDE>
4051                  (vec_select:<VHALF>
4052                      (match_operand:VQ_HSI 3 "register_operand" "w")
4053                      (match_dup 4))))
4054              (const_int 1))))]
4055   "TARGET_SIMD"
4056   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4057   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4058 )
4059
4060 (define_expand "aarch64_sqdmlal2<mode>"
4061   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062    (match_operand:<VWIDE> 1 "register_operand" "w")
4063    (match_operand:VQ_HSI 2 "register_operand" "w")
4064    (match_operand:VQ_HSI 3 "register_operand" "w")]
4065   "TARGET_SIMD"
4066 {
4067   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4068   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4069                                                   operands[2], operands[3], p));
4070   DONE;
4071 })
4072
4073 (define_expand "aarch64_sqdmlsl2<mode>"
4074   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4075    (match_operand:<VWIDE> 1 "register_operand" "w")
4076    (match_operand:VQ_HSI 2 "register_operand" "w")
4077    (match_operand:VQ_HSI 3 "register_operand" "w")]
4078   "TARGET_SIMD"
4079 {
4080   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4082                                                   operands[2], operands[3], p));
4083   DONE;
4084 })
4085
4086 ;; vqdml[sa]l2_lane
4087
4088 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4089   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4090         (SBINQOPS:<VWIDE>
4091           (match_operand:<VWIDE> 1 "register_operand" "0")
4092           (ss_ashift:<VWIDE>
4093               (mult:<VWIDE>
4094                 (sign_extend:<VWIDE>
4095                   (vec_select:<VHALF>
4096                     (match_operand:VQ_HSI 2 "register_operand" "w")
4097                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4098                 (sign_extend:<VWIDE>
4099                   (vec_duplicate:<VHALF>
4100                     (vec_select:<VEL>
4101                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4102                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4103                     ))))
4104               (const_int 1))))]
4105   "TARGET_SIMD"
4106   {
4107     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4108     return
4109      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4110   }
4111   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4112 )
4113
4114 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4115   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116         (SBINQOPS:<VWIDE>
4117           (match_operand:<VWIDE> 1 "register_operand" "0")
4118           (ss_ashift:<VWIDE>
4119               (mult:<VWIDE>
4120                 (sign_extend:<VWIDE>
4121                   (vec_select:<VHALF>
4122                     (match_operand:VQ_HSI 2 "register_operand" "w")
4123                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4124                 (sign_extend:<VWIDE>
4125                   (vec_duplicate:<VHALF>
4126                     (vec_select:<VEL>
4127                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4128                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4129                     ))))
4130               (const_int 1))))]
4131   "TARGET_SIMD"
4132   {
4133     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4134     return
4135      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4136   }
4137   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4138 )
4139
4140 (define_expand "aarch64_sqdmlal2_lane<mode>"
4141   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4142    (match_operand:<VWIDE> 1 "register_operand" "w")
4143    (match_operand:VQ_HSI 2 "register_operand" "w")
4144    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4145    (match_operand:SI 4 "immediate_operand" "i")]
4146   "TARGET_SIMD"
4147 {
4148   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4149   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4150                                                        operands[2], operands[3],
4151                                                        operands[4], p));
4152   DONE;
4153 })
4154
4155 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4156   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4157    (match_operand:<VWIDE> 1 "register_operand" "w")
4158    (match_operand:VQ_HSI 2 "register_operand" "w")
4159    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4160    (match_operand:SI 4 "immediate_operand" "i")]
4161   "TARGET_SIMD"
4162 {
4163   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4164   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4165                                                        operands[2], operands[3],
4166                                                        operands[4], p));
4167   DONE;
4168 })
4169
4170 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4171   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4172    (match_operand:<VWIDE> 1 "register_operand" "w")
4173    (match_operand:VQ_HSI 2 "register_operand" "w")
4174    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4175    (match_operand:SI 4 "immediate_operand" "i")]
4176   "TARGET_SIMD"
4177 {
4178   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4179   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4180                                                        operands[2], operands[3],
4181                                                        operands[4], p));
4182   DONE;
4183 })
4184
4185 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4186   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4187    (match_operand:<VWIDE> 1 "register_operand" "w")
4188    (match_operand:VQ_HSI 2 "register_operand" "w")
4189    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4190    (match_operand:SI 4 "immediate_operand" "i")]
4191   "TARGET_SIMD"
4192 {
4193   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4195                                                        operands[2], operands[3],
4196                                                        operands[4], p));
4197   DONE;
4198 })
4199
4200 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4201   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4202         (SBINQOPS:<VWIDE>
4203           (match_operand:<VWIDE> 1 "register_operand" "0")
4204           (ss_ashift:<VWIDE>
4205             (mult:<VWIDE>
4206               (sign_extend:<VWIDE>
4207                 (vec_select:<VHALF>
4208                   (match_operand:VQ_HSI 2 "register_operand" "w")
4209                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4210               (sign_extend:<VWIDE>
4211                 (vec_duplicate:<VHALF>
4212                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4213             (const_int 1))))]
4214   "TARGET_SIMD"
4215   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4216   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4217 )
4218
4219 (define_expand "aarch64_sqdmlal2_n<mode>"
4220   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4221    (match_operand:<VWIDE> 1 "register_operand" "w")
4222    (match_operand:VQ_HSI 2 "register_operand" "w")
4223    (match_operand:<VEL> 3 "register_operand" "w")]
4224   "TARGET_SIMD"
4225 {
4226   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4227   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4228                                                     operands[2], operands[3],
4229                                                     p));
4230   DONE;
4231 })
4232
4233 (define_expand "aarch64_sqdmlsl2_n<mode>"
4234   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4235    (match_operand:<VWIDE> 1 "register_operand" "w")
4236    (match_operand:VQ_HSI 2 "register_operand" "w")
4237    (match_operand:<VEL> 3 "register_operand" "w")]
4238   "TARGET_SIMD"
4239 {
4240   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4241   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4242                                                     operands[2], operands[3],
4243                                                     p));
4244   DONE;
4245 })
4246
4247 ;; vqdmull
4248
4249 (define_insn "aarch64_sqdmull<mode>"
4250   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4251         (ss_ashift:<VWIDE>
4252              (mult:<VWIDE>
4253                (sign_extend:<VWIDE>
4254                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4255                (sign_extend:<VWIDE>
4256                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4257              (const_int 1)))]
4258   "TARGET_SIMD"
4259   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4260   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4261 )
4262
4263 ;; vqdmull_lane
4264
4265 (define_insn "aarch64_sqdmull_lane<mode>"
4266   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4267         (ss_ashift:<VWIDE>
4268              (mult:<VWIDE>
4269                (sign_extend:<VWIDE>
4270                  (match_operand:VD_HSI 1 "register_operand" "w"))
4271                (sign_extend:<VWIDE>
4272                  (vec_duplicate:VD_HSI
4273                    (vec_select:<VEL>
4274                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4275                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4276                ))
4277              (const_int 1)))]
4278   "TARGET_SIMD"
4279   {
4280     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4281     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4282   }
4283   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4284 )
4285
4286 (define_insn "aarch64_sqdmull_laneq<mode>"
4287   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4288         (ss_ashift:<VWIDE>
4289              (mult:<VWIDE>
4290                (sign_extend:<VWIDE>
4291                  (match_operand:VD_HSI 1 "register_operand" "w"))
4292                (sign_extend:<VWIDE>
4293                  (vec_duplicate:VD_HSI
4294                    (vec_select:<VEL>
4295                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4296                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4297                ))
4298              (const_int 1)))]
4299   "TARGET_SIMD"
4300   {
4301     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4302     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4303   }
4304   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4305 )
4306
4307 (define_insn "aarch64_sqdmull_lane<mode>"
4308   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4309         (ss_ashift:<VWIDE>
4310              (mult:<VWIDE>
4311                (sign_extend:<VWIDE>
4312                  (match_operand:SD_HSI 1 "register_operand" "w"))
4313                (sign_extend:<VWIDE>
4314                  (vec_select:<VEL>
4315                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4316                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4317                ))
4318              (const_int 1)))]
4319   "TARGET_SIMD"
4320   {
4321     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4322     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4323   }
4324   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4325 )
4326
4327 (define_insn "aarch64_sqdmull_laneq<mode>"
4328   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4329         (ss_ashift:<VWIDE>
4330              (mult:<VWIDE>
4331                (sign_extend:<VWIDE>
4332                  (match_operand:SD_HSI 1 "register_operand" "w"))
4333                (sign_extend:<VWIDE>
4334                  (vec_select:<VEL>
4335                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4336                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4337                ))
4338              (const_int 1)))]
4339   "TARGET_SIMD"
4340   {
4341     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4342     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4343   }
4344   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4345 )
4346
4347 ;; vqdmull_n
4348
4349 (define_insn "aarch64_sqdmull_n<mode>"
4350   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4351         (ss_ashift:<VWIDE>
4352              (mult:<VWIDE>
4353                (sign_extend:<VWIDE>
4354                  (match_operand:VD_HSI 1 "register_operand" "w"))
4355                (sign_extend:<VWIDE>
4356                  (vec_duplicate:VD_HSI
4357                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4358                )
4359              (const_int 1)))]
4360   "TARGET_SIMD"
4361   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4362   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4363 )
4364
4365 ;; vqdmull2
4366
4367
4368
4369 (define_insn "aarch64_sqdmull2<mode>_internal"
4370   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4371         (ss_ashift:<VWIDE>
4372              (mult:<VWIDE>
4373                (sign_extend:<VWIDE>
4374                  (vec_select:<VHALF>
4375                    (match_operand:VQ_HSI 1 "register_operand" "w")
4376                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4377                (sign_extend:<VWIDE>
4378                  (vec_select:<VHALF>
4379                    (match_operand:VQ_HSI 2 "register_operand" "w")
4380                    (match_dup 3)))
4381                )
4382              (const_int 1)))]
4383   "TARGET_SIMD"
4384   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4385   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4386 )
4387
4388 (define_expand "aarch64_sqdmull2<mode>"
4389   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4390    (match_operand:VQ_HSI 1 "register_operand" "w")
4391    (match_operand:VQ_HSI 2 "register_operand" "w")]
4392   "TARGET_SIMD"
4393 {
4394   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4395   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4396                                                   operands[2], p));
4397   DONE;
4398 })
4399
4400 ;; vqdmull2_lane
4401
4402 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4403   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4404         (ss_ashift:<VWIDE>
4405              (mult:<VWIDE>
4406                (sign_extend:<VWIDE>
4407                  (vec_select:<VHALF>
4408                    (match_operand:VQ_HSI 1 "register_operand" "w")
4409                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4410                (sign_extend:<VWIDE>
4411                  (vec_duplicate:<VHALF>
4412                    (vec_select:<VEL>
4413                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4414                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4415                ))
4416              (const_int 1)))]
4417   "TARGET_SIMD"
4418   {
4419     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4420     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4421   }
4422   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4423 )
4424
4425 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4426   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4427         (ss_ashift:<VWIDE>
4428              (mult:<VWIDE>
4429                (sign_extend:<VWIDE>
4430                  (vec_select:<VHALF>
4431                    (match_operand:VQ_HSI 1 "register_operand" "w")
4432                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4433                (sign_extend:<VWIDE>
4434                  (vec_duplicate:<VHALF>
4435                    (vec_select:<VEL>
4436                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4437                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4438                ))
4439              (const_int 1)))]
4440   "TARGET_SIMD"
4441   {
4442     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4443     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4444   }
4445   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4446 )
4447
4448 (define_expand "aarch64_sqdmull2_lane<mode>"
4449   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4450    (match_operand:VQ_HSI 1 "register_operand" "w")
4451    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4452    (match_operand:SI 3 "immediate_operand" "i")]
4453   "TARGET_SIMD"
4454 {
4455   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4456   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4457                                                        operands[2], operands[3],
4458                                                        p));
4459   DONE;
4460 })
4461
4462 (define_expand "aarch64_sqdmull2_laneq<mode>"
4463   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4464    (match_operand:VQ_HSI 1 "register_operand" "w")
4465    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4466    (match_operand:SI 3 "immediate_operand" "i")]
4467   "TARGET_SIMD"
4468 {
4469   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4471                                                        operands[2], operands[3],
4472                                                        p));
4473   DONE;
4474 })
4475
4476 ;; vqdmull2_n
4477
4478 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4479   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4480         (ss_ashift:<VWIDE>
4481              (mult:<VWIDE>
4482                (sign_extend:<VWIDE>
4483                  (vec_select:<VHALF>
4484                    (match_operand:VQ_HSI 1 "register_operand" "w")
4485                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4486                (sign_extend:<VWIDE>
4487                  (vec_duplicate:<VHALF>
4488                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4489                )
4490              (const_int 1)))]
4491   "TARGET_SIMD"
4492   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4493   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4494 )
4495
4496 (define_expand "aarch64_sqdmull2_n<mode>"
4497   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4498    (match_operand:VQ_HSI 1 "register_operand" "w")
4499    (match_operand:<VEL> 2 "register_operand" "w")]
4500   "TARGET_SIMD"
4501 {
4502   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4503   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4504                                                     operands[2], p));
4505   DONE;
4506 })
4507
4508 ;; vshl
4509
4510 (define_insn "aarch64_<sur>shl<mode>"
4511   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4512         (unspec:VSDQ_I_DI
4513           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4514            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4515          VSHL))]
4516   "TARGET_SIMD"
4517   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4518   [(set_attr "type" "neon_shift_reg<q>")]
4519 )
4520
4521
4522 ;; vqshl
4523
4524 (define_insn "aarch64_<sur>q<r>shl<mode>"
4525   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4526         (unspec:VSDQ_I
4527           [(match_operand:VSDQ_I 1 "register_operand" "w")
4528            (match_operand:VSDQ_I 2 "register_operand" "w")]
4529          VQSHL))]
4530   "TARGET_SIMD"
4531   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4532   [(set_attr "type" "neon_sat_shift_reg<q>")]
4533 )
4534
4535 ;; vshll_n
4536
4537 (define_insn "aarch64_<sur>shll_n<mode>"
4538   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4539         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4540                          (match_operand:SI 2
4541                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4542                          VSHLL))]
4543   "TARGET_SIMD"
4544   {
4545     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4546       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4547     else
4548       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4549   }
4550   [(set_attr "type" "neon_shift_imm_long")]
4551 )
4552
4553 ;; vshll_high_n
4554
4555 (define_insn "aarch64_<sur>shll2_n<mode>"
4556   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4557         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4558                          (match_operand:SI 2 "immediate_operand" "i")]
4559                          VSHLL))]
4560   "TARGET_SIMD"
4561   {
4562     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4563       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4564     else
4565       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4566   }
4567   [(set_attr "type" "neon_shift_imm_long")]
4568 )
4569
4570 ;; vrshr_n
4571
4572 (define_insn "aarch64_<sur>shr_n<mode>"
4573   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4574         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4575                            (match_operand:SI 2
4576                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4577                           VRSHR_N))]
4578   "TARGET_SIMD"
4579   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4580   [(set_attr "type" "neon_sat_shift_imm<q>")]
4581 )
4582
4583 ;; v(r)sra_n
4584
4585 (define_insn "aarch64_<sur>sra_n<mode>"
4586   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4588                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4589                        (match_operand:SI 3
4590                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4591                       VSRA))]
4592   "TARGET_SIMD"
4593   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4594   [(set_attr "type" "neon_shift_acc<q>")]
4595 )
4596
4597 ;; vs<lr>i_n
4598
4599 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4600   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4601         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4602                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4603                        (match_operand:SI 3
4604                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4605                       VSLRI))]
4606   "TARGET_SIMD"
4607   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4608   [(set_attr "type" "neon_shift_imm<q>")]
4609 )
4610
4611 ;; vqshl(u)
4612
4613 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4614   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4615         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4616                        (match_operand:SI 2
4617                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4618                       VQSHL_N))]
4619   "TARGET_SIMD"
4620   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4621   [(set_attr "type" "neon_sat_shift_imm<q>")]
4622 )
4623
4624
4625 ;; vq(r)shr(u)n_n
4626
4627 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4628   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4629         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4630                             (match_operand:SI 2
4631                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4632                            VQSHRN_N))]
4633   "TARGET_SIMD"
4634   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4635   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4636 )
4637
4638
4639 ;; cm(eq|ge|gt|lt|le)
4640 ;; Note, we have constraints for Dz and Z as different expanders
4641 ;; have different ideas of what should be passed to this pattern.
4642
4643 (define_insn "aarch64_cm<optab><mode>"
4644   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4645         (neg:<V_INT_EQUIV>
4646           (COMPARISONS:<V_INT_EQUIV>
4647             (match_operand:VDQ_I 1 "register_operand" "w,w")
4648             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4649           )))]
4650   "TARGET_SIMD"
4651   "@
4652   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4653   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4654   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4655 )
4656
4657 (define_insn_and_split "aarch64_cm<optab>di"
4658   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4659         (neg:DI
4660           (COMPARISONS:DI
4661             (match_operand:DI 1 "register_operand" "w,w,r")
4662             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4663           )))
4664      (clobber (reg:CC CC_REGNUM))]
4665   "TARGET_SIMD"
4666   "#"
4667   "&& reload_completed"
4668   [(set (match_operand:DI 0 "register_operand")
4669         (neg:DI
4670           (COMPARISONS:DI
4671             (match_operand:DI 1 "register_operand")
4672             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4673           )))]
4674   {
4675     /* If we are in the general purpose register file,
4676        we split to a sequence of comparison and store.  */
4677     if (GP_REGNUM_P (REGNO (operands[0]))
4678         && GP_REGNUM_P (REGNO (operands[1])))
4679       {
4680         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4681         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4682         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4683         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4684         DONE;
4685       }
4686     /* Otherwise, we expand to a similar pattern which does not
4687        clobber CC_REGNUM.  */
4688   }
4689   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4690 )
4691
4692 (define_insn "*aarch64_cm<optab>di"
4693   [(set (match_operand:DI 0 "register_operand" "=w,w")
4694         (neg:DI
4695           (COMPARISONS:DI
4696             (match_operand:DI 1 "register_operand" "w,w")
4697             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4698           )))]
4699   "TARGET_SIMD && reload_completed"
4700   "@
4701   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4702   cm<optab>\t%d0, %d1, #0"
4703   [(set_attr "type" "neon_compare, neon_compare_zero")]
4704 )
4705
4706 ;; cm(hs|hi)
4707
4708 (define_insn "aarch64_cm<optab><mode>"
4709   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4710         (neg:<V_INT_EQUIV>
4711           (UCOMPARISONS:<V_INT_EQUIV>
4712             (match_operand:VDQ_I 1 "register_operand" "w")
4713             (match_operand:VDQ_I 2 "register_operand" "w")
4714           )))]
4715   "TARGET_SIMD"
4716   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4717   [(set_attr "type" "neon_compare<q>")]
4718 )
4719
4720 (define_insn_and_split "aarch64_cm<optab>di"
4721   [(set (match_operand:DI 0 "register_operand" "=w,r")
4722         (neg:DI
4723           (UCOMPARISONS:DI
4724             (match_operand:DI 1 "register_operand" "w,r")
4725             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4726           )))
4727     (clobber (reg:CC CC_REGNUM))]
4728   "TARGET_SIMD"
4729   "#"
4730   "&& reload_completed"
4731   [(set (match_operand:DI 0 "register_operand")
4732         (neg:DI
4733           (UCOMPARISONS:DI
4734             (match_operand:DI 1 "register_operand")
4735             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4736           )))]
4737   {
4738     /* If we are in the general purpose register file,
4739        we split to a sequence of comparison and store.  */
4740     if (GP_REGNUM_P (REGNO (operands[0]))
4741         && GP_REGNUM_P (REGNO (operands[1])))
4742       {
4743         machine_mode mode = CCmode;
4744         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4745         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4746         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4747         DONE;
4748       }
4749     /* Otherwise, we expand to a similar pattern which does not
4750        clobber CC_REGNUM.  */
4751   }
4752   [(set_attr "type" "neon_compare,multiple")]
4753 )
4754
4755 (define_insn "*aarch64_cm<optab>di"
4756   [(set (match_operand:DI 0 "register_operand" "=w")
4757         (neg:DI
4758           (UCOMPARISONS:DI
4759             (match_operand:DI 1 "register_operand" "w")
4760             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4761           )))]
4762   "TARGET_SIMD && reload_completed"
4763   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4764   [(set_attr "type" "neon_compare")]
4765 )
4766
4767 ;; cmtst
4768
4769 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4770 ;; we don't have any insns using ne, and aarch64_vcond outputs
4771 ;; not (neg (eq (and x y) 0))
4772 ;; which is rewritten by simplify_rtx as
4773 ;; plus (eq (and x y) 0) -1.
4774
4775 (define_insn "aarch64_cmtst<mode>"
4776   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4777         (plus:<V_INT_EQUIV>
4778           (eq:<V_INT_EQUIV>
4779             (and:VDQ_I
4780               (match_operand:VDQ_I 1 "register_operand" "w")
4781               (match_operand:VDQ_I 2 "register_operand" "w"))
4782             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4783           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4784   ]
4785   "TARGET_SIMD"
4786   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787   [(set_attr "type" "neon_tst<q>")]
4788 )
4789
4790 (define_insn_and_split "aarch64_cmtstdi"
4791   [(set (match_operand:DI 0 "register_operand" "=w,r")
4792         (neg:DI
4793           (ne:DI
4794             (and:DI
4795               (match_operand:DI 1 "register_operand" "w,r")
4796               (match_operand:DI 2 "register_operand" "w,r"))
4797             (const_int 0))))
4798     (clobber (reg:CC CC_REGNUM))]
4799   "TARGET_SIMD"
4800   "#"
4801   "&& reload_completed"
4802   [(set (match_operand:DI 0 "register_operand")
4803         (neg:DI
4804           (ne:DI
4805             (and:DI
4806               (match_operand:DI 1 "register_operand")
4807               (match_operand:DI 2 "register_operand"))
4808             (const_int 0))))]
4809   {
4810     /* If we are in the general purpose register file,
4811        we split to a sequence of comparison and store.  */
4812     if (GP_REGNUM_P (REGNO (operands[0]))
4813         && GP_REGNUM_P (REGNO (operands[1])))
4814       {
4815         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4816         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4817         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4818         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4819         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4820         DONE;
4821       }
4822     /* Otherwise, we expand to a similar pattern which does not
4823        clobber CC_REGNUM.  */
4824   }
4825   [(set_attr "type" "neon_tst,multiple")]
4826 )
4827
4828 (define_insn "*aarch64_cmtstdi"
4829   [(set (match_operand:DI 0 "register_operand" "=w")
4830         (neg:DI
4831           (ne:DI
4832             (and:DI
4833               (match_operand:DI 1 "register_operand" "w")
4834               (match_operand:DI 2 "register_operand" "w"))
4835             (const_int 0))))]
4836   "TARGET_SIMD"
4837   "cmtst\t%d0, %d1, %d2"
4838   [(set_attr "type" "neon_tst")]
4839 )
4840
4841 ;; fcm(eq|ge|gt|le|lt)
4842
4843 (define_insn "aarch64_cm<optab><mode>"
4844   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4845         (neg:<V_INT_EQUIV>
4846           (COMPARISONS:<V_INT_EQUIV>
4847             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4848             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4849           )))]
4850   "TARGET_SIMD"
4851   "@
4852   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4853   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4854   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4855 )
4856
4857 ;; fac(ge|gt)
4858 ;; Note we can also handle what would be fac(le|lt) by
4859 ;; generating fac(ge|gt).
4860
4861 (define_insn "aarch64_fac<optab><mode>"
4862   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4863         (neg:<V_INT_EQUIV>
4864           (FAC_COMPARISONS:<V_INT_EQUIV>
4865             (abs:VHSDF_HSDF
4866               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4867             (abs:VHSDF_HSDF
4868               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4869   )))]
4870   "TARGET_SIMD"
4871   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4872   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4873 )
4874
4875 ;; addp
4876
4877 (define_insn "aarch64_addp<mode>"
4878   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4879         (unspec:VD_BHSI
4880           [(match_operand:VD_BHSI 1 "register_operand" "w")
4881            (match_operand:VD_BHSI 2 "register_operand" "w")]
4882           UNSPEC_ADDP))]
4883   "TARGET_SIMD"
4884   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4885   [(set_attr "type" "neon_reduc_add<q>")]
4886 )
4887
4888 (define_insn "aarch64_addpdi"
4889   [(set (match_operand:DI 0 "register_operand" "=w")
4890         (unspec:DI
4891           [(match_operand:V2DI 1 "register_operand" "w")]
4892           UNSPEC_ADDP))]
4893   "TARGET_SIMD"
4894   "addp\t%d0, %1.2d"
4895   [(set_attr "type" "neon_reduc_add")]
4896 )
4897
4898 ;; sqrt
4899
4900 (define_expand "sqrt<mode>2"
4901   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4902         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4903   "TARGET_SIMD"
4904 {
4905   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4906     DONE;
4907 })
4908
4909 (define_insn "*sqrt<mode>2"
4910   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4911         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4912   "TARGET_SIMD"
4913   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4914   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4915 )
4916
4917 ;; Patterns for vector struct loads and stores.
4918
4919 (define_insn "aarch64_simd_ld2<mode>"
4920   [(set (match_operand:OI 0 "register_operand" "=w")
4921         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4922                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4923                    UNSPEC_LD2))]
4924   "TARGET_SIMD"
4925   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4926   [(set_attr "type" "neon_load2_2reg<q>")]
4927 )
4928
4929 (define_insn "aarch64_simd_ld2r<mode>"
4930   [(set (match_operand:OI 0 "register_operand" "=w")
4931        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4932                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4933                   UNSPEC_LD2_DUP))]
4934   "TARGET_SIMD"
4935   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4936   [(set_attr "type" "neon_load2_all_lanes<q>")]
4937 )
4938
4939 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4940   [(set (match_operand:OI 0 "register_operand" "=w")
4941         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4942                     (match_operand:OI 2 "register_operand" "0")
4943                     (match_operand:SI 3 "immediate_operand" "i")
4944                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4945                    UNSPEC_LD2_LANE))]
4946   "TARGET_SIMD"
4947   {
4948     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4949     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4950   }
4951   [(set_attr "type" "neon_load2_one_lane")]
4952 )
4953
4954 (define_expand "vec_load_lanesoi<mode>"
4955   [(set (match_operand:OI 0 "register_operand" "=w")
4956         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4957                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958                    UNSPEC_LD2))]
4959   "TARGET_SIMD"
4960 {
4961   if (BYTES_BIG_ENDIAN)
4962     {
4963       rtx tmp = gen_reg_rtx (OImode);
4964       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4965       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4966       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4967     }
4968   else
4969     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4970   DONE;
4971 })
4972
4973 (define_insn "aarch64_simd_st2<mode>"
4974   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4975         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4976                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4977                    UNSPEC_ST2))]
4978   "TARGET_SIMD"
4979   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4980   [(set_attr "type" "neon_store2_2reg<q>")]
4981 )
4982
4983 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4984 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4985   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4986         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4987                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4988                     (match_operand:SI 2 "immediate_operand" "i")]
4989                    UNSPEC_ST2_LANE))]
4990   "TARGET_SIMD"
4991   {
4992     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4993     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4994   }
4995   [(set_attr "type" "neon_store2_one_lane<q>")]
4996 )
4997
4998 (define_expand "vec_store_lanesoi<mode>"
4999   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5000         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5001                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002                    UNSPEC_ST2))]
5003   "TARGET_SIMD"
5004 {
5005   if (BYTES_BIG_ENDIAN)
5006     {
5007       rtx tmp = gen_reg_rtx (OImode);
5008       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5009       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5010       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5011     }
5012   else
5013     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5014   DONE;
5015 })
5016
5017 (define_insn "aarch64_simd_ld3<mode>"
5018   [(set (match_operand:CI 0 "register_operand" "=w")
5019         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5020                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5021                    UNSPEC_LD3))]
5022   "TARGET_SIMD"
5023   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5024   [(set_attr "type" "neon_load3_3reg<q>")]
5025 )
5026
5027 (define_insn "aarch64_simd_ld3r<mode>"
5028   [(set (match_operand:CI 0 "register_operand" "=w")
5029        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5030                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5031                   UNSPEC_LD3_DUP))]
5032   "TARGET_SIMD"
5033   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5034   [(set_attr "type" "neon_load3_all_lanes<q>")]
5035 )
5036
5037 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5038   [(set (match_operand:CI 0 "register_operand" "=w")
5039         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5040                     (match_operand:CI 2 "register_operand" "0")
5041                     (match_operand:SI 3 "immediate_operand" "i")
5042                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043                    UNSPEC_LD3_LANE))]
5044   "TARGET_SIMD"
5045 {
5046     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5047     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5048 }
5049   [(set_attr "type" "neon_load3_one_lane")]
5050 )
5051
5052 (define_expand "vec_load_lanesci<mode>"
5053   [(set (match_operand:CI 0 "register_operand" "=w")
5054         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5055                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056                    UNSPEC_LD3))]
5057   "TARGET_SIMD"
5058 {
5059   if (BYTES_BIG_ENDIAN)
5060     {
5061       rtx tmp = gen_reg_rtx (CImode);
5062       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5063       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5064       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5065     }
5066   else
5067     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5068   DONE;
5069 })
5070
5071 (define_insn "aarch64_simd_st3<mode>"
5072   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5073         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5074                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075                    UNSPEC_ST3))]
5076   "TARGET_SIMD"
5077   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5078   [(set_attr "type" "neon_store3_3reg<q>")]
5079 )
5080
5081 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5082 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5083   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5084         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5085                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5086                      (match_operand:SI 2 "immediate_operand" "i")]
5087                     UNSPEC_ST3_LANE))]
5088   "TARGET_SIMD"
5089   {
5090     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5091     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5092   }
5093   [(set_attr "type" "neon_store3_one_lane<q>")]
5094 )
5095
5096 (define_expand "vec_store_lanesci<mode>"
5097   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5098         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5099                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5100                    UNSPEC_ST3))]
5101   "TARGET_SIMD"
5102 {
5103   if (BYTES_BIG_ENDIAN)
5104     {
5105       rtx tmp = gen_reg_rtx (CImode);
5106       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5107       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5108       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5109     }
5110   else
5111     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5112   DONE;
5113 })
5114
5115 (define_insn "aarch64_simd_ld4<mode>"
5116   [(set (match_operand:XI 0 "register_operand" "=w")
5117         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5118                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5119                    UNSPEC_LD4))]
5120   "TARGET_SIMD"
5121   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5122   [(set_attr "type" "neon_load4_4reg<q>")]
5123 )
5124
5125 (define_insn "aarch64_simd_ld4r<mode>"
5126   [(set (match_operand:XI 0 "register_operand" "=w")
5127        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5128                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5129                   UNSPEC_LD4_DUP))]
5130   "TARGET_SIMD"
5131   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5132   [(set_attr "type" "neon_load4_all_lanes<q>")]
5133 )
5134
5135 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5136   [(set (match_operand:XI 0 "register_operand" "=w")
5137         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5138                     (match_operand:XI 2 "register_operand" "0")
5139                     (match_operand:SI 3 "immediate_operand" "i")
5140                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5141                    UNSPEC_LD4_LANE))]
5142   "TARGET_SIMD"
5143 {
5144     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5145     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5146 }
5147   [(set_attr "type" "neon_load4_one_lane")]
5148 )
5149
5150 (define_expand "vec_load_lanesxi<mode>"
5151   [(set (match_operand:XI 0 "register_operand" "=w")
5152         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5153                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154                    UNSPEC_LD4))]
5155   "TARGET_SIMD"
5156 {
5157   if (BYTES_BIG_ENDIAN)
5158     {
5159       rtx tmp = gen_reg_rtx (XImode);
5160       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5161       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5162       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5163     }
5164   else
5165     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5166   DONE;
5167 })
5168
5169 (define_insn "aarch64_simd_st4<mode>"
5170   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5171         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5172                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5173                    UNSPEC_ST4))]
5174   "TARGET_SIMD"
5175   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5176   [(set_attr "type" "neon_store4_4reg<q>")]
5177 )
5178
5179 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5180 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5181   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5182         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5183                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5184                      (match_operand:SI 2 "immediate_operand" "i")]
5185                     UNSPEC_ST4_LANE))]
5186   "TARGET_SIMD"
5187   {
5188     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5189     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5190   }
5191   [(set_attr "type" "neon_store4_one_lane<q>")]
5192 )
5193
5194 (define_expand "vec_store_lanesxi<mode>"
5195   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5196         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5197                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5198                    UNSPEC_ST4))]
5199   "TARGET_SIMD"
5200 {
5201   if (BYTES_BIG_ENDIAN)
5202     {
5203       rtx tmp = gen_reg_rtx (XImode);
5204       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5205       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5206       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5207     }
5208   else
5209     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5210   DONE;
5211 })
5212
5213 (define_insn_and_split "aarch64_rev_reglist<mode>"
5214 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5215         (unspec:VSTRUCT
5216                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5217                     (match_operand:V16QI 2 "register_operand" "w")]
5218                    UNSPEC_REV_REGLIST))]
5219   "TARGET_SIMD"
5220   "#"
5221   "&& reload_completed"
5222   [(const_int 0)]
5223 {
5224   int i;
5225   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5226   for (i = 0; i < nregs; i++)
5227     {
5228       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5229       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5230       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5231     }
5232   DONE;
5233 }
5234   [(set_attr "type" "neon_tbl1_q")
5235    (set_attr "length" "<insn_count>")]
5236 )
5237
5238 ;; Reload patterns for AdvSIMD register list operands.
5239
5240 (define_expand "mov<mode>"
5241   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5242         (match_operand:VSTRUCT 1 "general_operand" ""))]
5243   "TARGET_SIMD"
5244 {
5245   if (can_create_pseudo_p ())
5246     {
5247       if (GET_CODE (operands[0]) != REG)
5248         operands[1] = force_reg (<MODE>mode, operands[1]);
5249     }
5250 })
5251
5252
5253 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5254   [(match_operand:CI 0 "register_operand" "=w")
5255    (match_operand:DI 1 "register_operand" "r")
5256    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5257   "TARGET_SIMD"
5258 {
5259   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5260   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5261   DONE;
5262 })
5263
5264 (define_insn "aarch64_ld1_x3_<mode>"
5265   [(set (match_operand:CI 0 "register_operand" "=w")
5266         (unspec:CI
5267           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5268            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5269   "TARGET_SIMD"
5270   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5271   [(set_attr "type" "neon_load1_3reg<q>")]
5272 )
5273
5274 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5275   [(match_operand:DI 0 "register_operand" "")
5276    (match_operand:OI 1 "register_operand" "")
5277    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5278   "TARGET_SIMD"
5279 {
5280   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5281   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5282   DONE;
5283 })
5284
5285 (define_insn "aarch64_st1_x2_<mode>"
5286    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5287          (unspec:OI
5288           [(match_operand:OI 1 "register_operand" "w")
5289           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5290   "TARGET_SIMD"
5291   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5292   [(set_attr "type" "neon_store1_2reg<q>")]
5293 )
5294
5295 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5296   [(match_operand:DI 0 "register_operand" "")
5297    (match_operand:CI 1 "register_operand" "")
5298    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299   "TARGET_SIMD"
5300 {
5301   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5302   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5303   DONE;
5304 })
5305
5306 (define_insn "aarch64_st1_x3_<mode>"
5307    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5308         (unspec:CI
5309          [(match_operand:CI 1 "register_operand" "w")
5310           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5311   "TARGET_SIMD"
5312   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5313   [(set_attr "type" "neon_store1_3reg<q>")]
5314 )
5315
5316 (define_insn "*aarch64_mov<mode>"
5317   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5318         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5319   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5320    && (register_operand (operands[0], <MODE>mode)
5321        || register_operand (operands[1], <MODE>mode))"
5322   "@
5323    #
5324    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5325    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5326   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5327                      neon_load<nregs>_<nregs>reg_q")
5328    (set_attr "length" "<insn_count>,4,4")]
5329 )
5330
5331 (define_insn "aarch64_be_ld1<mode>"
5332   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5333         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5334                              "aarch64_simd_struct_operand" "Utv")]
5335         UNSPEC_LD1))]
5336   "TARGET_SIMD"
5337   "ld1\\t{%0<Vmtype>}, %1"
5338   [(set_attr "type" "neon_load1_1reg<q>")]
5339 )
5340
5341 (define_insn "aarch64_be_st1<mode>"
5342   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5343         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5344         UNSPEC_ST1))]
5345   "TARGET_SIMD"
5346   "st1\\t{%1<Vmtype>}, %0"
5347   [(set_attr "type" "neon_store1_1reg<q>")]
5348 )
5349
5350 (define_insn "*aarch64_be_movoi"
5351   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5352         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5353   "TARGET_SIMD && BYTES_BIG_ENDIAN
5354    && (register_operand (operands[0], OImode)
5355        || register_operand (operands[1], OImode))"
5356   "@
5357    #
5358    stp\\t%q1, %R1, %0
5359    ldp\\t%q0, %R0, %1"
5360   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5361    (set_attr "length" "8,4,4")]
5362 )
5363
5364 (define_insn "*aarch64_be_movci"
5365   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5366         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5367   "TARGET_SIMD && BYTES_BIG_ENDIAN
5368    && (register_operand (operands[0], CImode)
5369        || register_operand (operands[1], CImode))"
5370   "#"
5371   [(set_attr "type" "multiple")
5372    (set_attr "length" "12,4,4")]
5373 )
5374
5375 (define_insn "*aarch64_be_movxi"
5376   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5377         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5378   "TARGET_SIMD && BYTES_BIG_ENDIAN
5379    && (register_operand (operands[0], XImode)
5380        || register_operand (operands[1], XImode))"
5381   "#"
5382   [(set_attr "type" "multiple")
5383    (set_attr "length" "16,4,4")]
5384 )
5385
5386 (define_split
5387   [(set (match_operand:OI 0 "register_operand")
5388         (match_operand:OI 1 "register_operand"))]
5389   "TARGET_SIMD && reload_completed"
5390   [(const_int 0)]
5391 {
5392   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5393   DONE;
5394 })
5395
5396 (define_split
5397   [(set (match_operand:CI 0 "nonimmediate_operand")
5398         (match_operand:CI 1 "general_operand"))]
5399   "TARGET_SIMD && reload_completed"
5400   [(const_int 0)]
5401 {
5402   if (register_operand (operands[0], CImode)
5403       && register_operand (operands[1], CImode))
5404     {
5405       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5406       DONE;
5407     }
5408   else if (BYTES_BIG_ENDIAN)
5409     {
5410       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5411                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5412       emit_move_insn (gen_lowpart (V16QImode,
5413                                    simplify_gen_subreg (TImode, operands[0],
5414                                                         CImode, 32)),
5415                       gen_lowpart (V16QImode,
5416                                    simplify_gen_subreg (TImode, operands[1],
5417                                                         CImode, 32)));
5418       DONE;
5419     }
5420   else
5421     FAIL;
5422 })
5423
5424 (define_split
5425   [(set (match_operand:XI 0 "nonimmediate_operand")
5426         (match_operand:XI 1 "general_operand"))]
5427   "TARGET_SIMD && reload_completed"
5428   [(const_int 0)]
5429 {
5430   if (register_operand (operands[0], XImode)
5431       && register_operand (operands[1], XImode))
5432     {
5433       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5434       DONE;
5435     }
5436   else if (BYTES_BIG_ENDIAN)
5437     {
5438       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5439                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5440       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5441                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5442       DONE;
5443     }
5444   else
5445     FAIL;
5446 })
5447
5448 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5449   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5450    (match_operand:DI 1 "register_operand" "w")
5451    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5452   "TARGET_SIMD"
5453 {
5454   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5455   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5456                      * <VSTRUCT:nregs>);
5457
5458   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5459                                                                 mem));
5460   DONE;
5461 })
5462
5463 (define_insn "aarch64_ld2<mode>_dreg"
5464   [(set (match_operand:OI 0 "register_operand" "=w")
5465         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5466                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5467                    UNSPEC_LD2_DREG))]
5468   "TARGET_SIMD"
5469   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5470   [(set_attr "type" "neon_load2_2reg<q>")]
5471 )
5472
5473 (define_insn "aarch64_ld2<mode>_dreg"
5474   [(set (match_operand:OI 0 "register_operand" "=w")
5475         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5476                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5477                    UNSPEC_LD2_DREG))]
5478   "TARGET_SIMD"
5479   "ld1\\t{%S0.1d - %T0.1d}, %1"
5480   [(set_attr "type" "neon_load1_2reg<q>")]
5481 )
5482
5483 (define_insn "aarch64_ld3<mode>_dreg"
5484   [(set (match_operand:CI 0 "register_operand" "=w")
5485         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5486                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5487                    UNSPEC_LD3_DREG))]
5488   "TARGET_SIMD"
5489   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5490   [(set_attr "type" "neon_load3_3reg<q>")]
5491 )
5492
5493 (define_insn "aarch64_ld3<mode>_dreg"
5494   [(set (match_operand:CI 0 "register_operand" "=w")
5495         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5496                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5497                    UNSPEC_LD3_DREG))]
5498   "TARGET_SIMD"
5499   "ld1\\t{%S0.1d - %U0.1d}, %1"
5500   [(set_attr "type" "neon_load1_3reg<q>")]
5501 )
5502
5503 (define_insn "aarch64_ld4<mode>_dreg"
5504   [(set (match_operand:XI 0 "register_operand" "=w")
5505         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5506                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5507                    UNSPEC_LD4_DREG))]
5508   "TARGET_SIMD"
5509   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5510   [(set_attr "type" "neon_load4_4reg<q>")]
5511 )
5512
5513 (define_insn "aarch64_ld4<mode>_dreg"
5514   [(set (match_operand:XI 0 "register_operand" "=w")
5515         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5516                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5517                    UNSPEC_LD4_DREG))]
5518   "TARGET_SIMD"
5519   "ld1\\t{%S0.1d - %V0.1d}, %1"
5520   [(set_attr "type" "neon_load1_4reg<q>")]
5521 )
5522
5523 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5524  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5525   (match_operand:DI 1 "register_operand" "r")
5526   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5527   "TARGET_SIMD"
5528 {
5529   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5530   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5531
5532   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5533   DONE;
5534 })
5535
5536 (define_expand "aarch64_ld1<VALL_F16:mode>"
5537  [(match_operand:VALL_F16 0 "register_operand")
5538   (match_operand:DI 1 "register_operand")]
5539   "TARGET_SIMD"
5540 {
5541   machine_mode mode = <VALL_F16:MODE>mode;
5542   rtx mem = gen_rtx_MEM (mode, operands[1]);
5543
5544   if (BYTES_BIG_ENDIAN)
5545     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5546   else
5547     emit_move_insn (operands[0], mem);
5548   DONE;
5549 })
5550
5551 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5552  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5553   (match_operand:DI 1 "register_operand" "r")
5554   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5555   "TARGET_SIMD"
5556 {
5557   machine_mode mode = <VSTRUCT:MODE>mode;
5558   rtx mem = gen_rtx_MEM (mode, operands[1]);
5559
5560   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5561   DONE;
5562 })
5563
5564 (define_expand "aarch64_ld1x2<VQ:mode>"
5565  [(match_operand:OI 0 "register_operand" "=w")
5566   (match_operand:DI 1 "register_operand" "r")
5567   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5568   "TARGET_SIMD"
5569 {
5570   machine_mode mode = OImode;
5571   rtx mem = gen_rtx_MEM (mode, operands[1]);
5572
5573   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5574   DONE;
5575 })
5576
5577 (define_expand "aarch64_ld1x2<VDC:mode>"
5578  [(match_operand:OI 0 "register_operand" "=w")
5579   (match_operand:DI 1 "register_operand" "r")
5580   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581   "TARGET_SIMD"
5582 {
5583   machine_mode mode = OImode;
5584   rtx mem = gen_rtx_MEM (mode, operands[1]);
5585
5586   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5587   DONE;
5588 })
5589
5590
5591 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5592   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5593         (match_operand:DI 1 "register_operand" "w")
5594         (match_operand:VSTRUCT 2 "register_operand" "0")
5595         (match_operand:SI 3 "immediate_operand" "i")
5596         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5597   "TARGET_SIMD"
5598 {
5599   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5600   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5601                      * <VSTRUCT:nregs>);
5602
5603   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5604   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5605         operands[0], mem, operands[2], operands[3]));
5606   DONE;
5607 })
5608
5609 ;; Expanders for builtins to extract vector registers from large
5610 ;; opaque integer modes.
5611
5612 ;; D-register list.
5613
5614 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5615  [(match_operand:VDC 0 "register_operand" "=w")
5616   (match_operand:VSTRUCT 1 "register_operand" "w")
5617   (match_operand:SI 2 "immediate_operand" "i")]
5618   "TARGET_SIMD"
5619 {
5620   int part = INTVAL (operands[2]);
5621   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5622   int offset = part * 16;
5623
5624   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5625   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5626   DONE;
5627 })
5628
5629 ;; Q-register list.
5630
5631 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5632  [(match_operand:VQ 0 "register_operand" "=w")
5633   (match_operand:VSTRUCT 1 "register_operand" "w")
5634   (match_operand:SI 2 "immediate_operand" "i")]
5635   "TARGET_SIMD"
5636 {
5637   int part = INTVAL (operands[2]);
5638   int offset = part * 16;
5639
5640   emit_move_insn (operands[0],
5641                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5642   DONE;
5643 })
5644
5645 ;; Permuted-store expanders for neon intrinsics.
5646
5647 ;; Permute instructions
5648
5649 ;; vec_perm support
5650
5651 (define_expand "vec_perm<mode>"
5652   [(match_operand:VB 0 "register_operand")
5653    (match_operand:VB 1 "register_operand")
5654    (match_operand:VB 2 "register_operand")
5655    (match_operand:VB 3 "register_operand")]
5656   "TARGET_SIMD"
5657 {
5658   aarch64_expand_vec_perm (operands[0], operands[1],
5659                            operands[2], operands[3], <nunits>);
5660   DONE;
5661 })
5662
5663 (define_insn "aarch64_tbl1<mode>"
5664   [(set (match_operand:VB 0 "register_operand" "=w")
5665         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5666                     (match_operand:VB 2 "register_operand" "w")]
5667                    UNSPEC_TBL))]
5668   "TARGET_SIMD"
5669   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5670   [(set_attr "type" "neon_tbl1<q>")]
5671 )
5672
5673 ;; Two source registers.
5674
5675 (define_insn "aarch64_tbl2v16qi"
5676   [(set (match_operand:V16QI 0 "register_operand" "=w")
5677         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5678                        (match_operand:V16QI 2 "register_operand" "w")]
5679                       UNSPEC_TBL))]
5680   "TARGET_SIMD"
5681   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5682   [(set_attr "type" "neon_tbl2_q")]
5683 )
5684
5685 (define_insn "aarch64_tbl3<mode>"
5686   [(set (match_operand:VB 0 "register_operand" "=w")
5687         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5688                       (match_operand:VB 2 "register_operand" "w")]
5689                       UNSPEC_TBL))]
5690   "TARGET_SIMD"
5691   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5692   [(set_attr "type" "neon_tbl3")]
5693 )
5694
5695 (define_insn "aarch64_tbx4<mode>"
5696   [(set (match_operand:VB 0 "register_operand" "=w")
5697         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5698                       (match_operand:OI 2 "register_operand" "w")
5699                       (match_operand:VB 3 "register_operand" "w")]
5700                       UNSPEC_TBX))]
5701   "TARGET_SIMD"
5702   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5703   [(set_attr "type" "neon_tbl4")]
5704 )
5705
5706 ;; Three source registers.
5707
5708 (define_insn "aarch64_qtbl3<mode>"
5709   [(set (match_operand:VB 0 "register_operand" "=w")
5710         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5711                       (match_operand:VB 2 "register_operand" "w")]
5712                       UNSPEC_TBL))]
5713   "TARGET_SIMD"
5714   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5715   [(set_attr "type" "neon_tbl3")]
5716 )
5717
5718 (define_insn "aarch64_qtbx3<mode>"
5719   [(set (match_operand:VB 0 "register_operand" "=w")
5720         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5721                       (match_operand:CI 2 "register_operand" "w")
5722                       (match_operand:VB 3 "register_operand" "w")]
5723                       UNSPEC_TBX))]
5724   "TARGET_SIMD"
5725   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5726   [(set_attr "type" "neon_tbl3")]
5727 )
5728
5729 ;; Four source registers.
5730
5731 (define_insn "aarch64_qtbl4<mode>"
5732   [(set (match_operand:VB 0 "register_operand" "=w")
5733         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5734                       (match_operand:VB 2 "register_operand" "w")]
5735                       UNSPEC_TBL))]
5736   "TARGET_SIMD"
5737   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5738   [(set_attr "type" "neon_tbl4")]
5739 )
5740
5741 (define_insn "aarch64_qtbx4<mode>"
5742   [(set (match_operand:VB 0 "register_operand" "=w")
5743         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5744                       (match_operand:XI 2 "register_operand" "w")
5745                       (match_operand:VB 3 "register_operand" "w")]
5746                       UNSPEC_TBX))]
5747   "TARGET_SIMD"
5748   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5749   [(set_attr "type" "neon_tbl4")]
5750 )
5751
5752 (define_insn_and_split "aarch64_combinev16qi"
5753   [(set (match_operand:OI 0 "register_operand" "=w")
5754         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5755                     (match_operand:V16QI 2 "register_operand" "w")]
5756                    UNSPEC_CONCAT))]
5757   "TARGET_SIMD"
5758   "#"
5759   "&& reload_completed"
5760   [(const_int 0)]
5761 {
5762   aarch64_split_combinev16qi (operands);
5763   DONE;
5764 }
5765 [(set_attr "type" "multiple")]
5766 )
5767
5768 ;; This instruction's pattern is generated directly by
5769 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5770 ;; need corresponding changes there.
5771 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5772   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5773         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5774                           (match_operand:VALL_F16 2 "register_operand" "w")]
5775          PERMUTE))]
5776   "TARGET_SIMD"
5777   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5778   [(set_attr "type" "neon_permute<q>")]
5779 )
5780
5781 ;; This instruction's pattern is generated directly by
5782 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5783 ;; need corresponding changes there.  Note that the immediate (third)
5784 ;; operand is a lane index not a byte index.
5785 (define_insn "aarch64_ext<mode>"
5786   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5787         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5788                           (match_operand:VALL_F16 2 "register_operand" "w")
5789                           (match_operand:SI 3 "immediate_operand" "i")]
5790          UNSPEC_EXT))]
5791   "TARGET_SIMD"
5792 {
5793   operands[3] = GEN_INT (INTVAL (operands[3])
5794       * GET_MODE_UNIT_SIZE (<MODE>mode));
5795   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5796 }
5797   [(set_attr "type" "neon_ext<q>")]
5798 )
5799
5800 ;; This instruction's pattern is generated directly by
5801 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5802 ;; need corresponding changes there.
5803 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5804   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5805         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5806                     REVERSE))]
5807   "TARGET_SIMD"
5808   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5809   [(set_attr "type" "neon_rev<q>")]
5810 )
5811
5812 (define_insn "aarch64_st2<mode>_dreg"
5813   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5814         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5815                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5816                    UNSPEC_ST2))]
5817   "TARGET_SIMD"
5818   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5819   [(set_attr "type" "neon_store2_2reg")]
5820 )
5821
5822 (define_insn "aarch64_st2<mode>_dreg"
5823   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5824         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5825                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5826                    UNSPEC_ST2))]
5827   "TARGET_SIMD"
5828   "st1\\t{%S1.1d - %T1.1d}, %0"
5829   [(set_attr "type" "neon_store1_2reg")]
5830 )
5831
5832 (define_insn "aarch64_st3<mode>_dreg"
5833   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5834         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5835                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5836                    UNSPEC_ST3))]
5837   "TARGET_SIMD"
5838   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5839   [(set_attr "type" "neon_store3_3reg")]
5840 )
5841
5842 (define_insn "aarch64_st3<mode>_dreg"
5843   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5844         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5845                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5846                    UNSPEC_ST3))]
5847   "TARGET_SIMD"
5848   "st1\\t{%S1.1d - %U1.1d}, %0"
5849   [(set_attr "type" "neon_store1_3reg")]
5850 )
5851
5852 (define_insn "aarch64_st4<mode>_dreg"
5853   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5854         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5855                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5856                    UNSPEC_ST4))]
5857   "TARGET_SIMD"
5858   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5859   [(set_attr "type" "neon_store4_4reg")]
5860 )
5861
5862 (define_insn "aarch64_st4<mode>_dreg"
5863   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5864         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5865                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5866                    UNSPEC_ST4))]
5867   "TARGET_SIMD"
5868   "st1\\t{%S1.1d - %V1.1d}, %0"
5869   [(set_attr "type" "neon_store1_4reg")]
5870 )
5871
5872 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5873  [(match_operand:DI 0 "register_operand" "r")
5874   (match_operand:VSTRUCT 1 "register_operand" "w")
5875   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5876   "TARGET_SIMD"
5877 {
5878   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5879   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5880
5881   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5882   DONE;
5883 })
5884
5885 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5886  [(match_operand:DI 0 "register_operand" "r")
5887   (match_operand:VSTRUCT 1 "register_operand" "w")
5888   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5889   "TARGET_SIMD"
5890 {
5891   machine_mode mode = <VSTRUCT:MODE>mode;
5892   rtx mem = gen_rtx_MEM (mode, operands[0]);
5893
5894   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5895   DONE;
5896 })
5897
5898 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5899  [(match_operand:DI 0 "register_operand" "r")
5900   (match_operand:VSTRUCT 1 "register_operand" "w")
5901   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5902   (match_operand:SI 2 "immediate_operand")]
5903   "TARGET_SIMD"
5904 {
5905   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5906   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5907                      * <VSTRUCT:nregs>);
5908
5909   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5910                 mem, operands[1], operands[2]));
5911   DONE;
5912 })
5913
5914 (define_expand "aarch64_st1<VALL_F16:mode>"
5915  [(match_operand:DI 0 "register_operand")
5916   (match_operand:VALL_F16 1 "register_operand")]
5917   "TARGET_SIMD"
5918 {
5919   machine_mode mode = <VALL_F16:MODE>mode;
5920   rtx mem = gen_rtx_MEM (mode, operands[0]);
5921
5922   if (BYTES_BIG_ENDIAN)
5923     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5924   else
5925     emit_move_insn (mem, operands[1]);
5926   DONE;
5927 })
5928
5929 ;; Expander for builtins to insert vector registers into large
5930 ;; opaque integer modes.
5931
5932 ;; Q-register list.  We don't need a D-reg inserter as we zero
5933 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5934
5935 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5936  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5937   (match_operand:VSTRUCT 1 "register_operand" "0")
5938   (match_operand:VQ 2 "register_operand" "w")
5939   (match_operand:SI 3 "immediate_operand" "i")]
5940   "TARGET_SIMD"
5941 {
5942   int part = INTVAL (operands[3]);
5943   int offset = part * 16;
5944
5945   emit_move_insn (operands[0], operands[1]);
5946   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5947                   operands[2]);
5948   DONE;
5949 })
5950
5951 ;; Standard pattern name vec_init<mode><Vel>.
5952
5953 (define_expand "vec_init<mode><Vel>"
5954   [(match_operand:VALL_F16 0 "register_operand" "")
5955    (match_operand 1 "" "")]
5956   "TARGET_SIMD"
5957 {
5958   aarch64_expand_vector_init (operands[0], operands[1]);
5959   DONE;
5960 })
5961
5962 (define_expand "vec_init<mode><Vhalf>"
5963   [(match_operand:VQ_NO2E 0 "register_operand" "")
5964    (match_operand 1 "" "")]
5965   "TARGET_SIMD"
5966 {
5967   aarch64_expand_vector_init (operands[0], operands[1]);
5968   DONE;
5969 })
5970
5971 (define_insn "*aarch64_simd_ld1r<mode>"
5972   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5973         (vec_duplicate:VALL_F16
5974           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5975   "TARGET_SIMD"
5976   "ld1r\\t{%0.<Vtype>}, %1"
5977   [(set_attr "type" "neon_load1_all_lanes")]
5978 )
5979
5980 (define_insn "aarch64_simd_ld1<mode>_x2"
5981   [(set (match_operand:OI 0 "register_operand" "=w")
5982         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5983                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5984                    UNSPEC_LD1))]
5985   "TARGET_SIMD"
5986   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5987   [(set_attr "type" "neon_load1_2reg<q>")]
5988 )
5989
5990 (define_insn "aarch64_simd_ld1<mode>_x2"
5991   [(set (match_operand:OI 0 "register_operand" "=w")
5992         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5993                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5994                    UNSPEC_LD1))]
5995   "TARGET_SIMD"
5996   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5997   [(set_attr "type" "neon_load1_2reg<q>")]
5998 )
5999
6000
6001 (define_insn "@aarch64_frecpe<mode>"
6002   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6003         (unspec:VHSDF_HSDF
6004          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6005          UNSPEC_FRECPE))]
6006   "TARGET_SIMD"
6007   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6008   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6009 )
6010
6011 (define_insn "aarch64_frecpx<mode>"
6012   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6013         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6014          UNSPEC_FRECPX))]
6015   "TARGET_SIMD"
6016   "frecpx\t%<s>0, %<s>1"
6017   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6018 )
6019
6020 (define_insn "@aarch64_frecps<mode>"
6021   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6022         (unspec:VHSDF_HSDF
6023           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6024           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6025           UNSPEC_FRECPS))]
6026   "TARGET_SIMD"
6027   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6028   [(set_attr "type" "neon_fp_recps_<stype><q>")]
6029 )
6030
6031 (define_insn "aarch64_urecpe<mode>"
6032   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6033         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6034                 UNSPEC_URECPE))]
6035  "TARGET_SIMD"
6036  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6037   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6038
6039 ;; Standard pattern name vec_extract<mode><Vel>.
6040
6041 (define_expand "vec_extract<mode><Vel>"
6042   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6043    (match_operand:VALL_F16 1 "register_operand" "")
6044    (match_operand:SI 2 "immediate_operand" "")]
6045   "TARGET_SIMD"
6046 {
6047     emit_insn
6048       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6049     DONE;
6050 })
6051
6052 ;; aes
6053
6054 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6055   [(set (match_operand:V16QI 0 "register_operand" "=w")
6056         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6057                        (match_operand:V16QI 2 "register_operand" "w")]
6058          CRYPTO_AES))]
6059   "TARGET_SIMD && TARGET_AES"
6060   "aes<aes_op>\\t%0.16b, %2.16b"
6061   [(set_attr "type" "crypto_aese")]
6062 )
6063
6064 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6065   [(set (match_operand:V16QI 0 "register_operand" "=w")
6066         (unspec:V16QI [(xor:V16QI
6067                         (match_operand:V16QI 1 "register_operand" "%0")
6068                         (match_operand:V16QI 2 "register_operand" "w"))
6069                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6070                        CRYPTO_AES))]
6071   "TARGET_SIMD && TARGET_AES"
6072   "aes<aes_op>\\t%0.16b, %2.16b"
6073   [(set_attr "type" "crypto_aese")]
6074 )
6075
6076 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6077   [(set (match_operand:V16QI 0 "register_operand" "=w")
6078         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6079         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6080                    (match_operand:V16QI 2 "register_operand" "w"))]
6081         CRYPTO_AES))]
6082   "TARGET_SIMD && TARGET_AES"
6083   "aes<aes_op>\\t%0.16b, %2.16b"
6084   [(set_attr "type" "crypto_aese")]
6085 )
6086
6087 ;; When AES/AESMC fusion is enabled we want the register allocation to
6088 ;; look like:
6089 ;;    AESE Vn, _
6090 ;;    AESMC Vn, Vn
6091 ;; So prefer to tie operand 1 to operand 0 when fusing.
6092
6093 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6094   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6095         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6096          CRYPTO_AESMC))]
6097   "TARGET_SIMD && TARGET_AES"
6098   "aes<aesmc_op>\\t%0.16b, %1.16b"
6099   [(set_attr "type" "crypto_aesmc")
6100    (set_attr_alternative "enabled"
6101      [(if_then_else (match_test
6102                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6103                      (const_string "yes" )
6104                      (const_string "no"))
6105       (const_string "yes")])]
6106 )
6107
6108 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6109 ;; and enforce the register dependency without scheduling or register
6110 ;; allocation messing up the order or introducing moves inbetween.
6111 ;;  Mash the two together during combine.
6112
6113 (define_insn "*aarch64_crypto_aese_fused"
6114   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6115         (unspec:V16QI
6116           [(unspec:V16QI
6117             [(match_operand:V16QI 1 "register_operand" "0")
6118              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
6119           ] UNSPEC_AESMC))]
6120   "TARGET_SIMD && TARGET_AES
6121    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6122   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6123   [(set_attr "type" "crypto_aese")
6124    (set_attr "length" "8")]
6125 )
6126
6127 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6128 ;; and enforce the register dependency without scheduling or register
6129 ;; allocation messing up the order or introducing moves inbetween.
6130 ;;  Mash the two together during combine.
6131
6132 (define_insn "*aarch64_crypto_aesd_fused"
6133   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6134         (unspec:V16QI
6135           [(unspec:V16QI
6136             [(match_operand:V16QI 1 "register_operand" "0")
6137              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6138           ] UNSPEC_AESIMC))]
6139   "TARGET_SIMD && TARGET_AES
6140    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6141   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6142   [(set_attr "type" "crypto_aese")
6143    (set_attr "length" "8")]
6144 )
6145
6146 ;; sha1
6147
6148 (define_insn "aarch64_crypto_sha1hsi"
6149   [(set (match_operand:SI 0 "register_operand" "=w")
6150         (unspec:SI [(match_operand:SI 1
6151                        "register_operand" "w")]
6152          UNSPEC_SHA1H))]
6153   "TARGET_SIMD && TARGET_SHA2"
6154   "sha1h\\t%s0, %s1"
6155   [(set_attr "type" "crypto_sha1_fast")]
6156 )
6157
6158 (define_insn "aarch64_crypto_sha1hv4si"
6159   [(set (match_operand:SI 0 "register_operand" "=w")
6160         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6161                      (parallel [(const_int 0)]))]
6162          UNSPEC_SHA1H))]
6163   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6164   "sha1h\\t%s0, %s1"
6165   [(set_attr "type" "crypto_sha1_fast")]
6166 )
6167
6168 (define_insn "aarch64_be_crypto_sha1hv4si"
6169   [(set (match_operand:SI 0 "register_operand" "=w")
6170         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6171                      (parallel [(const_int 3)]))]
6172          UNSPEC_SHA1H))]
6173   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6174   "sha1h\\t%s0, %s1"
6175   [(set_attr "type" "crypto_sha1_fast")]
6176 )
6177
6178 (define_insn "aarch64_crypto_sha1su1v4si"
6179   [(set (match_operand:V4SI 0 "register_operand" "=w")
6180         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6181                       (match_operand:V4SI 2 "register_operand" "w")]
6182          UNSPEC_SHA1SU1))]
6183   "TARGET_SIMD && TARGET_SHA2"
6184   "sha1su1\\t%0.4s, %2.4s"
6185   [(set_attr "type" "crypto_sha1_fast")]
6186 )
6187
6188 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6189   [(set (match_operand:V4SI 0 "register_operand" "=w")
6190         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6191                       (match_operand:SI 2 "register_operand" "w")
6192                       (match_operand:V4SI 3 "register_operand" "w")]
6193          CRYPTO_SHA1))]
6194   "TARGET_SIMD && TARGET_SHA2"
6195   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6196   [(set_attr "type" "crypto_sha1_slow")]
6197 )
6198
6199 (define_insn "aarch64_crypto_sha1su0v4si"
6200   [(set (match_operand:V4SI 0 "register_operand" "=w")
6201         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6202                       (match_operand:V4SI 2 "register_operand" "w")
6203                       (match_operand:V4SI 3 "register_operand" "w")]
6204          UNSPEC_SHA1SU0))]
6205   "TARGET_SIMD && TARGET_SHA2"
6206   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6207   [(set_attr "type" "crypto_sha1_xor")]
6208 )
6209
6210 ;; sha256
6211
6212 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6213   [(set (match_operand:V4SI 0 "register_operand" "=w")
6214         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6215                       (match_operand:V4SI 2 "register_operand" "w")
6216                       (match_operand:V4SI 3 "register_operand" "w")]
6217          CRYPTO_SHA256))]
6218   "TARGET_SIMD && TARGET_SHA2"
6219   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6220   [(set_attr "type" "crypto_sha256_slow")]
6221 )
6222
6223 (define_insn "aarch64_crypto_sha256su0v4si"
6224   [(set (match_operand:V4SI 0 "register_operand" "=w")
6225         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6226                       (match_operand:V4SI 2 "register_operand" "w")]
6227          UNSPEC_SHA256SU0))]
6228   "TARGET_SIMD && TARGET_SHA2"
6229   "sha256su0\\t%0.4s, %2.4s"
6230   [(set_attr "type" "crypto_sha256_fast")]
6231 )
6232
6233 (define_insn "aarch64_crypto_sha256su1v4si"
6234   [(set (match_operand:V4SI 0 "register_operand" "=w")
6235         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6236                       (match_operand:V4SI 2 "register_operand" "w")
6237                       (match_operand:V4SI 3 "register_operand" "w")]
6238          UNSPEC_SHA256SU1))]
6239   "TARGET_SIMD && TARGET_SHA2"
6240   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6241   [(set_attr "type" "crypto_sha256_slow")]
6242 )
6243
6244 ;; sha512
6245
6246 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6247   [(set (match_operand:V2DI 0 "register_operand" "=w")
6248         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6249                       (match_operand:V2DI 2 "register_operand" "w")
6250                       (match_operand:V2DI 3 "register_operand" "w")]
6251          CRYPTO_SHA512))]
6252   "TARGET_SIMD && TARGET_SHA3"
6253   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6254   [(set_attr "type" "crypto_sha512")]
6255 )
6256
6257 (define_insn "aarch64_crypto_sha512su0qv2di"
6258   [(set (match_operand:V2DI 0 "register_operand" "=w")
6259         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6260                       (match_operand:V2DI 2 "register_operand" "w")]
6261          UNSPEC_SHA512SU0))]
6262   "TARGET_SIMD && TARGET_SHA3"
6263   "sha512su0\\t%0.2d, %2.2d"
6264   [(set_attr "type" "crypto_sha512")]
6265 )
6266
6267 (define_insn "aarch64_crypto_sha512su1qv2di"
6268   [(set (match_operand:V2DI 0 "register_operand" "=w")
6269         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6270                       (match_operand:V2DI 2 "register_operand" "w")
6271                       (match_operand:V2DI 3 "register_operand" "w")]
6272          UNSPEC_SHA512SU1))]
6273   "TARGET_SIMD && TARGET_SHA3"
6274   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6275   [(set_attr "type" "crypto_sha512")]
6276 )
6277
6278 ;; sha3
6279
6280 (define_insn "eor3q<mode>4"
6281   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6282         (xor:VQ_I
6283          (xor:VQ_I
6284           (match_operand:VQ_I 2 "register_operand" "w")
6285           (match_operand:VQ_I 3 "register_operand" "w"))
6286          (match_operand:VQ_I 1 "register_operand" "w")))]
6287   "TARGET_SIMD && TARGET_SHA3"
6288   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6289   [(set_attr "type" "crypto_sha3")]
6290 )
6291
6292 (define_insn "aarch64_rax1qv2di"
6293   [(set (match_operand:V2DI 0 "register_operand" "=w")
6294         (xor:V2DI
6295          (rotate:V2DI
6296           (match_operand:V2DI 2 "register_operand" "w")
6297           (const_int 1))
6298          (match_operand:V2DI 1 "register_operand" "w")))]
6299   "TARGET_SIMD && TARGET_SHA3"
6300   "rax1\\t%0.2d, %1.2d, %2.2d"
6301   [(set_attr "type" "crypto_sha3")]
6302 )
6303
6304 (define_insn "aarch64_xarqv2di"
6305   [(set (match_operand:V2DI 0 "register_operand" "=w")
6306         (rotatert:V2DI
6307          (xor:V2DI
6308           (match_operand:V2DI 1 "register_operand" "%w")
6309           (match_operand:V2DI 2 "register_operand" "w"))
6310          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6311   "TARGET_SIMD && TARGET_SHA3"
6312   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6313   [(set_attr "type" "crypto_sha3")]
6314 )
6315
6316 (define_insn "bcaxq<mode>4"
6317   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6318         (xor:VQ_I
6319          (and:VQ_I
6320           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6321           (match_operand:VQ_I 2 "register_operand" "w"))
6322          (match_operand:VQ_I 1 "register_operand" "w")))]
6323   "TARGET_SIMD && TARGET_SHA3"
6324   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6325   [(set_attr "type" "crypto_sha3")]
6326 )
6327
6328 ;; SM3
6329
6330 (define_insn "aarch64_sm3ss1qv4si"
6331   [(set (match_operand:V4SI 0 "register_operand" "=w")
6332         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6333                       (match_operand:V4SI 2 "register_operand" "w")
6334                       (match_operand:V4SI 3 "register_operand" "w")]
6335          UNSPEC_SM3SS1))]
6336   "TARGET_SIMD && TARGET_SM4"
6337   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6338   [(set_attr "type" "crypto_sm3")]
6339 )
6340
6341
6342 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6343   [(set (match_operand:V4SI 0 "register_operand" "=w")
6344         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6345                       (match_operand:V4SI 2 "register_operand" "w")
6346                       (match_operand:V4SI 3 "register_operand" "w")
6347                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6348          CRYPTO_SM3TT))]
6349   "TARGET_SIMD && TARGET_SM4"
6350   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6351   [(set_attr "type" "crypto_sm3")]
6352 )
6353
6354 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6355   [(set (match_operand:V4SI 0 "register_operand" "=w")
6356         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6357                       (match_operand:V4SI 2 "register_operand" "w")
6358                       (match_operand:V4SI 3 "register_operand" "w")]
6359          CRYPTO_SM3PART))]
6360   "TARGET_SIMD && TARGET_SM4"
6361   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6362   [(set_attr "type" "crypto_sm3")]
6363 )
6364
6365 ;; SM4
6366
6367 (define_insn "aarch64_sm4eqv4si"
6368   [(set (match_operand:V4SI 0 "register_operand" "=w")
6369         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6370                       (match_operand:V4SI 2 "register_operand" "w")]
6371          UNSPEC_SM4E))]
6372   "TARGET_SIMD && TARGET_SM4"
6373   "sm4e\\t%0.4s, %2.4s"
6374   [(set_attr "type" "crypto_sm4")]
6375 )
6376
6377 (define_insn "aarch64_sm4ekeyqv4si"
6378   [(set (match_operand:V4SI 0 "register_operand" "=w")
6379         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6380                       (match_operand:V4SI 2 "register_operand" "w")]
6381          UNSPEC_SM4EKEY))]
6382   "TARGET_SIMD && TARGET_SM4"
6383   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6384   [(set_attr "type" "crypto_sm4")]
6385 )
6386
6387 ;; fp16fml
6388
6389 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6390   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6391         (unspec:VDQSF
6392          [(match_operand:VDQSF 1 "register_operand" "0")
6393           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6394           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6395          VFMLA16_LOW))]
6396   "TARGET_F16FML"
6397 {
6398   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6399                                             <nunits> * 2, false);
6400   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6401                                             <nunits> * 2, false);
6402
6403   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6404                                                                 operands[1],
6405                                                                 operands[2],
6406                                                                 operands[3],
6407                                                                 p1, p2));
6408   DONE;
6409
6410 })
6411
6412 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6413   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6414         (unspec:VDQSF
6415          [(match_operand:VDQSF 1 "register_operand" "0")
6416           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6417           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6418          VFMLA16_HIGH))]
6419   "TARGET_F16FML"
6420 {
6421   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6422   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6423
6424   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6425                                                                  operands[1],
6426                                                                  operands[2],
6427                                                                  operands[3],
6428                                                                  p1, p2));
6429   DONE;
6430 })
6431
6432 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6433   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6434         (fma:VDQSF
6435          (float_extend:VDQSF
6436           (vec_select:<VFMLA_SEL_W>
6437            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6438            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6439          (float_extend:VDQSF
6440           (vec_select:<VFMLA_SEL_W>
6441            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6442            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6443          (match_operand:VDQSF 1 "register_operand" "0")))]
6444   "TARGET_F16FML"
6445   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6446   [(set_attr "type" "neon_fp_mul_s")]
6447 )
6448
6449 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6450   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6451         (fma:VDQSF
6452          (float_extend:VDQSF
6453           (neg:<VFMLA_SEL_W>
6454            (vec_select:<VFMLA_SEL_W>
6455             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6456             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6457          (float_extend:VDQSF
6458           (vec_select:<VFMLA_SEL_W>
6459            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6460            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6461          (match_operand:VDQSF 1 "register_operand" "0")))]
6462   "TARGET_F16FML"
6463   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6464   [(set_attr "type" "neon_fp_mul_s")]
6465 )
6466
6467 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6468   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6469         (fma:VDQSF
6470          (float_extend:VDQSF
6471           (vec_select:<VFMLA_SEL_W>
6472            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6473            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6474          (float_extend:VDQSF
6475           (vec_select:<VFMLA_SEL_W>
6476            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6477            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6478          (match_operand:VDQSF 1 "register_operand" "0")))]
6479   "TARGET_F16FML"
6480   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6481   [(set_attr "type" "neon_fp_mul_s")]
6482 )
6483
6484 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6485   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6486         (fma:VDQSF
6487          (float_extend:VDQSF
6488           (neg:<VFMLA_SEL_W>
6489            (vec_select:<VFMLA_SEL_W>
6490             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6491             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6492          (float_extend:VDQSF
6493           (vec_select:<VFMLA_SEL_W>
6494            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6495            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6496          (match_operand:VDQSF 1 "register_operand" "0")))]
6497   "TARGET_F16FML"
6498   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6499   [(set_attr "type" "neon_fp_mul_s")]
6500 )
6501
6502 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6503   [(set (match_operand:V2SF 0 "register_operand" "")
6504         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6505                            (match_operand:V4HF 2 "register_operand" "")
6506                            (match_operand:V4HF 3 "register_operand" "")
6507                            (match_operand:SI 4 "aarch64_imm2" "")]
6508          VFMLA16_LOW))]
6509   "TARGET_F16FML"
6510 {
6511     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6512     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6513
6514     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6515                                                             operands[1],
6516                                                             operands[2],
6517                                                             operands[3],
6518                                                             p1, lane));
6519     DONE;
6520 }
6521 )
6522
6523 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6524   [(set (match_operand:V2SF 0 "register_operand" "")
6525         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6526                            (match_operand:V4HF 2 "register_operand" "")
6527                            (match_operand:V4HF 3 "register_operand" "")
6528                            (match_operand:SI 4 "aarch64_imm2" "")]
6529          VFMLA16_HIGH))]
6530   "TARGET_F16FML"
6531 {
6532     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6533     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6534
6535     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6536                                                              operands[1],
6537                                                              operands[2],
6538                                                              operands[3],
6539                                                              p1, lane));
6540     DONE;
6541 })
6542
6543 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6544   [(set (match_operand:V2SF 0 "register_operand" "=w")
6545         (fma:V2SF
6546          (float_extend:V2SF
6547            (vec_select:V2HF
6548             (match_operand:V4HF 2 "register_operand" "w")
6549             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6550          (float_extend:V2SF
6551            (vec_duplicate:V2HF
6552             (vec_select:HF
6553              (match_operand:V4HF 3 "register_operand" "x")
6554              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6555          (match_operand:V2SF 1 "register_operand" "0")))]
6556   "TARGET_F16FML"
6557   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6558   [(set_attr "type" "neon_fp_mul_s")]
6559 )
6560
6561 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6562   [(set (match_operand:V2SF 0 "register_operand" "=w")
6563         (fma:V2SF
6564          (float_extend:V2SF
6565           (neg:V2HF
6566            (vec_select:V2HF
6567             (match_operand:V4HF 2 "register_operand" "w")
6568             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6569          (float_extend:V2SF
6570           (vec_duplicate:V2HF
6571            (vec_select:HF
6572             (match_operand:V4HF 3 "register_operand" "x")
6573             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6574          (match_operand:V2SF 1 "register_operand" "0")))]
6575   "TARGET_F16FML"
6576   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6577   [(set_attr "type" "neon_fp_mul_s")]
6578 )
6579
6580 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6581   [(set (match_operand:V2SF 0 "register_operand" "=w")
6582         (fma:V2SF
6583          (float_extend:V2SF
6584            (vec_select:V2HF
6585             (match_operand:V4HF 2 "register_operand" "w")
6586             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6587          (float_extend:V2SF
6588            (vec_duplicate:V2HF
6589             (vec_select:HF
6590              (match_operand:V4HF 3 "register_operand" "x")
6591              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6592          (match_operand:V2SF 1 "register_operand" "0")))]
6593   "TARGET_F16FML"
6594   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6595   [(set_attr "type" "neon_fp_mul_s")]
6596 )
6597
6598 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6599   [(set (match_operand:V2SF 0 "register_operand" "=w")
6600         (fma:V2SF
6601          (float_extend:V2SF
6602            (neg:V2HF
6603             (vec_select:V2HF
6604              (match_operand:V4HF 2 "register_operand" "w")
6605              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6606          (float_extend:V2SF
6607            (vec_duplicate:V2HF
6608             (vec_select:HF
6609              (match_operand:V4HF 3 "register_operand" "x")
6610              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6611          (match_operand:V2SF 1 "register_operand" "0")))]
6612   "TARGET_F16FML"
6613   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6614   [(set_attr "type" "neon_fp_mul_s")]
6615 )
6616
6617 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6618   [(set (match_operand:V4SF 0 "register_operand" "")
6619         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6620                            (match_operand:V8HF 2 "register_operand" "")
6621                            (match_operand:V8HF 3 "register_operand" "")
6622                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6623          VFMLA16_LOW))]
6624   "TARGET_F16FML"
6625 {
6626     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6627     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6628
6629     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6630                                                               operands[1],
6631                                                               operands[2],
6632                                                               operands[3],
6633                                                               p1, lane));
6634     DONE;
6635 })
6636
6637 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6638   [(set (match_operand:V4SF 0 "register_operand" "")
6639         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6640                            (match_operand:V8HF 2 "register_operand" "")
6641                            (match_operand:V8HF 3 "register_operand" "")
6642                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6643          VFMLA16_HIGH))]
6644   "TARGET_F16FML"
6645 {
6646     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6647     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6648
6649     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6650                                                                operands[1],
6651                                                                operands[2],
6652                                                                operands[3],
6653                                                                p1, lane));
6654     DONE;
6655 })
6656
6657 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6658   [(set (match_operand:V4SF 0 "register_operand" "=w")
6659         (fma:V4SF
6660          (float_extend:V4SF
6661           (vec_select:V4HF
6662             (match_operand:V8HF 2 "register_operand" "w")
6663             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6664          (float_extend:V4SF
6665           (vec_duplicate:V4HF
6666            (vec_select:HF
6667             (match_operand:V8HF 3 "register_operand" "x")
6668             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6669          (match_operand:V4SF 1 "register_operand" "0")))]
6670   "TARGET_F16FML"
6671   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6672   [(set_attr "type" "neon_fp_mul_s")]
6673 )
6674
6675 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6676   [(set (match_operand:V4SF 0 "register_operand" "=w")
6677         (fma:V4SF
6678           (float_extend:V4SF
6679            (neg:V4HF
6680             (vec_select:V4HF
6681              (match_operand:V8HF 2 "register_operand" "w")
6682              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6683          (float_extend:V4SF
6684           (vec_duplicate:V4HF
6685            (vec_select:HF
6686             (match_operand:V8HF 3 "register_operand" "x")
6687             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6688          (match_operand:V4SF 1 "register_operand" "0")))]
6689   "TARGET_F16FML"
6690   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6691   [(set_attr "type" "neon_fp_mul_s")]
6692 )
6693
6694 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6695   [(set (match_operand:V4SF 0 "register_operand" "=w")
6696         (fma:V4SF
6697          (float_extend:V4SF
6698           (vec_select:V4HF
6699             (match_operand:V8HF 2 "register_operand" "w")
6700             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6701          (float_extend:V4SF
6702           (vec_duplicate:V4HF
6703            (vec_select:HF
6704             (match_operand:V8HF 3 "register_operand" "x")
6705             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6706          (match_operand:V4SF 1 "register_operand" "0")))]
6707   "TARGET_F16FML"
6708   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6709   [(set_attr "type" "neon_fp_mul_s")]
6710 )
6711
6712 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6713   [(set (match_operand:V4SF 0 "register_operand" "=w")
6714         (fma:V4SF
6715          (float_extend:V4SF
6716           (neg:V4HF
6717            (vec_select:V4HF
6718             (match_operand:V8HF 2 "register_operand" "w")
6719             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6720          (float_extend:V4SF
6721           (vec_duplicate:V4HF
6722            (vec_select:HF
6723             (match_operand:V8HF 3 "register_operand" "x")
6724             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6725          (match_operand:V4SF 1 "register_operand" "0")))]
6726   "TARGET_F16FML"
6727   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6728   [(set_attr "type" "neon_fp_mul_s")]
6729 )
6730
6731 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6732   [(set (match_operand:V2SF 0 "register_operand" "")
6733         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6734                       (match_operand:V4HF 2 "register_operand" "")
6735                       (match_operand:V8HF 3 "register_operand" "")
6736                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6737          VFMLA16_LOW))]
6738   "TARGET_F16FML"
6739 {
6740     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6741     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6742
6743     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6744                                                              operands[1],
6745                                                              operands[2],
6746                                                              operands[3],
6747                                                              p1, lane));
6748     DONE;
6749
6750 })
6751
6752 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6753   [(set (match_operand:V2SF 0 "register_operand" "")
6754         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6755                       (match_operand:V4HF 2 "register_operand" "")
6756                       (match_operand:V8HF 3 "register_operand" "")
6757                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6758          VFMLA16_HIGH))]
6759   "TARGET_F16FML"
6760 {
6761     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6762     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6763
6764     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6765                                                               operands[1],
6766                                                               operands[2],
6767                                                               operands[3],
6768                                                               p1, lane));
6769     DONE;
6770
6771 })
6772
6773 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6774   [(set (match_operand:V2SF 0 "register_operand" "=w")
6775         (fma:V2SF
6776          (float_extend:V2SF
6777            (vec_select:V2HF
6778             (match_operand:V4HF 2 "register_operand" "w")
6779             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6780          (float_extend:V2SF
6781           (vec_duplicate:V2HF
6782            (vec_select:HF
6783             (match_operand:V8HF 3 "register_operand" "x")
6784             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6785          (match_operand:V2SF 1 "register_operand" "0")))]
6786   "TARGET_F16FML"
6787   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6788   [(set_attr "type" "neon_fp_mul_s")]
6789 )
6790
6791 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6792   [(set (match_operand:V2SF 0 "register_operand" "=w")
6793         (fma:V2SF
6794          (float_extend:V2SF
6795           (neg:V2HF
6796            (vec_select:V2HF
6797             (match_operand:V4HF 2 "register_operand" "w")
6798             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6799          (float_extend:V2SF
6800           (vec_duplicate:V2HF
6801            (vec_select:HF
6802             (match_operand:V8HF 3 "register_operand" "x")
6803             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6804          (match_operand:V2SF 1 "register_operand" "0")))]
6805   "TARGET_F16FML"
6806   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6807   [(set_attr "type" "neon_fp_mul_s")]
6808 )
6809
6810 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6811   [(set (match_operand:V2SF 0 "register_operand" "=w")
6812         (fma:V2SF
6813          (float_extend:V2SF
6814            (vec_select:V2HF
6815             (match_operand:V4HF 2 "register_operand" "w")
6816             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6817          (float_extend:V2SF
6818           (vec_duplicate:V2HF
6819            (vec_select:HF
6820             (match_operand:V8HF 3 "register_operand" "x")
6821             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6822          (match_operand:V2SF 1 "register_operand" "0")))]
6823   "TARGET_F16FML"
6824   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6825   [(set_attr "type" "neon_fp_mul_s")]
6826 )
6827
6828 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6829   [(set (match_operand:V2SF 0 "register_operand" "=w")
6830         (fma:V2SF
6831          (float_extend:V2SF
6832           (neg:V2HF
6833            (vec_select:V2HF
6834             (match_operand:V4HF 2 "register_operand" "w")
6835             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6836          (float_extend:V2SF
6837           (vec_duplicate:V2HF
6838            (vec_select:HF
6839             (match_operand:V8HF 3 "register_operand" "x")
6840             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6841          (match_operand:V2SF 1 "register_operand" "0")))]
6842   "TARGET_F16FML"
6843   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6844   [(set_attr "type" "neon_fp_mul_s")]
6845 )
6846
6847 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6848   [(set (match_operand:V4SF 0 "register_operand" "")
6849         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6850                       (match_operand:V8HF 2 "register_operand" "")
6851                       (match_operand:V4HF 3 "register_operand" "")
6852                       (match_operand:SI 4 "aarch64_imm2" "")]
6853          VFMLA16_LOW))]
6854   "TARGET_F16FML"
6855 {
6856     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6857     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6858
6859     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6860                                                              operands[1],
6861                                                              operands[2],
6862                                                              operands[3],
6863                                                              p1, lane));
6864     DONE;
6865 })
6866
6867 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6868   [(set (match_operand:V4SF 0 "register_operand" "")
6869         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6870                       (match_operand:V8HF 2 "register_operand" "")
6871                       (match_operand:V4HF 3 "register_operand" "")
6872                       (match_operand:SI 4 "aarch64_imm2" "")]
6873          VFMLA16_HIGH))]
6874   "TARGET_F16FML"
6875 {
6876     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6877     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6878
6879     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6880                                                               operands[1],
6881                                                               operands[2],
6882                                                               operands[3],
6883                                                               p1, lane));
6884     DONE;
6885 })
6886
6887 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6888   [(set (match_operand:V4SF 0 "register_operand" "=w")
6889         (fma:V4SF
6890          (float_extend:V4SF
6891           (vec_select:V4HF
6892            (match_operand:V8HF 2 "register_operand" "w")
6893            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6894          (float_extend:V4SF
6895           (vec_duplicate:V4HF
6896            (vec_select:HF
6897             (match_operand:V4HF 3 "register_operand" "x")
6898             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6899          (match_operand:V4SF 1 "register_operand" "0")))]
6900   "TARGET_F16FML"
6901   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6902   [(set_attr "type" "neon_fp_mul_s")]
6903 )
6904
6905 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6906   [(set (match_operand:V4SF 0 "register_operand" "=w")
6907         (fma:V4SF
6908          (float_extend:V4SF
6909           (neg:V4HF
6910            (vec_select:V4HF
6911             (match_operand:V8HF 2 "register_operand" "w")
6912             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6913          (float_extend:V4SF
6914           (vec_duplicate:V4HF
6915            (vec_select:HF
6916             (match_operand:V4HF 3 "register_operand" "x")
6917             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6918          (match_operand:V4SF 1 "register_operand" "0")))]
6919   "TARGET_F16FML"
6920   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6921   [(set_attr "type" "neon_fp_mul_s")]
6922 )
6923
6924 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6925   [(set (match_operand:V4SF 0 "register_operand" "=w")
6926         (fma:V4SF
6927          (float_extend:V4SF
6928           (vec_select:V4HF
6929            (match_operand:V8HF 2 "register_operand" "w")
6930            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6931          (float_extend:V4SF
6932           (vec_duplicate:V4HF
6933            (vec_select:HF
6934             (match_operand:V4HF 3 "register_operand" "x")
6935             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6936          (match_operand:V4SF 1 "register_operand" "0")))]
6937   "TARGET_F16FML"
6938   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6939   [(set_attr "type" "neon_fp_mul_s")]
6940 )
6941
6942 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6943   [(set (match_operand:V4SF 0 "register_operand" "=w")
6944         (fma:V4SF
6945          (float_extend:V4SF
6946           (neg:V4HF
6947            (vec_select:V4HF
6948             (match_operand:V8HF 2 "register_operand" "w")
6949             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6950          (float_extend:V4SF
6951           (vec_duplicate:V4HF
6952            (vec_select:HF
6953             (match_operand:V4HF 3 "register_operand" "x")
6954             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6955          (match_operand:V4SF 1 "register_operand" "0")))]
6956   "TARGET_F16FML"
6957   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6958   [(set_attr "type" "neon_fp_mul_s")]
6959 )
6960
6961 ;; pmull
6962
6963 (define_insn "aarch64_crypto_pmulldi"
6964   [(set (match_operand:TI 0 "register_operand" "=w")
6965         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6966                      (match_operand:DI 2 "register_operand" "w")]
6967                     UNSPEC_PMULL))]
6968  "TARGET_SIMD && TARGET_AES"
6969  "pmull\\t%0.1q, %1.1d, %2.1d"
6970   [(set_attr "type" "crypto_pmull")]
6971 )
6972
6973 (define_insn "aarch64_crypto_pmullv2di"
6974  [(set (match_operand:TI 0 "register_operand" "=w")
6975        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6976                    (match_operand:V2DI 2 "register_operand" "w")]
6977                   UNSPEC_PMULL2))]
6978   "TARGET_SIMD && TARGET_AES"
6979   "pmull2\\t%0.1q, %1.2d, %2.2d"
6980   [(set_attr "type" "crypto_pmull")]
6981 )