gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
  23         (match_operand:VALL_F16 1 "general_operand"))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand")
  43         (match_operand:VALL 1 "general_operand"))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 423 ;; fact that their usage need to guarantee that the source vectors are
 424 ;; contiguous.  It would be wrong to describe the operation without being able
 425 ;; to describe the permute that is also required, but even if that is done
 426 ;; the permute would have been created as a LOAD_LANES which means the values
 427 ;; in the registers are in the wrong order.
 428 (define_insn "aarch64_fcadd<rot><mode>"
 429   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 430         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 431                        (match_operand:VHSDF 2 "register_operand" "w")]
 432                        FCADD))]
 433   "TARGET_COMPLEX"
 434   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 435   [(set_attr "type" "neon_fcadd")]
 436 )
 437
 438 (define_insn "aarch64_fcmla<rot><mode>"
 439   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 440         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 441                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 442                                    (match_operand:VHSDF 3 "register_operand" "w")]
 443                                    FCMLA)))]
 444   "TARGET_COMPLEX"
 445   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 446   [(set_attr "type" "neon_fcmla")]
 447 )
 448
 449
 450 (define_insn "aarch64_fcmla_lane<rot><mode>"
 451   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 452         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 453                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 454                                    (match_operand:VHSDF 3 "register_operand" "w")
 455                                    (match_operand:SI 4 "const_int_operand" "n")]
 456                                    FCMLA)))]
 457   "TARGET_COMPLEX"
 458 {
 459   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 460   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 461 }
 462   [(set_attr "type" "neon_fcmla")]
 463 )
 464
 465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 466   [(set (match_operand:V4HF 0 "register_operand" "=w")
 467         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 468                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 469                                  (match_operand:V8HF 3 "register_operand" "w")
 470                                  (match_operand:SI 4 "const_int_operand" "n")]
 471                                  FCMLA)))]
 472   "TARGET_COMPLEX"
 473 {
 474   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 475   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 476 }
 477   [(set_attr "type" "neon_fcmla")]
 478 )
 479
 480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 481   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 482         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 483                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 484                                      (match_operand:<VHALF> 3 "register_operand" "w")
 485                                      (match_operand:SI 4 "const_int_operand" "n")]
 486                                      FCMLA)))]
 487   "TARGET_COMPLEX"
 488 {
 489   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 490   operands[4]
 491     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 492   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 493 }
 494   [(set_attr "type" "neon_fcmla")]
 495 )
 496
 497 ;; These instructions map to the __builtins for the Dot Product operations.
 498 (define_insn "aarch64_<sur>dot<vsi2qi>"
 499   [(set (match_operand:VS 0 "register_operand" "=w")
 500         (plus:VS (match_operand:VS 1 "register_operand" "0")
 501                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 502                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 503                 DOTPROD)))]
 504   "TARGET_DOTPROD"
 505   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 506   [(set_attr "type" "neon_dot<q>")]
 507 )
 508
 509 ;; These expands map to the Dot Product optab the vectorizer checks for.
 510 ;; The auto-vectorizer expects a dot product builtin that also does an
 511 ;; accumulation into the provided register.
 512 ;; Given the following pattern
 513 ;;
 514 ;; for (i=0; i<len; i++) {
 515 ;;     c = a[i] * b[i];
 516 ;;     r += c;
 517 ;; }
 518 ;; return result;
 519 ;;
 520 ;; This can be auto-vectorized to
 521 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 522 ;;
 523 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 526 ;; ...
 527 ;;
 528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 529 (define_expand "<sur>dot_prod<vsi2qi>"
 530   [(set (match_operand:VS 0 "register_operand")
 531         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 532                             (match_operand:<VSI2QI> 2 "register_operand")]
 533                  DOTPROD)
 534                 (match_operand:VS 3 "register_operand")))]
 535   "TARGET_DOTPROD"
 536 {
 537   emit_insn (
 538     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 539                                     operands[2]));
 540   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 541   DONE;
 542 })
 543
 544 ;; These instructions map to the __builtins for the Dot Product
 545 ;; indexed operations.
 546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 547   [(set (match_operand:VS 0 "register_operand" "=w")
 548         (plus:VS (match_operand:VS 1 "register_operand" "0")
 549                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 550                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 551                             (match_operand:SI 4 "immediate_operand" "i")]
 552                 DOTPROD)))]
 553   "TARGET_DOTPROD"
 554   {
 555     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 556     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 557   }
 558   [(set_attr "type" "neon_dot<q>")]
 559 )
 560
 561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 562   [(set (match_operand:VS 0 "register_operand" "=w")
 563         (plus:VS (match_operand:VS 1 "register_operand" "0")
 564                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 565                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 566                             (match_operand:SI 4 "immediate_operand" "i")]
 567                 DOTPROD)))]
 568   "TARGET_DOTPROD"
 569   {
 570     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 571     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 572   }
 573   [(set_attr "type" "neon_dot<q>")]
 574 )
 575
 576 (define_expand "copysign<mode>3"
 577   [(match_operand:VHSDF 0 "register_operand")
 578    (match_operand:VHSDF 1 "register_operand")
 579    (match_operand:VHSDF 2 "register_operand")]
 580   "TARGET_FLOAT && TARGET_SIMD"
 581 {
 582   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 583   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 584
 585   emit_move_insn (v_bitmask,
 586                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 587                                                      HOST_WIDE_INT_M1U << bits));
 588   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 589                                          operands[2], operands[1]));
 590   DONE;
 591 }
 592 )
 593
 594 (define_insn "*aarch64_mul3_elt<mode>"
 595  [(set (match_operand:VMUL 0 "register_operand" "=w")
 596     (mult:VMUL
 597       (vec_duplicate:VMUL
 598           (vec_select:<VEL>
 599             (match_operand:VMUL 1 "register_operand" "<h_con>")
 600             (parallel [(match_operand:SI 2 "immediate_operand")])))
 601       (match_operand:VMUL 3 "register_operand" "w")))]
 602   "TARGET_SIMD"
 603   {
 604     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 605     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 606   }
 607   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 608 )
 609
 610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 611   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 612      (mult:VMUL_CHANGE_NLANES
 613        (vec_duplicate:VMUL_CHANGE_NLANES
 614           (vec_select:<VEL>
 615             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 616             (parallel [(match_operand:SI 2 "immediate_operand")])))
 617       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 618   "TARGET_SIMD"
 619   {
 620     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 621     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 622   }
 623   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 624 )
 625
 626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 627  [(set (match_operand:VMUL 0 "register_operand" "=w")
 628     (mult:VMUL
 629       (vec_duplicate:VMUL
 630             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 631       (match_operand:VMUL 2 "register_operand" "w")))]
 632   "TARGET_SIMD"
 633   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 634   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 635 )
 636
 637 (define_insn "@aarch64_rsqrte<mode>"
 638   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 639         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 640                      UNSPEC_RSQRTE))]
 641   "TARGET_SIMD"
 642   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 643   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 644
 645 (define_insn "@aarch64_rsqrts<mode>"
 646   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 647         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 648                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 649          UNSPEC_RSQRTS))]
 650   "TARGET_SIMD"
 651   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 652   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 653
 654 (define_expand "rsqrt<mode>2"
 655   [(set (match_operand:VALLF 0 "register_operand")
 656         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 657                      UNSPEC_RSQRT))]
 658   "TARGET_SIMD"
 659 {
 660   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 661   DONE;
 662 })
 663
 664 (define_insn "*aarch64_mul3_elt_to_64v2df"
 665   [(set (match_operand:DF 0 "register_operand" "=w")
 666      (mult:DF
 667        (vec_select:DF
 668          (match_operand:V2DF 1 "register_operand" "w")
 669          (parallel [(match_operand:SI 2 "immediate_operand")]))
 670        (match_operand:DF 3 "register_operand" "w")))]
 671   "TARGET_SIMD"
 672   {
 673     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 674     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 675   }
 676   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 677 )
 678
 679 (define_insn "neg<mode>2"
 680   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 681         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 682   "TARGET_SIMD"
 683   "neg\t%0.<Vtype>, %1.<Vtype>"
 684   [(set_attr "type" "neon_neg<q>")]
 685 )
 686
 687 (define_insn "abs<mode>2"
 688   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 689         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 690   "TARGET_SIMD"
 691   "abs\t%0.<Vtype>, %1.<Vtype>"
 692   [(set_attr "type" "neon_abs<q>")]
 693 )
 694
 695 ;; The intrinsic version of integer ABS must not be allowed to
 696 ;; combine with any operation with an integerated ABS step, such
 697 ;; as SABD.
 698 (define_insn "aarch64_abs<mode>"
 699   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 700           (unspec:VSDQ_I_DI
 701             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 702            UNSPEC_ABS))]
 703   "TARGET_SIMD"
 704   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 705   [(set_attr "type" "neon_abs<q>")]
 706 )
 707
 708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 709 ;; This isn't accurate as ABS treats always its input as a signed value.
 710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 713 (define_insn "aarch64_<su>abd<mode>_3"
 714   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 715         (minus:VDQ_BHSI
 716           (USMAX:VDQ_BHSI
 717             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 718             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 719           (<max_opp>:VDQ_BHSI
 720             (match_dup 1)
 721             (match_dup 2))))]
 722   "TARGET_SIMD"
 723   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 724   [(set_attr "type" "neon_abd<q>")]
 725 )
 726
 727 (define_insn "aarch64_<sur>abdl2<mode>_3"
 728   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 729         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 730                           (match_operand:VDQV_S 2 "register_operand" "w")]
 731         ABDL2))]
 732   "TARGET_SIMD"
 733   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 734   [(set_attr "type" "neon_abd<q>")]
 735 )
 736
 737 (define_insn "aarch64_<sur>abal<mode>_4"
 738   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 739         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 740                           (match_operand:VDQV_S 2 "register_operand" "w")
 741                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 742         ABAL))]
 743   "TARGET_SIMD"
 744   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 745   [(set_attr "type" "neon_arith_acc<q>")]
 746 )
 747
 748 (define_insn "aarch64_<sur>adalp<mode>_3"
 749   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 750         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 751                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 752         ADALP))]
 753   "TARGET_SIMD"
 754   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 755   [(set_attr "type" "neon_reduc_add<q>")]
 756 )
 757
 758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 759 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 760 ;; reduction of the difference into a V4SI vector and accumulate that into
 761 ;; operand 3 before copying that into the result operand 0.
 762 ;; Perform that with a sequence of:
 763 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 764 ;; UABAL        tmp.8h, op1.16b, op2.16b
 765 ;; UADALP       op3.4s, tmp.8h
 766 ;; MOV          op0, op3 // should be eliminated in later passes.
 767 ;;
 768 ;; For TARGET_DOTPROD we do:
 769 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 770 ;; UABD tmp2.16b, op1.16b, op2.16b
 771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 772 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 773 ;;
 774 ;; The signed version just uses the signed variants of the above instructions
 775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 776 ;; unsigned.
 777
 778 (define_expand "<sur>sadv16qi"
 779   [(use (match_operand:V4SI 0 "register_operand"))
 780    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 781                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 782    (use (match_operand:V4SI 3 "register_operand"))]
 783   "TARGET_SIMD"
 784   {
 785     if (TARGET_DOTPROD)
 786       {
 787         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 788         rtx abd = gen_reg_rtx (V16QImode);
 789         emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
 790         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
 791                                           abd, ones));
 792         DONE;
 793       }
 794     rtx reduc = gen_reg_rtx (V8HImode);
 795     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 796                                                operands[2]));
 797     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 798                                               operands[2], reduc));
 799     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 800                                               operands[3]));
 801     emit_move_insn (operands[0], operands[3]);
 802     DONE;
 803   }
 804 )
 805
 806 (define_insn "aba<mode>_3"
 807   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 808         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 809                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 810                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 811                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 812   "TARGET_SIMD"
 813   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 814   [(set_attr "type" "neon_arith_acc<q>")]
 815 )
 816
 817 (define_insn "fabd<mode>3"
 818   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 819         (abs:VHSDF_HSDF
 820           (minus:VHSDF_HSDF
 821             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 822             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 823   "TARGET_SIMD"
 824   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 825   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 826 )
 827
 828 ;; For AND (vector, register) and BIC (vector, immediate)
 829 (define_insn "and<mode>3"
 830   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 831         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 832                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 833   "TARGET_SIMD"
 834   {
 835     switch (which_alternative)
 836       {
 837       case 0:
 838         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 839       case 1:
 840         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 841                                                   AARCH64_CHECK_BIC);
 842       default:
 843         gcc_unreachable ();
 844       }
 845   }
 846   [(set_attr "type" "neon_logic<q>")]
 847 )
 848
 849 ;; For ORR (vector, register) and ORR (vector, immediate)
 850 (define_insn "ior<mode>3"
 851   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 852         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 853                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 854   "TARGET_SIMD"
 855   {
 856     switch (which_alternative)
 857       {
 858       case 0:
 859         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 860       case 1:
 861         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 862                                                   AARCH64_CHECK_ORR);
 863       default:
 864         gcc_unreachable ();
 865       }
 866   }
 867   [(set_attr "type" "neon_logic<q>")]
 868 )
 869
 870 (define_insn "xor<mode>3"
 871   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 872         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 873                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 874   "TARGET_SIMD"
 875   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 876   [(set_attr "type" "neon_logic<q>")]
 877 )
 878
 879 (define_insn "one_cmpl<mode>2"
 880   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 881         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 882   "TARGET_SIMD"
 883   "not\t%0.<Vbtype>, %1.<Vbtype>"
 884   [(set_attr "type" "neon_logic<q>")]
 885 )
 886
 887 (define_insn "aarch64_simd_vec_set<mode>"
 888   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 889         (vec_merge:VALL_F16
 890             (vec_duplicate:VALL_F16
 891                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 892             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 893             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 894   "TARGET_SIMD"
 895   {
 896    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 897    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 898    switch (which_alternative)
 899      {
 900      case 0:
 901         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 902      case 1:
 903         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 904      case 2:
 905         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 906      default:
 907         gcc_unreachable ();
 908      }
 909   }
 910   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 911 )
 912
 913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 914   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 915         (vec_merge:VALL_F16
 916             (vec_duplicate:VALL_F16
 917               (vec_select:<VEL>
 918                 (match_operand:VALL_F16 3 "register_operand" "w")
 919                 (parallel
 920                   [(match_operand:SI 4 "immediate_operand" "i")])))
 921             (match_operand:VALL_F16 1 "register_operand" "0")
 922             (match_operand:SI 2 "immediate_operand" "i")))]
 923   "TARGET_SIMD"
 924   {
 925     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 926     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 927     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 928
 929     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 930   }
 931   [(set_attr "type" "neon_ins<q>")]
 932 )
 933
 934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 935   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 936         (vec_merge:VALL_F16_NO_V2Q
 937             (vec_duplicate:VALL_F16_NO_V2Q
 938               (vec_select:<VEL>
 939                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 940                 (parallel
 941                   [(match_operand:SI 4 "immediate_operand" "i")])))
 942             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 943             (match_operand:SI 2 "immediate_operand" "i")))]
 944   "TARGET_SIMD"
 945   {
 946     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 947     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 948     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 949                                            INTVAL (operands[4]));
 950
 951     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 952   }
 953   [(set_attr "type" "neon_ins<q>")]
 954 )
 955
 956 (define_expand "signbit<mode>2"
 957   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
 958    (use (match_operand:VDQSF 1 "register_operand"))]
 959   "TARGET_SIMD"
 960 {
 961   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
 962   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 963                                                         shift_amount);
 964   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
 965
 966   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
 967                                                  shift_vector));
 968   DONE;
 969 })
 970
 971 (define_insn "aarch64_simd_lshr<mode>"
 972  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 973        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 974                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 975  "TARGET_SIMD"
 976  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 977   [(set_attr "type" "neon_shift_imm<q>")]
 978 )
 979
 980 (define_insn "aarch64_simd_ashr<mode>"
 981  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 982        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 983                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 984  "TARGET_SIMD"
 985  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 986   [(set_attr "type" "neon_shift_imm<q>")]
 987 )
 988
 989 (define_insn "*aarch64_simd_sra<mode>"
 990  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 991         (plus:VDQ_I
 992            (SHIFTRT:VDQ_I
 993                 (match_operand:VDQ_I 1 "register_operand" "w")
 994                 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
 995            (match_operand:VDQ_I 3 "register_operand" "0")))]
 996   "TARGET_SIMD"
 997   "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
 998   [(set_attr "type" "neon_shift_acc<q>")]
 999 )
1000
1001 (define_insn "aarch64_simd_imm_shl<mode>"
1002  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1003        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1004                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
1005  "TARGET_SIMD"
1006   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1007   [(set_attr "type" "neon_shift_imm<q>")]
1008 )
1009
1010 (define_insn "aarch64_simd_reg_sshl<mode>"
1011  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1012        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1013                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1014  "TARGET_SIMD"
1015  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1016   [(set_attr "type" "neon_shift_reg<q>")]
1017 )
1018
1019 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1020  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1021        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1022                     (match_operand:VDQ_I 2 "register_operand" "w")]
1023                    UNSPEC_ASHIFT_UNSIGNED))]
1024  "TARGET_SIMD"
1025  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1026   [(set_attr "type" "neon_shift_reg<q>")]
1027 )
1028
1029 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1030  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1031        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1032                     (match_operand:VDQ_I 2 "register_operand" "w")]
1033                    UNSPEC_ASHIFT_SIGNED))]
1034  "TARGET_SIMD"
1035  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1036   [(set_attr "type" "neon_shift_reg<q>")]
1037 )
1038
1039 (define_expand "ashl<mode>3"
1040   [(match_operand:VDQ_I 0 "register_operand")
1041    (match_operand:VDQ_I 1 "register_operand")
1042    (match_operand:SI  2 "general_operand")]
1043  "TARGET_SIMD"
1044 {
1045   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1046   int shift_amount;
1047
1048   if (CONST_INT_P (operands[2]))
1049     {
1050       shift_amount = INTVAL (operands[2]);
1051       if (shift_amount >= 0 && shift_amount < bit_width)
1052         {
1053           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1054                                                        shift_amount);
1055           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1056                                                      operands[1],
1057                                                      tmp));
1058           DONE;
1059         }
1060       else
1061         {
1062           operands[2] = force_reg (SImode, operands[2]);
1063         }
1064     }
1065   else if (MEM_P (operands[2]))
1066     {
1067       operands[2] = force_reg (SImode, operands[2]);
1068     }
1069
1070   if (REG_P (operands[2]))
1071     {
1072       rtx tmp = gen_reg_rtx (<MODE>mode);
1073       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1074                                              convert_to_mode (<VEL>mode,
1075                                                               operands[2],
1076                                                               0)));
1077       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1078                                                   tmp));
1079       DONE;
1080     }
1081   else
1082     FAIL;
1083 }
1084 )
1085
1086 (define_expand "lshr<mode>3"
1087   [(match_operand:VDQ_I 0 "register_operand")
1088    (match_operand:VDQ_I 1 "register_operand")
1089    (match_operand:SI  2 "general_operand")]
1090  "TARGET_SIMD"
1091 {
1092   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1093   int shift_amount;
1094
1095   if (CONST_INT_P (operands[2]))
1096     {
1097       shift_amount = INTVAL (operands[2]);
1098       if (shift_amount > 0 && shift_amount <= bit_width)
1099         {
1100           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1101                                                        shift_amount);
1102           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1103                                                   operands[1],
1104                                                   tmp));
1105           DONE;
1106         }
1107       else
1108         operands[2] = force_reg (SImode, operands[2]);
1109     }
1110   else if (MEM_P (operands[2]))
1111     {
1112       operands[2] = force_reg (SImode, operands[2]);
1113     }
1114
1115   if (REG_P (operands[2]))
1116     {
1117       rtx tmp = gen_reg_rtx (SImode);
1118       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1119       emit_insn (gen_negsi2 (tmp, operands[2]));
1120       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1121                                              convert_to_mode (<VEL>mode,
1122                                                               tmp, 0)));
1123       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1124                                                           operands[1],
1125                                                           tmp1));
1126       DONE;
1127     }
1128   else
1129     FAIL;
1130 }
1131 )
1132
1133 (define_expand "ashr<mode>3"
1134   [(match_operand:VDQ_I 0 "register_operand")
1135    (match_operand:VDQ_I 1 "register_operand")
1136    (match_operand:SI  2 "general_operand")]
1137  "TARGET_SIMD"
1138 {
1139   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1140   int shift_amount;
1141
1142   if (CONST_INT_P (operands[2]))
1143     {
1144       shift_amount = INTVAL (operands[2]);
1145       if (shift_amount > 0 && shift_amount <= bit_width)
1146         {
1147           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1148                                                        shift_amount);
1149           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1150                                                   operands[1],
1151                                                   tmp));
1152           DONE;
1153         }
1154       else
1155         operands[2] = force_reg (SImode, operands[2]);
1156     }
1157   else if (MEM_P (operands[2]))
1158     {
1159       operands[2] = force_reg (SImode, operands[2]);
1160     }
1161
1162   if (REG_P (operands[2]))
1163     {
1164       rtx tmp = gen_reg_rtx (SImode);
1165       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1166       emit_insn (gen_negsi2 (tmp, operands[2]));
1167       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1168                                              convert_to_mode (<VEL>mode,
1169                                                               tmp, 0)));
1170       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1171                                                         operands[1],
1172                                                         tmp1));
1173       DONE;
1174     }
1175   else
1176     FAIL;
1177 }
1178 )
1179
1180 (define_expand "vashl<mode>3"
1181  [(match_operand:VDQ_I 0 "register_operand")
1182   (match_operand:VDQ_I 1 "register_operand")
1183   (match_operand:VDQ_I 2 "register_operand")]
1184  "TARGET_SIMD"
1185 {
1186   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1187                                               operands[2]));
1188   DONE;
1189 })
1190
1191 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1192 ;; Negating individual lanes most certainly offsets the
1193 ;; gain from vectorization.
1194 (define_expand "vashr<mode>3"
1195  [(match_operand:VDQ_BHSI 0 "register_operand")
1196   (match_operand:VDQ_BHSI 1 "register_operand")
1197   (match_operand:VDQ_BHSI 2 "register_operand")]
1198  "TARGET_SIMD"
1199 {
1200   rtx neg = gen_reg_rtx (<MODE>mode);
1201   emit (gen_neg<mode>2 (neg, operands[2]));
1202   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1203                                                     neg));
1204   DONE;
1205 })
1206
1207 ;; DI vector shift
1208 (define_expand "aarch64_ashr_simddi"
1209   [(match_operand:DI 0 "register_operand")
1210    (match_operand:DI 1 "register_operand")
1211    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1212   "TARGET_SIMD"
1213   {
1214     /* An arithmetic shift right by 64 fills the result with copies of the sign
1215        bit, just like asr by 63 - however the standard pattern does not handle
1216        a shift by 64.  */
1217     if (INTVAL (operands[2]) == 64)
1218       operands[2] = GEN_INT (63);
1219     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1220     DONE;
1221   }
1222 )
1223
1224 (define_expand "vlshr<mode>3"
1225  [(match_operand:VDQ_BHSI 0 "register_operand")
1226   (match_operand:VDQ_BHSI 1 "register_operand")
1227   (match_operand:VDQ_BHSI 2 "register_operand")]
1228  "TARGET_SIMD"
1229 {
1230   rtx neg = gen_reg_rtx (<MODE>mode);
1231   emit (gen_neg<mode>2 (neg, operands[2]));
1232   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1233                                                       neg));
1234   DONE;
1235 })
1236
1237 (define_expand "aarch64_lshr_simddi"
1238   [(match_operand:DI 0 "register_operand")
1239    (match_operand:DI 1 "register_operand")
1240    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1241   "TARGET_SIMD"
1242   {
1243     if (INTVAL (operands[2]) == 64)
1244       emit_move_insn (operands[0], const0_rtx);
1245     else
1246       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1247     DONE;
1248   }
1249 )
1250
1251 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1252 (define_insn "vec_shr_<mode>"
1253   [(set (match_operand:VD 0 "register_operand" "=w")
1254         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1255                     (match_operand:SI 2 "immediate_operand" "i")]
1256                    UNSPEC_VEC_SHR))]
1257   "TARGET_SIMD"
1258   {
1259     if (BYTES_BIG_ENDIAN)
1260       return "shl %d0, %d1, %2";
1261     else
1262       return "ushr %d0, %d1, %2";
1263   }
1264   [(set_attr "type" "neon_shift_imm")]
1265 )
1266
1267 (define_expand "vec_set<mode>"
1268   [(match_operand:VALL_F16 0 "register_operand")
1269    (match_operand:<VEL> 1 "register_operand")
1270    (match_operand:SI 2 "immediate_operand")]
1271   "TARGET_SIMD"
1272   {
1273     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1274     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1275                                           GEN_INT (elem), operands[0]));
1276     DONE;
1277   }
1278 )
1279
1280
1281 (define_insn "aarch64_mla<mode>"
1282  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1283        (plus:VDQ_BHSI (mult:VDQ_BHSI
1284                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1285                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1286                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1287  "TARGET_SIMD"
1288  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1289   [(set_attr "type" "neon_mla_<Vetype><q>")]
1290 )
1291
1292 (define_insn "*aarch64_mla_elt<mode>"
1293  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1294        (plus:VDQHS
1295          (mult:VDQHS
1296            (vec_duplicate:VDQHS
1297               (vec_select:<VEL>
1298                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1299                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1300            (match_operand:VDQHS 3 "register_operand" "w"))
1301          (match_operand:VDQHS 4 "register_operand" "0")))]
1302  "TARGET_SIMD"
1303   {
1304     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1305     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1306   }
1307   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1308 )
1309
1310 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1311  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1312        (plus:VDQHS
1313          (mult:VDQHS
1314            (vec_duplicate:VDQHS
1315               (vec_select:<VEL>
1316                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1317                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1318            (match_operand:VDQHS 3 "register_operand" "w"))
1319          (match_operand:VDQHS 4 "register_operand" "0")))]
1320  "TARGET_SIMD"
1321   {
1322     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1323     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1324   }
1325   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
1328 (define_insn "*aarch64_mla_elt_merge<mode>"
1329   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1330         (plus:VDQHS
1331           (mult:VDQHS (vec_duplicate:VDQHS
1332                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1333                 (match_operand:VDQHS 2 "register_operand" "w"))
1334           (match_operand:VDQHS 3 "register_operand" "0")))]
1335  "TARGET_SIMD"
1336  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1337   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1338 )
1339
1340 (define_insn "aarch64_mls<mode>"
1341  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1343                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1345  "TARGET_SIMD"
1346  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1347   [(set_attr "type" "neon_mla_<Vetype><q>")]
1348 )
1349
1350 (define_insn "*aarch64_mls_elt<mode>"
1351  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1352        (minus:VDQHS
1353          (match_operand:VDQHS 4 "register_operand" "0")
1354          (mult:VDQHS
1355            (vec_duplicate:VDQHS
1356               (vec_select:<VEL>
1357                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1359            (match_operand:VDQHS 3 "register_operand" "w"))))]
1360  "TARGET_SIMD"
1361   {
1362     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1363     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1364   }
1365   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1366 )
1367
1368 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1369  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1370        (minus:VDQHS
1371          (match_operand:VDQHS 4 "register_operand" "0")
1372          (mult:VDQHS
1373            (vec_duplicate:VDQHS
1374               (vec_select:<VEL>
1375                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1377            (match_operand:VDQHS 3 "register_operand" "w"))))]
1378  "TARGET_SIMD"
1379   {
1380     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1381     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1382   }
1383   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 (define_insn "*aarch64_mls_elt_merge<mode>"
1387   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1388         (minus:VDQHS
1389           (match_operand:VDQHS 1 "register_operand" "0")
1390           (mult:VDQHS (vec_duplicate:VDQHS
1391                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1392                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1393   "TARGET_SIMD"
1394   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1395   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1396 )
1397
1398 ;; Max/Min operations.
1399 (define_insn "<su><maxmin><mode>3"
1400  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1402                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1403  "TARGET_SIMD"
1404  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1405   [(set_attr "type" "neon_minmax<q>")]
1406 )
1407
1408 (define_expand "<su><maxmin>v2di3"
1409  [(set (match_operand:V2DI 0 "register_operand")
1410        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1411                     (match_operand:V2DI 2 "register_operand")))]
1412  "TARGET_SIMD"
1413 {
1414   enum rtx_code cmp_operator;
1415   rtx cmp_fmt;
1416
1417   switch (<CODE>)
1418     {
1419     case UMIN:
1420       cmp_operator = LTU;
1421       break;
1422     case SMIN:
1423       cmp_operator = LT;
1424       break;
1425     case UMAX:
1426       cmp_operator = GTU;
1427       break;
1428     case SMAX:
1429       cmp_operator = GT;
1430       break;
1431     default:
1432       gcc_unreachable ();
1433     }
1434
1435   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1436   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1437               operands[2], cmp_fmt, operands[1], operands[2]));
1438   DONE;
1439 })
1440
1441 ;; Pairwise Integer Max/Min operations.
1442 (define_insn "aarch64_<maxmin_uns>p<mode>"
1443  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1444        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1445                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1446                         MAXMINV))]
1447  "TARGET_SIMD"
1448  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1449   [(set_attr "type" "neon_minmax<q>")]
1450 )
1451
1452 ;; Pairwise FP Max/Min operations.
1453 (define_insn "aarch64_<maxmin_uns>p<mode>"
1454  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1455        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1456                       (match_operand:VHSDF 2 "register_operand" "w")]
1457                       FMAXMINV))]
1458  "TARGET_SIMD"
1459  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1460   [(set_attr "type" "neon_minmax<q>")]
1461 )
1462
1463 ;; vec_concat gives a new vector with the low elements from operand 1, and
1464 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1465 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1466 ;; What that means, is that the RTL descriptions of the below patterns
1467 ;; need to change depending on endianness.
1468
1469 ;; Move to the low architectural bits of the register.
1470 ;; On little-endian this is { operand, zeroes }
1471 ;; On big-endian this is { zeroes, operand }
1472
1473 (define_insn "move_lo_quad_internal_<mode>"
1474   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1475         (vec_concat:VQ_NO2E
1476           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1477           (vec_duplicate:<VHALF> (const_int 0))))]
1478   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1479   "@
1480    dup\\t%d0, %1.d[0]
1481    fmov\\t%d0, %1
1482    dup\\t%d0, %1"
1483   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484    (set_attr "length" "4")
1485    (set_attr "arch" "simd,fp,simd")]
1486 )
1487
1488 (define_insn "move_lo_quad_internal_<mode>"
1489   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1490         (vec_concat:VQ_2E
1491           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1492           (const_int 0)))]
1493   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1494   "@
1495    dup\\t%d0, %1.d[0]
1496    fmov\\t%d0, %1
1497    dup\\t%d0, %1"
1498   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1499    (set_attr "length" "4")
1500    (set_attr "arch" "simd,fp,simd")]
1501 )
1502
1503 (define_insn "move_lo_quad_internal_be_<mode>"
1504   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1505         (vec_concat:VQ_NO2E
1506           (vec_duplicate:<VHALF> (const_int 0))
1507           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1508   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1509   "@
1510    dup\\t%d0, %1.d[0]
1511    fmov\\t%d0, %1
1512    dup\\t%d0, %1"
1513   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1514    (set_attr "length" "4")
1515    (set_attr "arch" "simd,fp,simd")]
1516 )
1517
1518 (define_insn "move_lo_quad_internal_be_<mode>"
1519   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1520         (vec_concat:VQ_2E
1521           (const_int 0)
1522           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1523   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1524   "@
1525    dup\\t%d0, %1.d[0]
1526    fmov\\t%d0, %1
1527    dup\\t%d0, %1"
1528   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1529    (set_attr "length" "4")
1530    (set_attr "arch" "simd,fp,simd")]
1531 )
1532
1533 (define_expand "move_lo_quad_<mode>"
1534   [(match_operand:VQ 0 "register_operand")
1535    (match_operand:VQ 1 "register_operand")]
1536   "TARGET_SIMD"
1537 {
1538   if (BYTES_BIG_ENDIAN)
1539     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1540   else
1541     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1542   DONE;
1543 }
1544 )
1545
1546 ;; Move operand1 to the high architectural bits of the register, keeping
1547 ;; the low architectural bits of operand2.
1548 ;; For little-endian this is { operand2, operand1 }
1549 ;; For big-endian this is { operand1, operand2 }
1550
1551 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1552   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1553         (vec_concat:VQ
1554           (vec_select:<VHALF>
1555                 (match_dup 0)
1556                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1557           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1558   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1559   "@
1560    ins\\t%0.d[1], %1.d[0]
1561    ins\\t%0.d[1], %1"
1562   [(set_attr "type" "neon_ins")]
1563 )
1564
1565 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1566   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1567         (vec_concat:VQ
1568           (match_operand:<VHALF> 1 "register_operand" "w,r")
1569           (vec_select:<VHALF>
1570                 (match_dup 0)
1571                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1572   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1573   "@
1574    ins\\t%0.d[1], %1.d[0]
1575    ins\\t%0.d[1], %1"
1576   [(set_attr "type" "neon_ins")]
1577 )
1578
1579 (define_expand "move_hi_quad_<mode>"
1580  [(match_operand:VQ 0 "register_operand")
1581   (match_operand:<VHALF> 1 "register_operand")]
1582  "TARGET_SIMD"
1583 {
1584   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1585   if (BYTES_BIG_ENDIAN)
1586     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1587                     operands[1], p));
1588   else
1589     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1590                     operands[1], p));
1591   DONE;
1592 })
1593
1594 ;; Narrowing operations.
1595
1596 ;; For doubles.
1597 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1598  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1599        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1600  "TARGET_SIMD"
1601  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1602   [(set_attr "type" "neon_shift_imm_narrow_q")]
1603 )
1604
1605 (define_expand "vec_pack_trunc_<mode>"
1606  [(match_operand:<VNARROWD> 0 "register_operand")
1607   (match_operand:VDN 1 "register_operand")
1608   (match_operand:VDN 2 "register_operand")]
1609  "TARGET_SIMD"
1610 {
1611   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1612   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1613   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1614
1615   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1616   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1617   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1618   DONE;
1619 })
1620
1621 ;; For quads.
1622
1623 (define_insn "vec_pack_trunc_<mode>"
1624  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1625        (vec_concat:<VNARROWQ2>
1626          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1627          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1628  "TARGET_SIMD"
1629  {
1630    if (BYTES_BIG_ENDIAN)
1631      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1632    else
1633      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1634  }
1635   [(set_attr "type" "multiple")
1636    (set_attr "length" "8")]
1637 )
1638
1639 ;; Widening operations.
1640
1641 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1642   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1643         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1644                                (match_operand:VQW 1 "register_operand" "w")
1645                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1646                             )))]
1647   "TARGET_SIMD"
1648   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1649   [(set_attr "type" "neon_shift_imm_long")]
1650 )
1651
1652 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1653   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655                                (match_operand:VQW 1 "register_operand" "w")
1656                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1657                             )))]
1658   "TARGET_SIMD"
1659   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1660   [(set_attr "type" "neon_shift_imm_long")]
1661 )
1662
1663 (define_expand "vec_unpack<su>_hi_<mode>"
1664   [(match_operand:<VWIDE> 0 "register_operand")
1665    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1666   "TARGET_SIMD"
1667   {
1668     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1669     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1670                                                           operands[1], p));
1671     DONE;
1672   }
1673 )
1674
1675 (define_expand "vec_unpack<su>_lo_<mode>"
1676   [(match_operand:<VWIDE> 0 "register_operand")
1677    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1678   "TARGET_SIMD"
1679   {
1680     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1681     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1682                                                           operands[1], p));
1683     DONE;
1684   }
1685 )
1686
1687 ;; Widening arithmetic.
1688
1689 (define_insn "*aarch64_<su>mlal_lo<mode>"
1690   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1691         (plus:<VWIDE>
1692           (mult:<VWIDE>
1693               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1694                  (match_operand:VQW 2 "register_operand" "w")
1695                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1696               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1697                  (match_operand:VQW 4 "register_operand" "w")
1698                  (match_dup 3))))
1699           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1700   "TARGET_SIMD"
1701   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1702   [(set_attr "type" "neon_mla_<Vetype>_long")]
1703 )
1704
1705 (define_insn "*aarch64_<su>mlal_hi<mode>"
1706   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1707         (plus:<VWIDE>
1708           (mult:<VWIDE>
1709               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1710                  (match_operand:VQW 2 "register_operand" "w")
1711                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1712               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1713                  (match_operand:VQW 4 "register_operand" "w")
1714                  (match_dup 3))))
1715           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1716   "TARGET_SIMD"
1717   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1718   [(set_attr "type" "neon_mla_<Vetype>_long")]
1719 )
1720
1721 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1722   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1723         (minus:<VWIDE>
1724           (match_operand:<VWIDE> 1 "register_operand" "0")
1725           (mult:<VWIDE>
1726               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1727                  (match_operand:VQW 2 "register_operand" "w")
1728                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1729               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1730                  (match_operand:VQW 4 "register_operand" "w")
1731                  (match_dup 3))))))]
1732   "TARGET_SIMD"
1733   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1734   [(set_attr "type" "neon_mla_<Vetype>_long")]
1735 )
1736
1737 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1738   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1739         (minus:<VWIDE>
1740           (match_operand:<VWIDE> 1 "register_operand" "0")
1741           (mult:<VWIDE>
1742               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1743                  (match_operand:VQW 2 "register_operand" "w")
1744                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1745               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1746                  (match_operand:VQW 4 "register_operand" "w")
1747                  (match_dup 3))))))]
1748   "TARGET_SIMD"
1749   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1750   [(set_attr "type" "neon_mla_<Vetype>_long")]
1751 )
1752
1753 (define_insn "*aarch64_<su>mlal<mode>"
1754   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1755         (plus:<VWIDE>
1756           (mult:<VWIDE>
1757             (ANY_EXTEND:<VWIDE>
1758               (match_operand:VD_BHSI 1 "register_operand" "w"))
1759             (ANY_EXTEND:<VWIDE>
1760               (match_operand:VD_BHSI 2 "register_operand" "w")))
1761           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1762   "TARGET_SIMD"
1763   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1764   [(set_attr "type" "neon_mla_<Vetype>_long")]
1765 )
1766
1767 (define_insn "*aarch64_<su>mlsl<mode>"
1768   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1769         (minus:<VWIDE>
1770           (match_operand:<VWIDE> 1 "register_operand" "0")
1771           (mult:<VWIDE>
1772             (ANY_EXTEND:<VWIDE>
1773               (match_operand:VD_BHSI 2 "register_operand" "w"))
1774             (ANY_EXTEND:<VWIDE>
1775               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1776   "TARGET_SIMD"
1777   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1778   [(set_attr "type" "neon_mla_<Vetype>_long")]
1779 )
1780
1781 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1782  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1783        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1784                            (match_operand:VQW 1 "register_operand" "w")
1785                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1786                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1787                            (match_operand:VQW 2 "register_operand" "w")
1788                            (match_dup 3)))))]
1789   "TARGET_SIMD"
1790   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1791   [(set_attr "type" "neon_mul_<Vetype>_long")]
1792 )
1793
1794 (define_expand "vec_widen_<su>mult_lo_<mode>"
1795   [(match_operand:<VWIDE> 0 "register_operand")
1796    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1797    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1798  "TARGET_SIMD"
1799  {
1800    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1801    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1802                                                        operands[1],
1803                                                        operands[2], p));
1804    DONE;
1805  }
1806 )
1807
1808 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1809  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1810       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1811                             (match_operand:VQW 1 "register_operand" "w")
1812                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1813                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1814                             (match_operand:VQW 2 "register_operand" "w")
1815                             (match_dup 3)))))]
1816   "TARGET_SIMD"
1817   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1818   [(set_attr "type" "neon_mul_<Vetype>_long")]
1819 )
1820
1821 (define_expand "vec_widen_<su>mult_hi_<mode>"
1822   [(match_operand:<VWIDE> 0 "register_operand")
1823    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1824    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1825  "TARGET_SIMD"
1826  {
1827    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1828    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1829                                                        operands[1],
1830                                                        operands[2], p));
1831    DONE;
1832
1833  }
1834 )
1835
1836 ;; FP vector operations.
1837 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1838 ;; double-precision (64-bit) floating-point data types and arithmetic as
1839 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1840 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1841 ;;
1842 ;; Floating-point operations can raise an exception.  Vectorizing such
1843 ;; operations are safe because of reasons explained below.
1844 ;;
1845 ;; ARMv8 permits an extension to enable trapped floating-point
1846 ;; exception handling, however this is an optional feature.  In the
1847 ;; event of a floating-point exception being raised by vectorised
1848 ;; code then:
1849 ;; 1.  If trapped floating-point exceptions are available, then a trap
1850 ;;     will be taken when any lane raises an enabled exception.  A trap
1851 ;;     handler may determine which lane raised the exception.
1852 ;; 2.  Alternatively a sticky exception flag is set in the
1853 ;;     floating-point status register (FPSR).  Software may explicitly
1854 ;;     test the exception flags, in which case the tests will either
1855 ;;     prevent vectorisation, allowing precise identification of the
1856 ;;     failing operation, or if tested outside of vectorisable regions
1857 ;;     then the specific operation and lane are not of interest.
1858
1859 ;; FP arithmetic operations.
1860
1861 (define_insn "add<mode>3"
1862  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1863        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1864                    (match_operand:VHSDF 2 "register_operand" "w")))]
1865  "TARGET_SIMD"
1866  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1867   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1868 )
1869
1870 (define_insn "sub<mode>3"
1871  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1872        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1873                     (match_operand:VHSDF 2 "register_operand" "w")))]
1874  "TARGET_SIMD"
1875  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1877 )
1878
1879 (define_insn "mul<mode>3"
1880  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1881        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1882                    (match_operand:VHSDF 2 "register_operand" "w")))]
1883  "TARGET_SIMD"
1884  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1885   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1886 )
1887
1888 (define_expand "div<mode>3"
1889  [(set (match_operand:VHSDF 0 "register_operand")
1890        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1891                   (match_operand:VHSDF 2 "register_operand")))]
1892  "TARGET_SIMD"
1893 {
1894   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1895     DONE;
1896
1897   operands[1] = force_reg (<MODE>mode, operands[1]);
1898 })
1899
1900 (define_insn "*div<mode>3"
1901  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1902        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1903                  (match_operand:VHSDF 2 "register_operand" "w")))]
1904  "TARGET_SIMD"
1905  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1906   [(set_attr "type" "neon_fp_div_<stype><q>")]
1907 )
1908
1909 (define_insn "neg<mode>2"
1910  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1911        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1912  "TARGET_SIMD"
1913  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1914   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1915 )
1916
1917 (define_insn "abs<mode>2"
1918  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1920  "TARGET_SIMD"
1921  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1922   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1923 )
1924
1925 (define_insn "fma<mode>4"
1926   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1927        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1928                   (match_operand:VHSDF 2 "register_operand" "w")
1929                   (match_operand:VHSDF 3 "register_operand" "0")))]
1930   "TARGET_SIMD"
1931  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1932   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1933 )
1934
1935 (define_insn "*aarch64_fma4_elt<mode>"
1936   [(set (match_operand:VDQF 0 "register_operand" "=w")
1937     (fma:VDQF
1938       (vec_duplicate:VDQF
1939         (vec_select:<VEL>
1940           (match_operand:VDQF 1 "register_operand" "<h_con>")
1941           (parallel [(match_operand:SI 2 "immediate_operand")])))
1942       (match_operand:VDQF 3 "register_operand" "w")
1943       (match_operand:VDQF 4 "register_operand" "0")))]
1944   "TARGET_SIMD"
1945   {
1946     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1947     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1948   }
1949   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1950 )
1951
1952 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1953   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1954     (fma:VDQSF
1955       (vec_duplicate:VDQSF
1956         (vec_select:<VEL>
1957           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1958           (parallel [(match_operand:SI 2 "immediate_operand")])))
1959       (match_operand:VDQSF 3 "register_operand" "w")
1960       (match_operand:VDQSF 4 "register_operand" "0")))]
1961   "TARGET_SIMD"
1962   {
1963     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1964     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1965   }
1966   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1970   [(set (match_operand:VMUL 0 "register_operand" "=w")
1971     (fma:VMUL
1972       (vec_duplicate:VMUL
1973           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1974       (match_operand:VMUL 2 "register_operand" "w")
1975       (match_operand:VMUL 3 "register_operand" "0")))]
1976   "TARGET_SIMD"
1977   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1978   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1979 )
1980
1981 (define_insn "*aarch64_fma4_elt_to_64v2df"
1982   [(set (match_operand:DF 0 "register_operand" "=w")
1983     (fma:DF
1984         (vec_select:DF
1985           (match_operand:V2DF 1 "register_operand" "w")
1986           (parallel [(match_operand:SI 2 "immediate_operand")]))
1987       (match_operand:DF 3 "register_operand" "w")
1988       (match_operand:DF 4 "register_operand" "0")))]
1989   "TARGET_SIMD"
1990   {
1991     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1992     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1993   }
1994   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1995 )
1996
1997 (define_insn "fnma<mode>4"
1998   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1999         (fma:VHSDF
2000           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2001           (match_operand:VHSDF 2 "register_operand" "w")
2002           (match_operand:VHSDF 3 "register_operand" "0")))]
2003   "TARGET_SIMD"
2004   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2006 )
2007
2008 (define_insn "*aarch64_fnma4_elt<mode>"
2009   [(set (match_operand:VDQF 0 "register_operand" "=w")
2010     (fma:VDQF
2011       (neg:VDQF
2012         (match_operand:VDQF 3 "register_operand" "w"))
2013       (vec_duplicate:VDQF
2014         (vec_select:<VEL>
2015           (match_operand:VDQF 1 "register_operand" "<h_con>")
2016           (parallel [(match_operand:SI 2 "immediate_operand")])))
2017       (match_operand:VDQF 4 "register_operand" "0")))]
2018   "TARGET_SIMD"
2019   {
2020     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2021     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2022   }
2023   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2024 )
2025
2026 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2027   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2028     (fma:VDQSF
2029       (neg:VDQSF
2030         (match_operand:VDQSF 3 "register_operand" "w"))
2031       (vec_duplicate:VDQSF
2032         (vec_select:<VEL>
2033           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2034           (parallel [(match_operand:SI 2 "immediate_operand")])))
2035       (match_operand:VDQSF 4 "register_operand" "0")))]
2036   "TARGET_SIMD"
2037   {
2038     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2039     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2040   }
2041   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2042 )
2043
2044 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2045   [(set (match_operand:VMUL 0 "register_operand" "=w")
2046     (fma:VMUL
2047       (neg:VMUL
2048         (match_operand:VMUL 2 "register_operand" "w"))
2049       (vec_duplicate:VMUL
2050         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2051       (match_operand:VMUL 3 "register_operand" "0")))]
2052   "TARGET_SIMD"
2053   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2054   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2055 )
2056
2057 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2058   [(set (match_operand:DF 0 "register_operand" "=w")
2059     (fma:DF
2060       (vec_select:DF
2061         (match_operand:V2DF 1 "register_operand" "w")
2062         (parallel [(match_operand:SI 2 "immediate_operand")]))
2063       (neg:DF
2064         (match_operand:DF 3 "register_operand" "w"))
2065       (match_operand:DF 4 "register_operand" "0")))]
2066   "TARGET_SIMD"
2067   {
2068     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2069     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2070   }
2071   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2072 )
2073
2074 ;; Vector versions of the floating-point frint patterns.
2075 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2076 (define_insn "<frint_pattern><mode>2"
2077   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2078         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2079                        FRINT))]
2080   "TARGET_SIMD"
2081   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2082   [(set_attr "type" "neon_fp_round_<stype><q>")]
2083 )
2084
2085 ;; Vector versions of the fcvt standard patterns.
2086 ;; Expands to lbtrunc, lround, lceil, lfloor
2087 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2088   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2089         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2090                                [(match_operand:VHSDF 1 "register_operand" "w")]
2091                                FCVT)))]
2092   "TARGET_SIMD"
2093   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2094   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2095 )
2096
2097 ;; HF Scalar variants of related SIMD instructions.
2098 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2099   [(set (match_operand:HI 0 "register_operand" "=w")
2100         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2101                       FCVT)))]
2102   "TARGET_SIMD_F16INST"
2103   "fcvt<frint_suffix><su>\t%h0, %h1"
2104   [(set_attr "type" "neon_fp_to_int_s")]
2105 )
2106
2107 (define_insn "<optab>_trunchfhi2"
2108   [(set (match_operand:HI 0 "register_operand" "=w")
2109         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2110   "TARGET_SIMD_F16INST"
2111   "fcvtz<su>\t%h0, %h1"
2112   [(set_attr "type" "neon_fp_to_int_s")]
2113 )
2114
2115 (define_insn "<optab>hihf2"
2116   [(set (match_operand:HF 0 "register_operand" "=w")
2117         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2118   "TARGET_SIMD_F16INST"
2119   "<su_optab>cvtf\t%h0, %h1"
2120   [(set_attr "type" "neon_int_to_fp_s")]
2121 )
2122
2123 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2124   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2125         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2126                                [(mult:VDQF
2127          (match_operand:VDQF 1 "register_operand" "w")
2128          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2129                                UNSPEC_FRINTZ)))]
2130   "TARGET_SIMD
2131    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2132                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2133   {
2134     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2135     char buf[64];
2136     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2137     output_asm_insn (buf, operands);
2138     return "";
2139   }
2140   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2141 )
2142
2143 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2144   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2145         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2146                                [(match_operand:VHSDF 1 "register_operand")]
2147                                 UNSPEC_FRINTZ)))]
2148   "TARGET_SIMD"
2149   {})
2150
2151 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2152   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2153         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2154                                [(match_operand:VHSDF 1 "register_operand")]
2155                                 UNSPEC_FRINTZ)))]
2156   "TARGET_SIMD"
2157   {})
2158
2159 (define_expand "ftrunc<VHSDF:mode>2"
2160   [(set (match_operand:VHSDF 0 "register_operand")
2161         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2162                        UNSPEC_FRINTZ))]
2163   "TARGET_SIMD"
2164   {})
2165
2166 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2167   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2168         (FLOATUORS:VHSDF
2169           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2170   "TARGET_SIMD"
2171   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2172   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2173 )
2174
2175 ;; Conversions between vectors of floats and doubles.
2176 ;; Contains a mix of patterns to match standard pattern names
2177 ;; and those for intrinsics.
2178
2179 ;; Float widening operations.
2180
2181 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2182   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2183         (float_extend:<VWIDE> (vec_select:<VHALF>
2184                                (match_operand:VQ_HSF 1 "register_operand" "w")
2185                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2186                             )))]
2187   "TARGET_SIMD"
2188   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2189   [(set_attr "type" "neon_fp_cvt_widen_s")]
2190 )
2191
2192 ;; Convert between fixed-point and floating-point (vector modes)
2193
2194 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2195   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2196         (unspec:<VHSDF:FCVT_TARGET>
2197           [(match_operand:VHSDF 1 "register_operand" "w")
2198            (match_operand:SI 2 "immediate_operand" "i")]
2199          FCVT_F2FIXED))]
2200   "TARGET_SIMD"
2201   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2202   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2203 )
2204
2205 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2206   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2207         (unspec:<VDQ_HSDI:FCVT_TARGET>
2208           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2209            (match_operand:SI 2 "immediate_operand" "i")]
2210          FCVT_FIXED2F))]
2211   "TARGET_SIMD"
2212   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2213   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2214 )
2215
2216 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2217 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2218 ;; the meaning of HI and LO changes depending on the target endianness.
2219 ;; While elsewhere we map the higher numbered elements of a vector to
2220 ;; the lower architectural lanes of the vector, for these patterns we want
2221 ;; to always treat "hi" as referring to the higher architectural lanes.
2222 ;; Consequently, while the patterns below look inconsistent with our
2223 ;; other big-endian patterns their behavior is as required.
2224
2225 (define_expand "vec_unpacks_lo_<mode>"
2226   [(match_operand:<VWIDE> 0 "register_operand")
2227    (match_operand:VQ_HSF 1 "register_operand")]
2228   "TARGET_SIMD"
2229   {
2230     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2231     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2232                                                        operands[1], p));
2233     DONE;
2234   }
2235 )
2236
2237 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2238   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2239         (float_extend:<VWIDE> (vec_select:<VHALF>
2240                                (match_operand:VQ_HSF 1 "register_operand" "w")
2241                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2242                             )))]
2243   "TARGET_SIMD"
2244   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2245   [(set_attr "type" "neon_fp_cvt_widen_s")]
2246 )
2247
2248 (define_expand "vec_unpacks_hi_<mode>"
2249   [(match_operand:<VWIDE> 0 "register_operand")
2250    (match_operand:VQ_HSF 1 "register_operand")]
2251   "TARGET_SIMD"
2252   {
2253     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2254     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2255                                                        operands[1], p));
2256     DONE;
2257   }
2258 )
2259 (define_insn "aarch64_float_extend_lo_<Vwide>"
2260   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261         (float_extend:<VWIDE>
2262           (match_operand:VDF 1 "register_operand" "w")))]
2263   "TARGET_SIMD"
2264   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2265   [(set_attr "type" "neon_fp_cvt_widen_s")]
2266 )
2267
2268 ;; Float narrowing operations.
2269
2270 (define_insn "aarch64_float_truncate_lo_<mode>"
2271   [(set (match_operand:VDF 0 "register_operand" "=w")
2272       (float_truncate:VDF
2273         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2274   "TARGET_SIMD"
2275   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2276   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2277 )
2278
2279 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2280   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2281     (vec_concat:<VDBL>
2282       (match_operand:VDF 1 "register_operand" "0")
2283       (float_truncate:VDF
2284         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2285   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2286   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2287   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2288 )
2289
2290 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2291   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2292     (vec_concat:<VDBL>
2293       (float_truncate:VDF
2294         (match_operand:<VWIDE> 2 "register_operand" "w"))
2295       (match_operand:VDF 1 "register_operand" "0")))]
2296   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2297   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2298   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2299 )
2300
2301 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2302   [(match_operand:<VDBL> 0 "register_operand")
2303    (match_operand:VDF 1 "register_operand")
2304    (match_operand:<VWIDE> 2 "register_operand")]
2305   "TARGET_SIMD"
2306 {
2307   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2308                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2309                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2310   emit_insn (gen (operands[0], operands[1], operands[2]));
2311   DONE;
2312 }
2313 )
2314
2315 (define_expand "vec_pack_trunc_v2df"
2316   [(set (match_operand:V4SF 0 "register_operand")
2317       (vec_concat:V4SF
2318         (float_truncate:V2SF
2319             (match_operand:V2DF 1 "register_operand"))
2320         (float_truncate:V2SF
2321             (match_operand:V2DF 2 "register_operand"))
2322           ))]
2323   "TARGET_SIMD"
2324   {
2325     rtx tmp = gen_reg_rtx (V2SFmode);
2326     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2327     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2328
2329     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2330     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2331                                                    tmp, operands[hi]));
2332     DONE;
2333   }
2334 )
2335
2336 (define_expand "vec_pack_trunc_df"
2337   [(set (match_operand:V2SF 0 "register_operand")
2338       (vec_concat:V2SF
2339         (float_truncate:SF
2340             (match_operand:DF 1 "register_operand"))
2341         (float_truncate:SF
2342             (match_operand:DF 2 "register_operand"))
2343           ))]
2344   "TARGET_SIMD"
2345   {
2346     rtx tmp = gen_reg_rtx (V2SFmode);
2347     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2348     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2349
2350     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2351     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2352     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2353     DONE;
2354   }
2355 )
2356
2357 ;; FP Max/Min
2358 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2359 ;; expression like:
2360 ;;      a = (b < c) ? b : c;
2361 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2362 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2363 ;; -ffast-math.
2364 ;;
2365 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2366 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2367 ;; operand will be returned when both operands are zero (i.e. they may not
2368 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2369 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2370 ;; NaNs.
2371
2372 (define_insn "<su><maxmin><mode>3"
2373   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2375                        (match_operand:VHSDF 2 "register_operand" "w")))]
2376   "TARGET_SIMD"
2377   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2378   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2379 )
2380
2381 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2382 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2383 ;; which implement the IEEE fmax ()/fmin () functions.
2384 (define_insn "<maxmin_uns><mode>3"
2385   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2387                       (match_operand:VHSDF 2 "register_operand" "w")]
2388                       FMAXMIN_UNS))]
2389   "TARGET_SIMD"
2390   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2391   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2392 )
2393
2394 ;; 'across lanes' add.
2395
2396 (define_expand "reduc_plus_scal_<mode>"
2397   [(match_operand:<VEL> 0 "register_operand")
2398    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2399                UNSPEC_ADDV)]
2400   "TARGET_SIMD"
2401   {
2402     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2403     rtx scratch = gen_reg_rtx (<MODE>mode);
2404     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2405     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2406     DONE;
2407   }
2408 )
2409
2410 (define_insn "aarch64_faddp<mode>"
2411  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2412        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2413                       (match_operand:VHSDF 2 "register_operand" "w")]
2414         UNSPEC_FADDV))]
2415  "TARGET_SIMD"
2416  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2417   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2418 )
2419
2420 (define_insn "aarch64_reduc_plus_internal<mode>"
2421  [(set (match_operand:VDQV 0 "register_operand" "=w")
2422        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2423                     UNSPEC_ADDV))]
2424  "TARGET_SIMD"
2425  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2426   [(set_attr "type" "neon_reduc_add<q>")]
2427 )
2428
2429 (define_insn "aarch64_reduc_plus_internalv2si"
2430  [(set (match_operand:V2SI 0 "register_operand" "=w")
2431        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2432                     UNSPEC_ADDV))]
2433  "TARGET_SIMD"
2434  "addp\\t%0.2s, %1.2s, %1.2s"
2435   [(set_attr "type" "neon_reduc_add")]
2436 )
2437
2438 (define_insn "reduc_plus_scal_<mode>"
2439  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2440        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2441                    UNSPEC_FADDV))]
2442  "TARGET_SIMD"
2443  "faddp\\t%<Vetype>0, %1.<Vtype>"
2444   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2445 )
2446
2447 (define_expand "reduc_plus_scal_v4sf"
2448  [(set (match_operand:SF 0 "register_operand")
2449        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2450                     UNSPEC_FADDV))]
2451  "TARGET_SIMD"
2452 {
2453   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2454   rtx scratch = gen_reg_rtx (V4SFmode);
2455   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2456   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2457   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2458   DONE;
2459 })
2460
2461 (define_insn "clrsb<mode>2"
2462   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2463         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2464   "TARGET_SIMD"
2465   "cls\\t%0.<Vtype>, %1.<Vtype>"
2466   [(set_attr "type" "neon_cls<q>")]
2467 )
2468
2469 (define_insn "clz<mode>2"
2470  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2471        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2472  "TARGET_SIMD"
2473  "clz\\t%0.<Vtype>, %1.<Vtype>"
2474   [(set_attr "type" "neon_cls<q>")]
2475 )
2476
2477 (define_insn "popcount<mode>2"
2478   [(set (match_operand:VB 0 "register_operand" "=w")
2479         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2480   "TARGET_SIMD"
2481   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2482   [(set_attr "type" "neon_cnt<q>")]
2483 )
2484
2485 ;; 'across lanes' max and min ops.
2486
2487 ;; Template for outputting a scalar, so we can create __builtins which can be
2488 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2489 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2490   [(match_operand:<VEL> 0 "register_operand")
2491    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2492                   FMAXMINV)]
2493   "TARGET_SIMD"
2494   {
2495     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2496     rtx scratch = gen_reg_rtx (<MODE>mode);
2497     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2498                                                               operands[1]));
2499     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2500     DONE;
2501   }
2502 )
2503
2504 ;; Likewise for integer cases, signed and unsigned.
2505 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2506   [(match_operand:<VEL> 0 "register_operand")
2507    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2508                     MAXMINV)]
2509   "TARGET_SIMD"
2510   {
2511     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2512     rtx scratch = gen_reg_rtx (<MODE>mode);
2513     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2514                                                               operands[1]));
2515     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2516     DONE;
2517   }
2518 )
2519
2520 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2521  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2522        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2523                     MAXMINV))]
2524  "TARGET_SIMD"
2525  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2526   [(set_attr "type" "neon_reduc_minmax<q>")]
2527 )
2528
2529 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2530  [(set (match_operand:V2SI 0 "register_operand" "=w")
2531        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2532                     MAXMINV))]
2533  "TARGET_SIMD"
2534  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2535   [(set_attr "type" "neon_reduc_minmax")]
2536 )
2537
2538 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2539  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2540        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2541                       FMAXMINV))]
2542  "TARGET_SIMD"
2543  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2544   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2545 )
2546
2547 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2548 ;; allocation.
2549 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2550 ;; to select.
2551 ;;
2552 ;; Thus our BSL is of the form:
2553 ;;   op0 = bsl (mask, op2, op3)
2554 ;; We can use any of:
2555 ;;
2556 ;;   if (op0 = mask)
2557 ;;     bsl mask, op1, op2
2558 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2559 ;;     bit op0, op2, mask
2560 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2561 ;;     bif op0, op1, mask
2562 ;;
2563 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2564 ;; Some forms of straight-line code may generate the equivalent form
2565 ;; in *aarch64_simd_bsl<mode>_alt.
2566
2567 (define_insn "aarch64_simd_bsl<mode>_internal"
2568   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2569         (xor:VDQ_I
2570            (and:VDQ_I
2571              (xor:VDQ_I
2572                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2573                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2574              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2575           (match_dup:<V_INT_EQUIV> 3)
2576         ))]
2577   "TARGET_SIMD"
2578   "@
2579   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2580   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2581   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2582   [(set_attr "type" "neon_bsl<q>")]
2583 )
2584
2585 ;; We need this form in addition to the above pattern to match the case
2586 ;; when combine tries merging three insns such that the second operand of
2587 ;; the outer XOR matches the second operand of the inner XOR rather than
2588 ;; the first.  The two are equivalent but since recog doesn't try all
2589 ;; permutations of commutative operations, we have to have a separate pattern.
2590
2591 (define_insn "*aarch64_simd_bsl<mode>_alt"
2592   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2593         (xor:VDQ_I
2594            (and:VDQ_I
2595              (xor:VDQ_I
2596                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2597                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2598               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2599           (match_dup:<V_INT_EQUIV> 2)))]
2600   "TARGET_SIMD"
2601   "@
2602   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2603   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2604   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2605   [(set_attr "type" "neon_bsl<q>")]
2606 )
2607
2608 ;; DImode is special, we want to avoid computing operations which are
2609 ;; more naturally computed in general purpose registers in the vector
2610 ;; registers.  If we do that, we need to move all three operands from general
2611 ;; purpose registers to vector registers, then back again.  However, we
2612 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2613 ;; optimizations based on the component operations of a BSL.
2614 ;;
2615 ;; That means we need a splitter back to the individual operations, if they
2616 ;; would be better calculated on the integer side.
2617
2618 (define_insn_and_split "aarch64_simd_bsldi_internal"
2619   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2620         (xor:DI
2621            (and:DI
2622              (xor:DI
2623                (match_operand:DI 3 "register_operand" "w,0,w,r")
2624                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2625              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2626           (match_dup:DI 3)
2627         ))]
2628   "TARGET_SIMD"
2629   "@
2630   bsl\\t%0.8b, %2.8b, %3.8b
2631   bit\\t%0.8b, %2.8b, %1.8b
2632   bif\\t%0.8b, %3.8b, %1.8b
2633   #"
2634   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2635   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2636 {
2637   /* Split back to individual operations.  If we're before reload, and
2638      able to create a temporary register, do so.  If we're after reload,
2639      we've got an early-clobber destination register, so use that.
2640      Otherwise, we can't create pseudos and we can't yet guarantee that
2641      operands[0] is safe to write, so FAIL to split.  */
2642
2643   rtx scratch;
2644   if (reload_completed)
2645     scratch = operands[0];
2646   else if (can_create_pseudo_p ())
2647     scratch = gen_reg_rtx (DImode);
2648   else
2649     FAIL;
2650
2651   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2652   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2653   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2654   DONE;
2655 }
2656   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2657    (set_attr "length" "4,4,4,12")]
2658 )
2659
2660 (define_insn_and_split "aarch64_simd_bsldi_alt"
2661   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2662         (xor:DI
2663            (and:DI
2664              (xor:DI
2665                (match_operand:DI 3 "register_operand" "w,w,0,r")
2666                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2667              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2668           (match_dup:DI 2)
2669         ))]
2670   "TARGET_SIMD"
2671   "@
2672   bsl\\t%0.8b, %3.8b, %2.8b
2673   bit\\t%0.8b, %3.8b, %1.8b
2674   bif\\t%0.8b, %2.8b, %1.8b
2675   #"
2676   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2677   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2678 {
2679   /* Split back to individual operations.  If we're before reload, and
2680      able to create a temporary register, do so.  If we're after reload,
2681      we've got an early-clobber destination register, so use that.
2682      Otherwise, we can't create pseudos and we can't yet guarantee that
2683      operands[0] is safe to write, so FAIL to split.  */
2684
2685   rtx scratch;
2686   if (reload_completed)
2687     scratch = operands[0];
2688   else if (can_create_pseudo_p ())
2689     scratch = gen_reg_rtx (DImode);
2690   else
2691     FAIL;
2692
2693   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2694   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2695   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2696   DONE;
2697 }
2698   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2699    (set_attr "length" "4,4,4,12")]
2700 )
2701
2702 (define_expand "aarch64_simd_bsl<mode>"
2703   [(match_operand:VALLDIF 0 "register_operand")
2704    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2705    (match_operand:VALLDIF 2 "register_operand")
2706    (match_operand:VALLDIF 3 "register_operand")]
2707  "TARGET_SIMD"
2708 {
2709   /* We can't alias operands together if they have different modes.  */
2710   rtx tmp = operands[0];
2711   if (FLOAT_MODE_P (<MODE>mode))
2712     {
2713       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2714       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2715       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2716     }
2717   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2718   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2719                                                          operands[1],
2720                                                          operands[2],
2721                                                          operands[3]));
2722   if (tmp != operands[0])
2723     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2724
2725   DONE;
2726 })
2727
2728 (define_expand "vcond_mask_<mode><v_int_equiv>"
2729   [(match_operand:VALLDI 0 "register_operand")
2730    (match_operand:VALLDI 1 "nonmemory_operand")
2731    (match_operand:VALLDI 2 "nonmemory_operand")
2732    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2733   "TARGET_SIMD"
2734 {
2735   /* If we have (a = (P) ? -1 : 0);
2736      Then we can simply move the generated mask (result must be int).  */
2737   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2738       && operands[2] == CONST0_RTX (<MODE>mode))
2739     emit_move_insn (operands[0], operands[3]);
2740   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2741   else if (operands[1] == CONST0_RTX (<MODE>mode)
2742            && operands[2] == CONSTM1_RTX (<MODE>mode))
2743     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2744   else
2745     {
2746       if (!REG_P (operands[1]))
2747         operands[1] = force_reg (<MODE>mode, operands[1]);
2748       if (!REG_P (operands[2]))
2749         operands[2] = force_reg (<MODE>mode, operands[2]);
2750       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2751                                              operands[1], operands[2]));
2752     }
2753
2754   DONE;
2755 })
2756
2757 ;; Patterns comparing two vectors to produce a mask.
2758
2759 (define_expand "vec_cmp<mode><mode>"
2760   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2761           (match_operator 1 "comparison_operator"
2762             [(match_operand:VSDQ_I_DI 2 "register_operand")
2763              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2764   "TARGET_SIMD"
2765 {
2766   rtx mask = operands[0];
2767   enum rtx_code code = GET_CODE (operands[1]);
2768
2769   switch (code)
2770     {
2771     case NE:
2772     case LE:
2773     case LT:
2774     case GE:
2775     case GT:
2776     case EQ:
2777       if (operands[3] == CONST0_RTX (<MODE>mode))
2778         break;
2779
2780       /* Fall through.  */
2781     default:
2782       if (!REG_P (operands[3]))
2783         operands[3] = force_reg (<MODE>mode, operands[3]);
2784
2785       break;
2786     }
2787
2788   switch (code)
2789     {
2790     case LT:
2791       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2792       break;
2793
2794     case GE:
2795       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2796       break;
2797
2798     case LE:
2799       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2800       break;
2801
2802     case GT:
2803       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2804       break;
2805
2806     case LTU:
2807       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2808       break;
2809
2810     case GEU:
2811       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2812       break;
2813
2814     case LEU:
2815       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2816       break;
2817
2818     case GTU:
2819       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2820       break;
2821
2822     case NE:
2823       /* Handle NE as !EQ.  */
2824       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2825       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2826       break;
2827
2828     case EQ:
2829       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2830       break;
2831
2832     default:
2833       gcc_unreachable ();
2834     }
2835
2836   DONE;
2837 })
2838
2839 (define_expand "vec_cmp<mode><v_int_equiv>"
2840   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2841         (match_operator 1 "comparison_operator"
2842             [(match_operand:VDQF 2 "register_operand")
2843              (match_operand:VDQF 3 "nonmemory_operand")]))]
2844   "TARGET_SIMD"
2845 {
2846   int use_zero_form = 0;
2847   enum rtx_code code = GET_CODE (operands[1]);
2848   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2849
2850   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2851
2852   switch (code)
2853     {
2854     case LE:
2855     case LT:
2856     case GE:
2857     case GT:
2858     case EQ:
2859       if (operands[3] == CONST0_RTX (<MODE>mode))
2860         {
2861           use_zero_form = 1;
2862           break;
2863         }
2864       /* Fall through.  */
2865     default:
2866       if (!REG_P (operands[3]))
2867         operands[3] = force_reg (<MODE>mode, operands[3]);
2868
2869       break;
2870     }
2871
2872   switch (code)
2873     {
2874     case LT:
2875       if (use_zero_form)
2876         {
2877           comparison = gen_aarch64_cmlt<mode>;
2878           break;
2879         }
2880       /* Fall through.  */
2881     case UNLT:
2882       std::swap (operands[2], operands[3]);
2883       /* Fall through.  */
2884     case UNGT:
2885     case GT:
2886       comparison = gen_aarch64_cmgt<mode>;
2887       break;
2888     case LE:
2889       if (use_zero_form)
2890         {
2891           comparison = gen_aarch64_cmle<mode>;
2892           break;
2893         }
2894       /* Fall through.  */
2895     case UNLE:
2896       std::swap (operands[2], operands[3]);
2897       /* Fall through.  */
2898     case UNGE:
2899     case GE:
2900       comparison = gen_aarch64_cmge<mode>;
2901       break;
2902     case NE:
2903     case EQ:
2904       comparison = gen_aarch64_cmeq<mode>;
2905       break;
2906     case UNEQ:
2907     case ORDERED:
2908     case UNORDERED:
2909     case LTGT:
2910       break;
2911     default:
2912       gcc_unreachable ();
2913     }
2914
2915   switch (code)
2916     {
2917     case UNGE:
2918     case UNGT:
2919     case UNLE:
2920     case UNLT:
2921       {
2922         /* All of the above must not raise any FP exceptions.  Thus we first
2923            check each operand for NaNs and force any elements containing NaN to
2924            zero before using them in the compare.
2925            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2926                                      (cm<cc> (isnan (a) ? 0.0 : a,
2927                                               isnan (b) ? 0.0 : b))
2928            We use the following transformations for doing the comparisions:
2929            a UNGE b -> a GE b
2930            a UNGT b -> a GT b
2931            a UNLE b -> b GE a
2932            a UNLT b -> b GT a.  */
2933
2934         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2935         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2936         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2937         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2938         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2939         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2940         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2941                                           lowpart_subreg (<V_INT_EQUIV>mode,
2942                                                           operands[2],
2943                                                           <MODE>mode)));
2944         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2945                                           lowpart_subreg (<V_INT_EQUIV>mode,
2946                                                           operands[3],
2947                                                           <MODE>mode)));
2948         gcc_assert (comparison != NULL);
2949         emit_insn (comparison (operands[0],
2950                                lowpart_subreg (<MODE>mode,
2951                                                tmp0, <V_INT_EQUIV>mode),
2952                                lowpart_subreg (<MODE>mode,
2953                                                tmp1, <V_INT_EQUIV>mode)));
2954         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2955       }
2956       break;
2957
2958     case LT:
2959     case LE:
2960     case GT:
2961     case GE:
2962     case EQ:
2963     case NE:
2964       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2965          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2966          a GE b -> a GE b
2967          a GT b -> a GT b
2968          a LE b -> b GE a
2969          a LT b -> b GT a
2970          a EQ b -> a EQ b
2971          a NE b -> ~(a EQ b)  */
2972       gcc_assert (comparison != NULL);
2973       emit_insn (comparison (operands[0], operands[2], operands[3]));
2974       if (code == NE)
2975         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2976       break;
2977
2978     case LTGT:
2979       /* LTGT is not guranteed to not generate a FP exception.  So let's
2980          go the faster way : ((a > b) || (b > a)).  */
2981       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2982                                          operands[2], operands[3]));
2983       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2984       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2985       break;
2986
2987     case ORDERED:
2988     case UNORDERED:
2989     case UNEQ:
2990       /* cmeq (a, a) & cmeq (b, b).  */
2991       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2992                                          operands[2], operands[2]));
2993       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2994       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2995
2996       if (code == UNORDERED)
2997         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2998       else if (code == UNEQ)
2999         {
3000           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3001           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3002         }
3003       break;
3004
3005     default:
3006       gcc_unreachable ();
3007     }
3008
3009   DONE;
3010 })
3011
3012 (define_expand "vec_cmpu<mode><mode>"
3013   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3014           (match_operator 1 "comparison_operator"
3015             [(match_operand:VSDQ_I_DI 2 "register_operand")
3016              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3017   "TARGET_SIMD"
3018 {
3019   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3020                                       operands[2], operands[3]));
3021   DONE;
3022 })
3023
3024 (define_expand "vcond<mode><mode>"
3025   [(set (match_operand:VALLDI 0 "register_operand")
3026         (if_then_else:VALLDI
3027           (match_operator 3 "comparison_operator"
3028             [(match_operand:VALLDI 4 "register_operand")
3029              (match_operand:VALLDI 5 "nonmemory_operand")])
3030           (match_operand:VALLDI 1 "nonmemory_operand")
3031           (match_operand:VALLDI 2 "nonmemory_operand")))]
3032   "TARGET_SIMD"
3033 {
3034   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3035   enum rtx_code code = GET_CODE (operands[3]);
3036
3037   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3038      it as well as switch operands 1/2 in order to avoid the additional
3039      NOT instruction.  */
3040   if (code == NE)
3041     {
3042       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3043                                     operands[4], operands[5]);
3044       std::swap (operands[1], operands[2]);
3045     }
3046   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3047                                              operands[4], operands[5]));
3048   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3049                                                  operands[2], mask));
3050
3051   DONE;
3052 })
3053
3054 (define_expand "vcond<v_cmp_mixed><mode>"
3055   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3056         (if_then_else:<V_cmp_mixed>
3057           (match_operator 3 "comparison_operator"
3058             [(match_operand:VDQF_COND 4 "register_operand")
3059              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3060           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3061           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3062   "TARGET_SIMD"
3063 {
3064   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3065   enum rtx_code code = GET_CODE (operands[3]);
3066
3067   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3068      it as well as switch operands 1/2 in order to avoid the additional
3069      NOT instruction.  */
3070   if (code == NE)
3071     {
3072       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3073                                     operands[4], operands[5]);
3074       std::swap (operands[1], operands[2]);
3075     }
3076   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3077                                              operands[4], operands[5]));
3078   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3079                                                 operands[0], operands[1],
3080                                                 operands[2], mask));
3081
3082   DONE;
3083 })
3084
3085 (define_expand "vcondu<mode><mode>"
3086   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3087         (if_then_else:VSDQ_I_DI
3088           (match_operator 3 "comparison_operator"
3089             [(match_operand:VSDQ_I_DI 4 "register_operand")
3090              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3091           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3092           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3093   "TARGET_SIMD"
3094 {
3095   rtx mask = gen_reg_rtx (<MODE>mode);
3096   enum rtx_code code = GET_CODE (operands[3]);
3097
3098   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3099      it as well as switch operands 1/2 in order to avoid the additional
3100      NOT instruction.  */
3101   if (code == NE)
3102     {
3103       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3104                                     operands[4], operands[5]);
3105       std::swap (operands[1], operands[2]);
3106     }
3107   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3108                                       operands[4], operands[5]));
3109   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3110                                                  operands[2], mask));
3111   DONE;
3112 })
3113
3114 (define_expand "vcondu<mode><v_cmp_mixed>"
3115   [(set (match_operand:VDQF 0 "register_operand")
3116         (if_then_else:VDQF
3117           (match_operator 3 "comparison_operator"
3118             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3119              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3120           (match_operand:VDQF 1 "nonmemory_operand")
3121           (match_operand:VDQF 2 "nonmemory_operand")))]
3122   "TARGET_SIMD"
3123 {
3124   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3125   enum rtx_code code = GET_CODE (operands[3]);
3126
3127   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3128      it as well as switch operands 1/2 in order to avoid the additional
3129      NOT instruction.  */
3130   if (code == NE)
3131     {
3132       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3133                                     operands[4], operands[5]);
3134       std::swap (operands[1], operands[2]);
3135     }
3136   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3137                                                   mask, operands[3],
3138                                                   operands[4], operands[5]));
3139   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3140                                                  operands[2], mask));
3141   DONE;
3142 })
3143
3144 ;; Patterns for AArch64 SIMD Intrinsics.
3145
3146 ;; Lane extraction with sign extension to general purpose register.
3147 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3148   [(set (match_operand:GPI 0 "register_operand" "=r")
3149         (sign_extend:GPI
3150           (vec_select:<VDQQH:VEL>
3151             (match_operand:VDQQH 1 "register_operand" "w")
3152             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3153   "TARGET_SIMD"
3154   {
3155     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3156                                            INTVAL (operands[2]));
3157     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3158   }
3159   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3160 )
3161
3162 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3163   [(set (match_operand:GPI 0 "register_operand" "=r")
3164         (zero_extend:GPI
3165           (vec_select:<VDQQH:VEL>
3166             (match_operand:VDQQH 1 "register_operand" "w")
3167             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3168   "TARGET_SIMD"
3169   {
3170     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3171                                            INTVAL (operands[2]));
3172     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3173   }
3174   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3175 )
3176
3177 ;; Lane extraction of a value, neither sign nor zero extension
3178 ;; is guaranteed so upper bits should be considered undefined.
3179 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3180 (define_insn "aarch64_get_lane<mode>"
3181   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3182         (vec_select:<VEL>
3183           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3184           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3185   "TARGET_SIMD"
3186   {
3187     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3188     switch (which_alternative)
3189       {
3190         case 0:
3191           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3192         case 1:
3193           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3194         case 2:
3195           return "st1\\t{%1.<Vetype>}[%2], %0";
3196         default:
3197           gcc_unreachable ();
3198       }
3199   }
3200   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3201 )
3202
3203 (define_insn "load_pair_lanes<mode>"
3204   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3205         (vec_concat:<VDBL>
3206            (match_operand:VDC 1 "memory_operand" "Utq")
3207            (match_operand:VDC 2 "memory_operand" "m")))]
3208   "TARGET_SIMD && !STRICT_ALIGNMENT
3209    && rtx_equal_p (XEXP (operands[2], 0),
3210                    plus_constant (Pmode,
3211                                   XEXP (operands[1], 0),
3212                                   GET_MODE_SIZE (<MODE>mode)))"
3213   "ldr\\t%q0, %1"
3214   [(set_attr "type" "neon_load1_1reg_q")]
3215 )
3216
3217 (define_insn "store_pair_lanes<mode>"
3218   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3219         (vec_concat:<VDBL>
3220            (match_operand:VDC 1 "register_operand" "w, r")
3221            (match_operand:VDC 2 "register_operand" "w, r")))]
3222   "TARGET_SIMD"
3223   "@
3224    stp\\t%d1, %d2, %y0
3225    stp\\t%x1, %x2, %y0"
3226   [(set_attr "type" "neon_stp, store_16")]
3227 )
3228
3229 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3230 ;; dest vector.
3231
3232 (define_insn "@aarch64_combinez<mode>"
3233   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3234         (vec_concat:<VDBL>
3235           (match_operand:VDC 1 "general_operand" "w,?r,m")
3236           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3237   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3238   "@
3239    mov\\t%0.8b, %1.8b
3240    fmov\t%d0, %1
3241    ldr\\t%d0, %1"
3242   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3243    (set_attr "arch" "simd,fp,simd")]
3244 )
3245
3246 (define_insn "@aarch64_combinez_be<mode>"
3247   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3248         (vec_concat:<VDBL>
3249           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3250           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3251   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3252   "@
3253    mov\\t%0.8b, %1.8b
3254    fmov\t%d0, %1
3255    ldr\\t%d0, %1"
3256   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3257    (set_attr "arch" "simd,fp,simd")]
3258 )
3259
3260 (define_expand "aarch64_combine<mode>"
3261   [(match_operand:<VDBL> 0 "register_operand")
3262    (match_operand:VDC 1 "register_operand")
3263    (match_operand:VDC 2 "register_operand")]
3264   "TARGET_SIMD"
3265 {
3266   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3267
3268   DONE;
3269 }
3270 )
3271
3272 (define_expand "@aarch64_simd_combine<mode>"
3273   [(match_operand:<VDBL> 0 "register_operand")
3274    (match_operand:VDC 1 "register_operand")
3275    (match_operand:VDC 2 "register_operand")]
3276   "TARGET_SIMD"
3277   {
3278     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3279     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3280     DONE;
3281   }
3282 [(set_attr "type" "multiple")]
3283 )
3284
3285 ;; <su><addsub>l<q>.
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3288  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290                            (match_operand:VQW 1 "register_operand" "w")
3291                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3292                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293                            (match_operand:VQW 2 "register_operand" "w")
3294                            (match_dup 3)))))]
3295   "TARGET_SIMD"
3296   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3297   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3298 )
3299
3300 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3301  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3302        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3303                            (match_operand:VQW 1 "register_operand" "w")
3304                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3305                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3306                            (match_operand:VQW 2 "register_operand" "w")
3307                            (match_dup 3)))))]
3308   "TARGET_SIMD"
3309   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3310   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3311 )
3312
3313
3314 (define_expand "aarch64_saddl2<mode>"
3315   [(match_operand:<VWIDE> 0 "register_operand")
3316    (match_operand:VQW 1 "register_operand")
3317    (match_operand:VQW 2 "register_operand")]
3318   "TARGET_SIMD"
3319 {
3320   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3322                                                   operands[2], p));
3323   DONE;
3324 })
3325
3326 (define_expand "aarch64_uaddl2<mode>"
3327   [(match_operand:<VWIDE> 0 "register_operand")
3328    (match_operand:VQW 1 "register_operand")
3329    (match_operand:VQW 2 "register_operand")]
3330   "TARGET_SIMD"
3331 {
3332   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3334                                                   operands[2], p));
3335   DONE;
3336 })
3337
3338 (define_expand "aarch64_ssubl2<mode>"
3339   [(match_operand:<VWIDE> 0 "register_operand")
3340    (match_operand:VQW 1 "register_operand")
3341    (match_operand:VQW 2 "register_operand")]
3342   "TARGET_SIMD"
3343 {
3344   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3346                                                 operands[2], p));
3347   DONE;
3348 })
3349
3350 (define_expand "aarch64_usubl2<mode>"
3351   [(match_operand:<VWIDE> 0 "register_operand")
3352    (match_operand:VQW 1 "register_operand")
3353    (match_operand:VQW 2 "register_operand")]
3354   "TARGET_SIMD"
3355 {
3356   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3357   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3358                                                 operands[2], p));
3359   DONE;
3360 })
3361
3362 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3363  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3364        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3365                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3366                        (ANY_EXTEND:<VWIDE>
3367                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3368   "TARGET_SIMD"
3369   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3370   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3371 )
3372
3373 ;; <su><addsub>w<q>.
3374
3375 (define_expand "widen_ssum<mode>3"
3376   [(set (match_operand:<VDBLW> 0 "register_operand")
3377         (plus:<VDBLW> (sign_extend:<VDBLW>
3378                         (match_operand:VQW 1 "register_operand"))
3379                       (match_operand:<VDBLW> 2 "register_operand")))]
3380   "TARGET_SIMD"
3381   {
3382     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3383     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3384
3385     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3386                                                 operands[1], p));
3387     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3388     DONE;
3389   }
3390 )
3391
3392 (define_expand "widen_ssum<mode>3"
3393   [(set (match_operand:<VWIDE> 0 "register_operand")
3394         (plus:<VWIDE> (sign_extend:<VWIDE>
3395                         (match_operand:VD_BHSI 1 "register_operand"))
3396                       (match_operand:<VWIDE> 2 "register_operand")))]
3397   "TARGET_SIMD"
3398 {
3399   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3400   DONE;
3401 })
3402
3403 (define_expand "widen_usum<mode>3"
3404   [(set (match_operand:<VDBLW> 0 "register_operand")
3405         (plus:<VDBLW> (zero_extend:<VDBLW>
3406                         (match_operand:VQW 1 "register_operand"))
3407                       (match_operand:<VDBLW> 2 "register_operand")))]
3408   "TARGET_SIMD"
3409   {
3410     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3411     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3412
3413     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3414                                                  operands[1], p));
3415     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3416     DONE;
3417   }
3418 )
3419
3420 (define_expand "widen_usum<mode>3"
3421   [(set (match_operand:<VWIDE> 0 "register_operand")
3422         (plus:<VWIDE> (zero_extend:<VWIDE>
3423                         (match_operand:VD_BHSI 1 "register_operand"))
3424                       (match_operand:<VWIDE> 2 "register_operand")))]
3425   "TARGET_SIMD"
3426 {
3427   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3428   DONE;
3429 })
3430
3431 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3432   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3434           (ANY_EXTEND:<VWIDE>
3435             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3436   "TARGET_SIMD"
3437   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3438   [(set_attr "type" "neon_sub_widen")]
3439 )
3440
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3442   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3444           (ANY_EXTEND:<VWIDE>
3445             (vec_select:<VHALF>
3446               (match_operand:VQW 2 "register_operand" "w")
3447               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3448   "TARGET_SIMD"
3449   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3450   [(set_attr "type" "neon_sub_widen")]
3451 )
3452
3453 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3454   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3456           (ANY_EXTEND:<VWIDE>
3457             (vec_select:<VHALF>
3458               (match_operand:VQW 2 "register_operand" "w")
3459               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3460   "TARGET_SIMD"
3461   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3462   [(set_attr "type" "neon_sub_widen")]
3463 )
3464
3465 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3466   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3467         (plus:<VWIDE>
3468           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3469           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3470   "TARGET_SIMD"
3471   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3472   [(set_attr "type" "neon_add_widen")]
3473 )
3474
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3476   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3477         (plus:<VWIDE>
3478           (ANY_EXTEND:<VWIDE>
3479             (vec_select:<VHALF>
3480               (match_operand:VQW 2 "register_operand" "w")
3481               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3482           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3483   "TARGET_SIMD"
3484   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3485   [(set_attr "type" "neon_add_widen")]
3486 )
3487
3488 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3489   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3490         (plus:<VWIDE>
3491           (ANY_EXTEND:<VWIDE>
3492             (vec_select:<VHALF>
3493               (match_operand:VQW 2 "register_operand" "w")
3494               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3495           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3496   "TARGET_SIMD"
3497   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3498   [(set_attr "type" "neon_add_widen")]
3499 )
3500
3501 (define_expand "aarch64_saddw2<mode>"
3502   [(match_operand:<VWIDE> 0 "register_operand")
3503    (match_operand:<VWIDE> 1 "register_operand")
3504    (match_operand:VQW 2 "register_operand")]
3505   "TARGET_SIMD"
3506 {
3507   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3509                                                 operands[2], p));
3510   DONE;
3511 })
3512
3513 (define_expand "aarch64_uaddw2<mode>"
3514   [(match_operand:<VWIDE> 0 "register_operand")
3515    (match_operand:<VWIDE> 1 "register_operand")
3516    (match_operand:VQW 2 "register_operand")]
3517   "TARGET_SIMD"
3518 {
3519   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3521                                                 operands[2], p));
3522   DONE;
3523 })
3524
3525
3526 (define_expand "aarch64_ssubw2<mode>"
3527   [(match_operand:<VWIDE> 0 "register_operand")
3528    (match_operand:<VWIDE> 1 "register_operand")
3529    (match_operand:VQW 2 "register_operand")]
3530   "TARGET_SIMD"
3531 {
3532   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3534                                                 operands[2], p));
3535   DONE;
3536 })
3537
3538 (define_expand "aarch64_usubw2<mode>"
3539   [(match_operand:<VWIDE> 0 "register_operand")
3540    (match_operand:<VWIDE> 1 "register_operand")
3541    (match_operand:VQW 2 "register_operand")]
3542   "TARGET_SIMD"
3543 {
3544   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3545   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3546                                                 operands[2], p));
3547   DONE;
3548 })
3549
3550 ;; <su><r>h<addsub>.
3551
3552 (define_expand "<u>avg<mode>3_floor"
3553   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3554         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3555                           (match_operand:VDQ_BHSI 2 "register_operand")]
3556                          HADD))]
3557   "TARGET_SIMD"
3558 )
3559
3560 (define_expand "<u>avg<mode>3_ceil"
3561   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3562         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3563                           (match_operand:VDQ_BHSI 2 "register_operand")]
3564                          RHADD))]
3565   "TARGET_SIMD"
3566 )
3567
3568 (define_insn "aarch64_<sur>h<addsub><mode>"
3569   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3570         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3571                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3572                      HADDSUB))]
3573   "TARGET_SIMD"
3574   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3575   [(set_attr "type" "neon_<addsub>_halve<q>")]
3576 )
3577
3578 ;; <r><addsub>hn<q>.
3579
3580 (define_insn "aarch64_<sur><addsub>hn<mode>"
3581   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3582         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3583                             (match_operand:VQN 2 "register_operand" "w")]
3584                            ADDSUBHN))]
3585   "TARGET_SIMD"
3586   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3587   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3588 )
3589
3590 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3591   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3592         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3593                              (match_operand:VQN 2 "register_operand" "w")
3594                              (match_operand:VQN 3 "register_operand" "w")]
3595                             ADDSUBHN2))]
3596   "TARGET_SIMD"
3597   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3598   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3599 )
3600
3601 ;; pmul.
3602
3603 (define_insn "aarch64_pmul<mode>"
3604   [(set (match_operand:VB 0 "register_operand" "=w")
3605         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3606                     (match_operand:VB 2 "register_operand" "w")]
3607                    UNSPEC_PMUL))]
3608  "TARGET_SIMD"
3609  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3610   [(set_attr "type" "neon_mul_<Vetype><q>")]
3611 )
3612
3613 ;; fmulx.
3614
3615 (define_insn "aarch64_fmulx<mode>"
3616   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3617         (unspec:VHSDF_HSDF
3618           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3619            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3620            UNSPEC_FMULX))]
3621  "TARGET_SIMD"
3622  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3623  [(set_attr "type" "neon_fp_mul_<stype>")]
3624 )
3625
3626 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3627
3628 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3629   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3630         (unspec:VDQSF
3631          [(match_operand:VDQSF 1 "register_operand" "w")
3632           (vec_duplicate:VDQSF
3633            (vec_select:<VEL>
3634             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3635             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3636          UNSPEC_FMULX))]
3637   "TARGET_SIMD"
3638   {
3639     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3640     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3641   }
3642   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3643 )
3644
3645 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3646
3647 (define_insn "*aarch64_mulx_elt<mode>"
3648   [(set (match_operand:VDQF 0 "register_operand" "=w")
3649         (unspec:VDQF
3650          [(match_operand:VDQF 1 "register_operand" "w")
3651           (vec_duplicate:VDQF
3652            (vec_select:<VEL>
3653             (match_operand:VDQF 2 "register_operand" "w")
3654             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3655          UNSPEC_FMULX))]
3656   "TARGET_SIMD"
3657   {
3658     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3659     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3660   }
3661   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3662 )
3663
3664 ;; vmulxq_lane
3665
3666 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3667   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3668         (unspec:VHSDF
3669          [(match_operand:VHSDF 1 "register_operand" "w")
3670           (vec_duplicate:VHSDF
3671             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3672          UNSPEC_FMULX))]
3673   "TARGET_SIMD"
3674   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3675   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3676 )
3677
3678 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3679 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3680 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3681
3682 (define_insn "*aarch64_vgetfmulx<mode>"
3683   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3684         (unspec:<VEL>
3685          [(match_operand:<VEL> 1 "register_operand" "w")
3686           (vec_select:<VEL>
3687            (match_operand:VDQF 2 "register_operand" "w")
3688             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3689          UNSPEC_FMULX))]
3690   "TARGET_SIMD"
3691   {
3692     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3693     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3694   }
3695   [(set_attr "type" "fmul<Vetype>")]
3696 )
3697 ;; <su>q<addsub>
3698
3699 (define_insn "aarch64_<su_optab><optab><mode>"
3700   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3701         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3702                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3703   "TARGET_SIMD"
3704   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3705   [(set_attr "type" "neon_<optab><q>")]
3706 )
3707
3708 ;; suqadd and usqadd
3709
3710 (define_insn "aarch64_<sur>qadd<mode>"
3711   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3712         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3713                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3714                        USSUQADD))]
3715   "TARGET_SIMD"
3716   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3717   [(set_attr "type" "neon_qadd<q>")]
3718 )
3719
3720 ;; sqmovun
3721
3722 (define_insn "aarch64_sqmovun<mode>"
3723   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3724         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3725                             UNSPEC_SQXTUN))]
3726    "TARGET_SIMD"
3727    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3728    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3729 )
3730
3731 ;; sqmovn and uqmovn
3732
3733 (define_insn "aarch64_<sur>qmovn<mode>"
3734   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3735         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3736                             SUQMOVN))]
3737   "TARGET_SIMD"
3738   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3739    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3740 )
3741
3742 ;; <su>q<absneg>
3743
3744 (define_insn "aarch64_s<optab><mode>"
3745   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3746         (UNQOPS:VSDQ_I
3747           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3748   "TARGET_SIMD"
3749   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3750   [(set_attr "type" "neon_<optab><q>")]
3751 )
3752
3753 ;; sq<r>dmulh.
3754
3755 (define_insn "aarch64_sq<r>dmulh<mode>"
3756   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3757         (unspec:VSDQ_HSI
3758           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3759            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3760          VQDMULH))]
3761   "TARGET_SIMD"
3762   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3763   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3764 )
3765
3766 ;; sq<r>dmulh_lane
3767
3768 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3769   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3770         (unspec:VDQHS
3771           [(match_operand:VDQHS 1 "register_operand" "w")
3772            (vec_select:<VEL>
3773              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3774              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3775          VQDMULH))]
3776   "TARGET_SIMD"
3777   "*
3778    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3779    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3780   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3781 )
3782
3783 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3784   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3785         (unspec:VDQHS
3786           [(match_operand:VDQHS 1 "register_operand" "w")
3787            (vec_select:<VEL>
3788              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3789              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3790          VQDMULH))]
3791   "TARGET_SIMD"
3792   "*
3793    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3794    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3795   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3796 )
3797
3798 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3799   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3800         (unspec:SD_HSI
3801           [(match_operand:SD_HSI 1 "register_operand" "w")
3802            (vec_select:<VEL>
3803              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3804              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3805          VQDMULH))]
3806   "TARGET_SIMD"
3807   "*
3808    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3809    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3810   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3811 )
3812
3813 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3814   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3815         (unspec:SD_HSI
3816           [(match_operand:SD_HSI 1 "register_operand" "w")
3817            (vec_select:<VEL>
3818              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3820          VQDMULH))]
3821   "TARGET_SIMD"
3822   "*
3823    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3824    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3825   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3826 )
3827
3828 ;; sqrdml[as]h.
3829
3830 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3831   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3832         (unspec:VSDQ_HSI
3833           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3834            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3835            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3836           SQRDMLH_AS))]
3837    "TARGET_SIMD_RDMA"
3838    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3839    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3840 )
3841
3842 ;; sqrdml[as]h_lane.
3843
3844 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3845   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3846         (unspec:VDQHS
3847           [(match_operand:VDQHS 1 "register_operand" "0")
3848            (match_operand:VDQHS 2 "register_operand" "w")
3849            (vec_select:<VEL>
3850              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3851              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3852           SQRDMLH_AS))]
3853    "TARGET_SIMD_RDMA"
3854    {
3855      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3856      return
3857       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3858    }
3859    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3860 )
3861
3862 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3863   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3864         (unspec:SD_HSI
3865           [(match_operand:SD_HSI 1 "register_operand" "0")
3866            (match_operand:SD_HSI 2 "register_operand" "w")
3867            (vec_select:<VEL>
3868              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3869              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3870           SQRDMLH_AS))]
3871    "TARGET_SIMD_RDMA"
3872    {
3873      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3874      return
3875       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3876    }
3877    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3878 )
3879
3880 ;; sqrdml[as]h_laneq.
3881
3882 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3883   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3884         (unspec:VDQHS
3885           [(match_operand:VDQHS 1 "register_operand" "0")
3886            (match_operand:VDQHS 2 "register_operand" "w")
3887            (vec_select:<VEL>
3888              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3889              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3890           SQRDMLH_AS))]
3891    "TARGET_SIMD_RDMA"
3892    {
3893      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3894      return
3895       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3896    }
3897    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3898 )
3899
3900 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3901   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3902         (unspec:SD_HSI
3903           [(match_operand:SD_HSI 1 "register_operand" "0")
3904            (match_operand:SD_HSI 2 "register_operand" "w")
3905            (vec_select:<VEL>
3906              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3907              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3908           SQRDMLH_AS))]
3909    "TARGET_SIMD_RDMA"
3910    {
3911      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3912      return
3913       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3914    }
3915    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3916 )
3917
3918 ;; vqdml[sa]l
3919
3920 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3921   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3922         (SBINQOPS:<VWIDE>
3923           (match_operand:<VWIDE> 1 "register_operand" "0")
3924           (ss_ashift:<VWIDE>
3925               (mult:<VWIDE>
3926                 (sign_extend:<VWIDE>
3927                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3928                 (sign_extend:<VWIDE>
3929                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3930               (const_int 1))))]
3931   "TARGET_SIMD"
3932   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3933   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3934 )
3935
3936 ;; vqdml[sa]l_lane
3937
3938 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3939   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3940         (SBINQOPS:<VWIDE>
3941           (match_operand:<VWIDE> 1 "register_operand" "0")
3942           (ss_ashift:<VWIDE>
3943             (mult:<VWIDE>
3944               (sign_extend:<VWIDE>
3945                 (match_operand:VD_HSI 2 "register_operand" "w"))
3946               (sign_extend:<VWIDE>
3947                 (vec_duplicate:VD_HSI
3948                   (vec_select:<VEL>
3949                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3950                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3951               ))
3952             (const_int 1))))]
3953   "TARGET_SIMD"
3954   {
3955     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3956     return
3957       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3958   }
3959   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3960 )
3961
3962 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3963   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3964         (SBINQOPS:<VWIDE>
3965           (match_operand:<VWIDE> 1 "register_operand" "0")
3966           (ss_ashift:<VWIDE>
3967             (mult:<VWIDE>
3968               (sign_extend:<VWIDE>
3969                 (match_operand:VD_HSI 2 "register_operand" "w"))
3970               (sign_extend:<VWIDE>
3971                 (vec_duplicate:VD_HSI
3972                   (vec_select:<VEL>
3973                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3975               ))
3976             (const_int 1))))]
3977   "TARGET_SIMD"
3978   {
3979     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3980     return
3981       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3982   }
3983   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3984 )
3985
3986 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3987   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3988         (SBINQOPS:<VWIDE>
3989           (match_operand:<VWIDE> 1 "register_operand" "0")
3990           (ss_ashift:<VWIDE>
3991             (mult:<VWIDE>
3992               (sign_extend:<VWIDE>
3993                 (match_operand:SD_HSI 2 "register_operand" "w"))
3994               (sign_extend:<VWIDE>
3995                 (vec_select:<VEL>
3996                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3998               )
3999             (const_int 1))))]
4000   "TARGET_SIMD"
4001   {
4002     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4003     return
4004       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4005   }
4006   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4007 )
4008
4009 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4010   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4011         (SBINQOPS:<VWIDE>
4012           (match_operand:<VWIDE> 1 "register_operand" "0")
4013           (ss_ashift:<VWIDE>
4014             (mult:<VWIDE>
4015               (sign_extend:<VWIDE>
4016                 (match_operand:SD_HSI 2 "register_operand" "w"))
4017               (sign_extend:<VWIDE>
4018                 (vec_select:<VEL>
4019                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4020                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4021               )
4022             (const_int 1))))]
4023   "TARGET_SIMD"
4024   {
4025     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4026     return
4027       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4028   }
4029   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4030 )
4031
4032 ;; vqdml[sa]l_n
4033
4034 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4035   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4036         (SBINQOPS:<VWIDE>
4037           (match_operand:<VWIDE> 1 "register_operand" "0")
4038           (ss_ashift:<VWIDE>
4039               (mult:<VWIDE>
4040                 (sign_extend:<VWIDE>
4041                       (match_operand:VD_HSI 2 "register_operand" "w"))
4042                 (sign_extend:<VWIDE>
4043                   (vec_duplicate:VD_HSI
4044                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4045               (const_int 1))))]
4046   "TARGET_SIMD"
4047   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4048   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4049 )
4050
4051 ;; sqdml[as]l2
4052
4053 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4054   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4055         (SBINQOPS:<VWIDE>
4056          (match_operand:<VWIDE> 1 "register_operand" "0")
4057          (ss_ashift:<VWIDE>
4058              (mult:<VWIDE>
4059                (sign_extend:<VWIDE>
4060                  (vec_select:<VHALF>
4061                      (match_operand:VQ_HSI 2 "register_operand" "w")
4062                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4063                (sign_extend:<VWIDE>
4064                  (vec_select:<VHALF>
4065                      (match_operand:VQ_HSI 3 "register_operand" "w")
4066                      (match_dup 4))))
4067              (const_int 1))))]
4068   "TARGET_SIMD"
4069   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4070   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4071 )
4072
4073 (define_expand "aarch64_sqdmlal2<mode>"
4074   [(match_operand:<VWIDE> 0 "register_operand")
4075    (match_operand:<VWIDE> 1 "register_operand")
4076    (match_operand:VQ_HSI 2 "register_operand")
4077    (match_operand:VQ_HSI 3 "register_operand")]
4078   "TARGET_SIMD"
4079 {
4080   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4082                                                   operands[2], operands[3], p));
4083   DONE;
4084 })
4085
4086 (define_expand "aarch64_sqdmlsl2<mode>"
4087   [(match_operand:<VWIDE> 0 "register_operand")
4088    (match_operand:<VWIDE> 1 "register_operand")
4089    (match_operand:VQ_HSI 2 "register_operand")
4090    (match_operand:VQ_HSI 3 "register_operand")]
4091   "TARGET_SIMD"
4092 {
4093   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4094   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4095                                                   operands[2], operands[3], p));
4096   DONE;
4097 })
4098
4099 ;; vqdml[sa]l2_lane
4100
4101 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4102   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4103         (SBINQOPS:<VWIDE>
4104           (match_operand:<VWIDE> 1 "register_operand" "0")
4105           (ss_ashift:<VWIDE>
4106               (mult:<VWIDE>
4107                 (sign_extend:<VWIDE>
4108                   (vec_select:<VHALF>
4109                     (match_operand:VQ_HSI 2 "register_operand" "w")
4110                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4111                 (sign_extend:<VWIDE>
4112                   (vec_duplicate:<VHALF>
4113                     (vec_select:<VEL>
4114                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4115                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4116                     ))))
4117               (const_int 1))))]
4118   "TARGET_SIMD"
4119   {
4120     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4121     return
4122      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4123   }
4124   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4125 )
4126
4127 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4128   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4129         (SBINQOPS:<VWIDE>
4130           (match_operand:<VWIDE> 1 "register_operand" "0")
4131           (ss_ashift:<VWIDE>
4132               (mult:<VWIDE>
4133                 (sign_extend:<VWIDE>
4134                   (vec_select:<VHALF>
4135                     (match_operand:VQ_HSI 2 "register_operand" "w")
4136                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4137                 (sign_extend:<VWIDE>
4138                   (vec_duplicate:<VHALF>
4139                     (vec_select:<VEL>
4140                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4141                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4142                     ))))
4143               (const_int 1))))]
4144   "TARGET_SIMD"
4145   {
4146     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4147     return
4148      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4149   }
4150   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4151 )
4152
4153 (define_expand "aarch64_sqdmlal2_lane<mode>"
4154   [(match_operand:<VWIDE> 0 "register_operand")
4155    (match_operand:<VWIDE> 1 "register_operand")
4156    (match_operand:VQ_HSI 2 "register_operand")
4157    (match_operand:<VCOND> 3 "register_operand")
4158    (match_operand:SI 4 "immediate_operand")]
4159   "TARGET_SIMD"
4160 {
4161   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4162   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4163                                                        operands[2], operands[3],
4164                                                        operands[4], p));
4165   DONE;
4166 })
4167
4168 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4169   [(match_operand:<VWIDE> 0 "register_operand")
4170    (match_operand:<VWIDE> 1 "register_operand")
4171    (match_operand:VQ_HSI 2 "register_operand")
4172    (match_operand:<VCONQ> 3 "register_operand")
4173    (match_operand:SI 4 "immediate_operand")]
4174   "TARGET_SIMD"
4175 {
4176   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4177   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4178                                                        operands[2], operands[3],
4179                                                        operands[4], p));
4180   DONE;
4181 })
4182
4183 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4184   [(match_operand:<VWIDE> 0 "register_operand")
4185    (match_operand:<VWIDE> 1 "register_operand")
4186    (match_operand:VQ_HSI 2 "register_operand")
4187    (match_operand:<VCOND> 3 "register_operand")
4188    (match_operand:SI 4 "immediate_operand")]
4189   "TARGET_SIMD"
4190 {
4191   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4192   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4193                                                        operands[2], operands[3],
4194                                                        operands[4], p));
4195   DONE;
4196 })
4197
4198 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4199   [(match_operand:<VWIDE> 0 "register_operand")
4200    (match_operand:<VWIDE> 1 "register_operand")
4201    (match_operand:VQ_HSI 2 "register_operand")
4202    (match_operand:<VCONQ> 3 "register_operand")
4203    (match_operand:SI 4 "immediate_operand")]
4204   "TARGET_SIMD"
4205 {
4206   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4207   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4208                                                        operands[2], operands[3],
4209                                                        operands[4], p));
4210   DONE;
4211 })
4212
4213 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4214   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4215         (SBINQOPS:<VWIDE>
4216           (match_operand:<VWIDE> 1 "register_operand" "0")
4217           (ss_ashift:<VWIDE>
4218             (mult:<VWIDE>
4219               (sign_extend:<VWIDE>
4220                 (vec_select:<VHALF>
4221                   (match_operand:VQ_HSI 2 "register_operand" "w")
4222                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223               (sign_extend:<VWIDE>
4224                 (vec_duplicate:<VHALF>
4225                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4226             (const_int 1))))]
4227   "TARGET_SIMD"
4228   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4229   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_expand "aarch64_sqdmlal2_n<mode>"
4233   [(match_operand:<VWIDE> 0 "register_operand")
4234    (match_operand:<VWIDE> 1 "register_operand")
4235    (match_operand:VQ_HSI 2 "register_operand")
4236    (match_operand:<VEL> 3 "register_operand")]
4237   "TARGET_SIMD"
4238 {
4239   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4240   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4241                                                     operands[2], operands[3],
4242                                                     p));
4243   DONE;
4244 })
4245
4246 (define_expand "aarch64_sqdmlsl2_n<mode>"
4247   [(match_operand:<VWIDE> 0 "register_operand")
4248    (match_operand:<VWIDE> 1 "register_operand")
4249    (match_operand:VQ_HSI 2 "register_operand")
4250    (match_operand:<VEL> 3 "register_operand")]
4251   "TARGET_SIMD"
4252 {
4253   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4254   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4255                                                     operands[2], operands[3],
4256                                                     p));
4257   DONE;
4258 })
4259
4260 ;; vqdmull
4261
4262 (define_insn "aarch64_sqdmull<mode>"
4263   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4264         (ss_ashift:<VWIDE>
4265              (mult:<VWIDE>
4266                (sign_extend:<VWIDE>
4267                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4268                (sign_extend:<VWIDE>
4269                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4270              (const_int 1)))]
4271   "TARGET_SIMD"
4272   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4273   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4274 )
4275
4276 ;; vqdmull_lane
4277
4278 (define_insn "aarch64_sqdmull_lane<mode>"
4279   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4280         (ss_ashift:<VWIDE>
4281              (mult:<VWIDE>
4282                (sign_extend:<VWIDE>
4283                  (match_operand:VD_HSI 1 "register_operand" "w"))
4284                (sign_extend:<VWIDE>
4285                  (vec_duplicate:VD_HSI
4286                    (vec_select:<VEL>
4287                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4288                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4289                ))
4290              (const_int 1)))]
4291   "TARGET_SIMD"
4292   {
4293     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4294     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4295   }
4296   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4297 )
4298
4299 (define_insn "aarch64_sqdmull_laneq<mode>"
4300   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4301         (ss_ashift:<VWIDE>
4302              (mult:<VWIDE>
4303                (sign_extend:<VWIDE>
4304                  (match_operand:VD_HSI 1 "register_operand" "w"))
4305                (sign_extend:<VWIDE>
4306                  (vec_duplicate:VD_HSI
4307                    (vec_select:<VEL>
4308                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4309                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4310                ))
4311              (const_int 1)))]
4312   "TARGET_SIMD"
4313   {
4314     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4315     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4316   }
4317   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4318 )
4319
4320 (define_insn "aarch64_sqdmull_lane<mode>"
4321   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4322         (ss_ashift:<VWIDE>
4323              (mult:<VWIDE>
4324                (sign_extend:<VWIDE>
4325                  (match_operand:SD_HSI 1 "register_operand" "w"))
4326                (sign_extend:<VWIDE>
4327                  (vec_select:<VEL>
4328                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4329                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4330                ))
4331              (const_int 1)))]
4332   "TARGET_SIMD"
4333   {
4334     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4335     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4336   }
4337   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4338 )
4339
4340 (define_insn "aarch64_sqdmull_laneq<mode>"
4341   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4342         (ss_ashift:<VWIDE>
4343              (mult:<VWIDE>
4344                (sign_extend:<VWIDE>
4345                  (match_operand:SD_HSI 1 "register_operand" "w"))
4346                (sign_extend:<VWIDE>
4347                  (vec_select:<VEL>
4348                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4350                ))
4351              (const_int 1)))]
4352   "TARGET_SIMD"
4353   {
4354     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4355     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4356   }
4357   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4358 )
4359
4360 ;; vqdmull_n
4361
4362 (define_insn "aarch64_sqdmull_n<mode>"
4363   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364         (ss_ashift:<VWIDE>
4365              (mult:<VWIDE>
4366                (sign_extend:<VWIDE>
4367                  (match_operand:VD_HSI 1 "register_operand" "w"))
4368                (sign_extend:<VWIDE>
4369                  (vec_duplicate:VD_HSI
4370                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4371                )
4372              (const_int 1)))]
4373   "TARGET_SIMD"
4374   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4375   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4376 )
4377
4378 ;; vqdmull2
4379
4380
4381
4382 (define_insn "aarch64_sqdmull2<mode>_internal"
4383   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4384         (ss_ashift:<VWIDE>
4385              (mult:<VWIDE>
4386                (sign_extend:<VWIDE>
4387                  (vec_select:<VHALF>
4388                    (match_operand:VQ_HSI 1 "register_operand" "w")
4389                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4390                (sign_extend:<VWIDE>
4391                  (vec_select:<VHALF>
4392                    (match_operand:VQ_HSI 2 "register_operand" "w")
4393                    (match_dup 3)))
4394                )
4395              (const_int 1)))]
4396   "TARGET_SIMD"
4397   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4398   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4399 )
4400
4401 (define_expand "aarch64_sqdmull2<mode>"
4402   [(match_operand:<VWIDE> 0 "register_operand")
4403    (match_operand:VQ_HSI 1 "register_operand")
4404    (match_operand:VQ_HSI 2 "register_operand")]
4405   "TARGET_SIMD"
4406 {
4407   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4408   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4409                                                   operands[2], p));
4410   DONE;
4411 })
4412
4413 ;; vqdmull2_lane
4414
4415 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4416   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4417         (ss_ashift:<VWIDE>
4418              (mult:<VWIDE>
4419                (sign_extend:<VWIDE>
4420                  (vec_select:<VHALF>
4421                    (match_operand:VQ_HSI 1 "register_operand" "w")
4422                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4423                (sign_extend:<VWIDE>
4424                  (vec_duplicate:<VHALF>
4425                    (vec_select:<VEL>
4426                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4427                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4428                ))
4429              (const_int 1)))]
4430   "TARGET_SIMD"
4431   {
4432     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4433     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4434   }
4435   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4436 )
4437
4438 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4439   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440         (ss_ashift:<VWIDE>
4441              (mult:<VWIDE>
4442                (sign_extend:<VWIDE>
4443                  (vec_select:<VHALF>
4444                    (match_operand:VQ_HSI 1 "register_operand" "w")
4445                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4446                (sign_extend:<VWIDE>
4447                  (vec_duplicate:<VHALF>
4448                    (vec_select:<VEL>
4449                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4450                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4451                ))
4452              (const_int 1)))]
4453   "TARGET_SIMD"
4454   {
4455     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4456     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4457   }
4458   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4459 )
4460
4461 (define_expand "aarch64_sqdmull2_lane<mode>"
4462   [(match_operand:<VWIDE> 0 "register_operand")
4463    (match_operand:VQ_HSI 1 "register_operand")
4464    (match_operand:<VCOND> 2 "register_operand")
4465    (match_operand:SI 3 "immediate_operand")]
4466   "TARGET_SIMD"
4467 {
4468   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4469   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4470                                                        operands[2], operands[3],
4471                                                        p));
4472   DONE;
4473 })
4474
4475 (define_expand "aarch64_sqdmull2_laneq<mode>"
4476   [(match_operand:<VWIDE> 0 "register_operand")
4477    (match_operand:VQ_HSI 1 "register_operand")
4478    (match_operand:<VCONQ> 2 "register_operand")
4479    (match_operand:SI 3 "immediate_operand")]
4480   "TARGET_SIMD"
4481 {
4482   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4484                                                        operands[2], operands[3],
4485                                                        p));
4486   DONE;
4487 })
4488
4489 ;; vqdmull2_n
4490
4491 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4492   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4493         (ss_ashift:<VWIDE>
4494              (mult:<VWIDE>
4495                (sign_extend:<VWIDE>
4496                  (vec_select:<VHALF>
4497                    (match_operand:VQ_HSI 1 "register_operand" "w")
4498                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4499                (sign_extend:<VWIDE>
4500                  (vec_duplicate:<VHALF>
4501                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4502                )
4503              (const_int 1)))]
4504   "TARGET_SIMD"
4505   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4506   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4507 )
4508
4509 (define_expand "aarch64_sqdmull2_n<mode>"
4510   [(match_operand:<VWIDE> 0 "register_operand")
4511    (match_operand:VQ_HSI 1 "register_operand")
4512    (match_operand:<VEL> 2 "register_operand")]
4513   "TARGET_SIMD"
4514 {
4515   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4516   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4517                                                     operands[2], p));
4518   DONE;
4519 })
4520
4521 ;; vshl
4522
4523 (define_insn "aarch64_<sur>shl<mode>"
4524   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4525         (unspec:VSDQ_I_DI
4526           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4527            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4528          VSHL))]
4529   "TARGET_SIMD"
4530   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4531   [(set_attr "type" "neon_shift_reg<q>")]
4532 )
4533
4534
4535 ;; vqshl
4536
4537 (define_insn "aarch64_<sur>q<r>shl<mode>"
4538   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4539         (unspec:VSDQ_I
4540           [(match_operand:VSDQ_I 1 "register_operand" "w")
4541            (match_operand:VSDQ_I 2 "register_operand" "w")]
4542          VQSHL))]
4543   "TARGET_SIMD"
4544   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4545   [(set_attr "type" "neon_sat_shift_reg<q>")]
4546 )
4547
4548 ;; vshll_n
4549
4550 (define_insn "aarch64_<sur>shll_n<mode>"
4551   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4552         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4553                          (match_operand:SI 2
4554                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4555                          VSHLL))]
4556   "TARGET_SIMD"
4557   {
4558     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4559       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4560     else
4561       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4562   }
4563   [(set_attr "type" "neon_shift_imm_long")]
4564 )
4565
4566 ;; vshll_high_n
4567
4568 (define_insn "aarch64_<sur>shll2_n<mode>"
4569   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4570         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4571                          (match_operand:SI 2 "immediate_operand" "i")]
4572                          VSHLL))]
4573   "TARGET_SIMD"
4574   {
4575     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4576       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4577     else
4578       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4579   }
4580   [(set_attr "type" "neon_shift_imm_long")]
4581 )
4582
4583 ;; vrshr_n
4584
4585 (define_insn "aarch64_<sur>shr_n<mode>"
4586   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4588                            (match_operand:SI 2
4589                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4590                           VRSHR_N))]
4591   "TARGET_SIMD"
4592   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4593   [(set_attr "type" "neon_sat_shift_imm<q>")]
4594 )
4595
4596 ;; v(r)sra_n
4597
4598 (define_insn "aarch64_<sur>sra_n<mode>"
4599   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4600         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4601                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4602                        (match_operand:SI 3
4603                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4604                       VSRA))]
4605   "TARGET_SIMD"
4606   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4607   [(set_attr "type" "neon_shift_acc<q>")]
4608 )
4609
4610 ;; vs<lr>i_n
4611
4612 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4613   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4614         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4615                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4616                        (match_operand:SI 3
4617                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4618                       VSLRI))]
4619   "TARGET_SIMD"
4620   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4621   [(set_attr "type" "neon_shift_imm<q>")]
4622 )
4623
4624 ;; vqshl(u)
4625
4626 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4627   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4628         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4629                        (match_operand:SI 2
4630                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4631                       VQSHL_N))]
4632   "TARGET_SIMD"
4633   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4634   [(set_attr "type" "neon_sat_shift_imm<q>")]
4635 )
4636
4637
4638 ;; vq(r)shr(u)n_n
4639
4640 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4641   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4642         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4643                             (match_operand:SI 2
4644                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4645                            VQSHRN_N))]
4646   "TARGET_SIMD"
4647   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4648   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4649 )
4650
4651
4652 ;; cm(eq|ge|gt|lt|le)
4653 ;; Note, we have constraints for Dz and Z as different expanders
4654 ;; have different ideas of what should be passed to this pattern.
4655
4656 (define_insn "aarch64_cm<optab><mode>"
4657   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4658         (neg:<V_INT_EQUIV>
4659           (COMPARISONS:<V_INT_EQUIV>
4660             (match_operand:VDQ_I 1 "register_operand" "w,w")
4661             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4662           )))]
4663   "TARGET_SIMD"
4664   "@
4665   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4667   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4668 )
4669
4670 (define_insn_and_split "aarch64_cm<optab>di"
4671   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4672         (neg:DI
4673           (COMPARISONS:DI
4674             (match_operand:DI 1 "register_operand" "w,w,r")
4675             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4676           )))
4677      (clobber (reg:CC CC_REGNUM))]
4678   "TARGET_SIMD"
4679   "#"
4680   "&& reload_completed"
4681   [(set (match_operand:DI 0 "register_operand")
4682         (neg:DI
4683           (COMPARISONS:DI
4684             (match_operand:DI 1 "register_operand")
4685             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4686           )))]
4687   {
4688     /* If we are in the general purpose register file,
4689        we split to a sequence of comparison and store.  */
4690     if (GP_REGNUM_P (REGNO (operands[0]))
4691         && GP_REGNUM_P (REGNO (operands[1])))
4692       {
4693         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4694         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4695         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4696         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4697         DONE;
4698       }
4699     /* Otherwise, we expand to a similar pattern which does not
4700        clobber CC_REGNUM.  */
4701   }
4702   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4703 )
4704
4705 (define_insn "*aarch64_cm<optab>di"
4706   [(set (match_operand:DI 0 "register_operand" "=w,w")
4707         (neg:DI
4708           (COMPARISONS:DI
4709             (match_operand:DI 1 "register_operand" "w,w")
4710             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4711           )))]
4712   "TARGET_SIMD && reload_completed"
4713   "@
4714   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4715   cm<optab>\t%d0, %d1, #0"
4716   [(set_attr "type" "neon_compare, neon_compare_zero")]
4717 )
4718
4719 ;; cm(hs|hi)
4720
4721 (define_insn "aarch64_cm<optab><mode>"
4722   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4723         (neg:<V_INT_EQUIV>
4724           (UCOMPARISONS:<V_INT_EQUIV>
4725             (match_operand:VDQ_I 1 "register_operand" "w")
4726             (match_operand:VDQ_I 2 "register_operand" "w")
4727           )))]
4728   "TARGET_SIMD"
4729   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4730   [(set_attr "type" "neon_compare<q>")]
4731 )
4732
4733 (define_insn_and_split "aarch64_cm<optab>di"
4734   [(set (match_operand:DI 0 "register_operand" "=w,r")
4735         (neg:DI
4736           (UCOMPARISONS:DI
4737             (match_operand:DI 1 "register_operand" "w,r")
4738             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4739           )))
4740     (clobber (reg:CC CC_REGNUM))]
4741   "TARGET_SIMD"
4742   "#"
4743   "&& reload_completed"
4744   [(set (match_operand:DI 0 "register_operand")
4745         (neg:DI
4746           (UCOMPARISONS:DI
4747             (match_operand:DI 1 "register_operand")
4748             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4749           )))]
4750   {
4751     /* If we are in the general purpose register file,
4752        we split to a sequence of comparison and store.  */
4753     if (GP_REGNUM_P (REGNO (operands[0]))
4754         && GP_REGNUM_P (REGNO (operands[1])))
4755       {
4756         machine_mode mode = CCmode;
4757         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4758         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4759         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4760         DONE;
4761       }
4762     /* Otherwise, we expand to a similar pattern which does not
4763        clobber CC_REGNUM.  */
4764   }
4765   [(set_attr "type" "neon_compare,multiple")]
4766 )
4767
4768 (define_insn "*aarch64_cm<optab>di"
4769   [(set (match_operand:DI 0 "register_operand" "=w")
4770         (neg:DI
4771           (UCOMPARISONS:DI
4772             (match_operand:DI 1 "register_operand" "w")
4773             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4774           )))]
4775   "TARGET_SIMD && reload_completed"
4776   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4777   [(set_attr "type" "neon_compare")]
4778 )
4779
4780 ;; cmtst
4781
4782 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4783 ;; we don't have any insns using ne, and aarch64_vcond outputs
4784 ;; not (neg (eq (and x y) 0))
4785 ;; which is rewritten by simplify_rtx as
4786 ;; plus (eq (and x y) 0) -1.
4787
4788 (define_insn "aarch64_cmtst<mode>"
4789   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4790         (plus:<V_INT_EQUIV>
4791           (eq:<V_INT_EQUIV>
4792             (and:VDQ_I
4793               (match_operand:VDQ_I 1 "register_operand" "w")
4794               (match_operand:VDQ_I 2 "register_operand" "w"))
4795             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4796           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4797   ]
4798   "TARGET_SIMD"
4799   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4800   [(set_attr "type" "neon_tst<q>")]
4801 )
4802
4803 (define_insn_and_split "aarch64_cmtstdi"
4804   [(set (match_operand:DI 0 "register_operand" "=w,r")
4805         (neg:DI
4806           (ne:DI
4807             (and:DI
4808               (match_operand:DI 1 "register_operand" "w,r")
4809               (match_operand:DI 2 "register_operand" "w,r"))
4810             (const_int 0))))
4811     (clobber (reg:CC CC_REGNUM))]
4812   "TARGET_SIMD"
4813   "#"
4814   "&& reload_completed"
4815   [(set (match_operand:DI 0 "register_operand")
4816         (neg:DI
4817           (ne:DI
4818             (and:DI
4819               (match_operand:DI 1 "register_operand")
4820               (match_operand:DI 2 "register_operand"))
4821             (const_int 0))))]
4822   {
4823     /* If we are in the general purpose register file,
4824        we split to a sequence of comparison and store.  */
4825     if (GP_REGNUM_P (REGNO (operands[0]))
4826         && GP_REGNUM_P (REGNO (operands[1])))
4827       {
4828         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4829         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4830         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4831         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4832         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4833         DONE;
4834       }
4835     /* Otherwise, we expand to a similar pattern which does not
4836        clobber CC_REGNUM.  */
4837   }
4838   [(set_attr "type" "neon_tst,multiple")]
4839 )
4840
4841 (define_insn "*aarch64_cmtstdi"
4842   [(set (match_operand:DI 0 "register_operand" "=w")
4843         (neg:DI
4844           (ne:DI
4845             (and:DI
4846               (match_operand:DI 1 "register_operand" "w")
4847               (match_operand:DI 2 "register_operand" "w"))
4848             (const_int 0))))]
4849   "TARGET_SIMD"
4850   "cmtst\t%d0, %d1, %d2"
4851   [(set_attr "type" "neon_tst")]
4852 )
4853
4854 ;; fcm(eq|ge|gt|le|lt)
4855
4856 (define_insn "aarch64_cm<optab><mode>"
4857   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4858         (neg:<V_INT_EQUIV>
4859           (COMPARISONS:<V_INT_EQUIV>
4860             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4861             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4862           )))]
4863   "TARGET_SIMD"
4864   "@
4865   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4866   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4867   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4868 )
4869
4870 ;; fac(ge|gt)
4871 ;; Note we can also handle what would be fac(le|lt) by
4872 ;; generating fac(ge|gt).
4873
4874 (define_insn "aarch64_fac<optab><mode>"
4875   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4876         (neg:<V_INT_EQUIV>
4877           (FAC_COMPARISONS:<V_INT_EQUIV>
4878             (abs:VHSDF_HSDF
4879               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4880             (abs:VHSDF_HSDF
4881               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4882   )))]
4883   "TARGET_SIMD"
4884   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4885   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4886 )
4887
4888 ;; addp
4889
4890 (define_insn "aarch64_addp<mode>"
4891   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4892         (unspec:VD_BHSI
4893           [(match_operand:VD_BHSI 1 "register_operand" "w")
4894            (match_operand:VD_BHSI 2 "register_operand" "w")]
4895           UNSPEC_ADDP))]
4896   "TARGET_SIMD"
4897   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4898   [(set_attr "type" "neon_reduc_add<q>")]
4899 )
4900
4901 (define_insn "aarch64_addpdi"
4902   [(set (match_operand:DI 0 "register_operand" "=w")
4903         (unspec:DI
4904           [(match_operand:V2DI 1 "register_operand" "w")]
4905           UNSPEC_ADDP))]
4906   "TARGET_SIMD"
4907   "addp\t%d0, %1.2d"
4908   [(set_attr "type" "neon_reduc_add")]
4909 )
4910
4911 ;; sqrt
4912
4913 (define_expand "sqrt<mode>2"
4914   [(set (match_operand:VHSDF 0 "register_operand")
4915         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4916   "TARGET_SIMD"
4917 {
4918   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4919     DONE;
4920 })
4921
4922 (define_insn "*sqrt<mode>2"
4923   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4924         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4925   "TARGET_SIMD"
4926   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4927   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4928 )
4929
4930 ;; Patterns for vector struct loads and stores.
4931
4932 (define_insn "aarch64_simd_ld2<mode>"
4933   [(set (match_operand:OI 0 "register_operand" "=w")
4934         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4935                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936                    UNSPEC_LD2))]
4937   "TARGET_SIMD"
4938   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4939   [(set_attr "type" "neon_load2_2reg<q>")]
4940 )
4941
4942 (define_insn "aarch64_simd_ld2r<mode>"
4943   [(set (match_operand:OI 0 "register_operand" "=w")
4944        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4946                   UNSPEC_LD2_DUP))]
4947   "TARGET_SIMD"
4948   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4949   [(set_attr "type" "neon_load2_all_lanes<q>")]
4950 )
4951
4952 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4953   [(set (match_operand:OI 0 "register_operand" "=w")
4954         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4955                     (match_operand:OI 2 "register_operand" "0")
4956                     (match_operand:SI 3 "immediate_operand" "i")
4957                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4958                    UNSPEC_LD2_LANE))]
4959   "TARGET_SIMD"
4960   {
4961     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4962     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4963   }
4964   [(set_attr "type" "neon_load2_one_lane")]
4965 )
4966
4967 (define_expand "vec_load_lanesoi<mode>"
4968   [(set (match_operand:OI 0 "register_operand")
4969         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4970                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971                    UNSPEC_LD2))]
4972   "TARGET_SIMD"
4973 {
4974   if (BYTES_BIG_ENDIAN)
4975     {
4976       rtx tmp = gen_reg_rtx (OImode);
4977       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4978       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4979       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4980     }
4981   else
4982     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4983   DONE;
4984 })
4985
4986 (define_insn "aarch64_simd_st2<mode>"
4987   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4988         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4989                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990                    UNSPEC_ST2))]
4991   "TARGET_SIMD"
4992   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4993   [(set_attr "type" "neon_store2_2reg<q>")]
4994 )
4995
4996 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4997 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4998   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4999         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5000                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5001                     (match_operand:SI 2 "immediate_operand" "i")]
5002                    UNSPEC_ST2_LANE))]
5003   "TARGET_SIMD"
5004   {
5005     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5006     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5007   }
5008   [(set_attr "type" "neon_store2_one_lane<q>")]
5009 )
5010
5011 (define_expand "vec_store_lanesoi<mode>"
5012   [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5013         (unspec:OI [(match_operand:OI 1 "register_operand")
5014                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5015                    UNSPEC_ST2))]
5016   "TARGET_SIMD"
5017 {
5018   if (BYTES_BIG_ENDIAN)
5019     {
5020       rtx tmp = gen_reg_rtx (OImode);
5021       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5022       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5023       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5024     }
5025   else
5026     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5027   DONE;
5028 })
5029
5030 (define_insn "aarch64_simd_ld3<mode>"
5031   [(set (match_operand:CI 0 "register_operand" "=w")
5032         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5033                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5034                    UNSPEC_LD3))]
5035   "TARGET_SIMD"
5036   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037   [(set_attr "type" "neon_load3_3reg<q>")]
5038 )
5039
5040 (define_insn "aarch64_simd_ld3r<mode>"
5041   [(set (match_operand:CI 0 "register_operand" "=w")
5042        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5043                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5044                   UNSPEC_LD3_DUP))]
5045   "TARGET_SIMD"
5046   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5047   [(set_attr "type" "neon_load3_all_lanes<q>")]
5048 )
5049
5050 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5051   [(set (match_operand:CI 0 "register_operand" "=w")
5052         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5053                     (match_operand:CI 2 "register_operand" "0")
5054                     (match_operand:SI 3 "immediate_operand" "i")
5055                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056                    UNSPEC_LD3_LANE))]
5057   "TARGET_SIMD"
5058 {
5059     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5060     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5061 }
5062   [(set_attr "type" "neon_load3_one_lane")]
5063 )
5064
5065 (define_expand "vec_load_lanesci<mode>"
5066   [(set (match_operand:CI 0 "register_operand")
5067         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5068                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5069                    UNSPEC_LD3))]
5070   "TARGET_SIMD"
5071 {
5072   if (BYTES_BIG_ENDIAN)
5073     {
5074       rtx tmp = gen_reg_rtx (CImode);
5075       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5076       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5077       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5078     }
5079   else
5080     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5081   DONE;
5082 })
5083
5084 (define_insn "aarch64_simd_st3<mode>"
5085   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5086         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5087                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088                    UNSPEC_ST3))]
5089   "TARGET_SIMD"
5090   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5091   [(set_attr "type" "neon_store3_3reg<q>")]
5092 )
5093
5094 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5095 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5096   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5097         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5098                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5099                      (match_operand:SI 2 "immediate_operand" "i")]
5100                     UNSPEC_ST3_LANE))]
5101   "TARGET_SIMD"
5102   {
5103     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5104     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5105   }
5106   [(set_attr "type" "neon_store3_one_lane<q>")]
5107 )
5108
5109 (define_expand "vec_store_lanesci<mode>"
5110   [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5111         (unspec:CI [(match_operand:CI 1 "register_operand")
5112                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5113                    UNSPEC_ST3))]
5114   "TARGET_SIMD"
5115 {
5116   if (BYTES_BIG_ENDIAN)
5117     {
5118       rtx tmp = gen_reg_rtx (CImode);
5119       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5120       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5121       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5122     }
5123   else
5124     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5125   DONE;
5126 })
5127
5128 (define_insn "aarch64_simd_ld4<mode>"
5129   [(set (match_operand:XI 0 "register_operand" "=w")
5130         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5131                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132                    UNSPEC_LD4))]
5133   "TARGET_SIMD"
5134   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5135   [(set_attr "type" "neon_load4_4reg<q>")]
5136 )
5137
5138 (define_insn "aarch64_simd_ld4r<mode>"
5139   [(set (match_operand:XI 0 "register_operand" "=w")
5140        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5141                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5142                   UNSPEC_LD4_DUP))]
5143   "TARGET_SIMD"
5144   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5145   [(set_attr "type" "neon_load4_all_lanes<q>")]
5146 )
5147
5148 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5149   [(set (match_operand:XI 0 "register_operand" "=w")
5150         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5151                     (match_operand:XI 2 "register_operand" "0")
5152                     (match_operand:SI 3 "immediate_operand" "i")
5153                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5154                    UNSPEC_LD4_LANE))]
5155   "TARGET_SIMD"
5156 {
5157     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5158     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5159 }
5160   [(set_attr "type" "neon_load4_one_lane")]
5161 )
5162
5163 (define_expand "vec_load_lanesxi<mode>"
5164   [(set (match_operand:XI 0 "register_operand")
5165         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5166                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5167                    UNSPEC_LD4))]
5168   "TARGET_SIMD"
5169 {
5170   if (BYTES_BIG_ENDIAN)
5171     {
5172       rtx tmp = gen_reg_rtx (XImode);
5173       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5174       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5175       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5176     }
5177   else
5178     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5179   DONE;
5180 })
5181
5182 (define_insn "aarch64_simd_st4<mode>"
5183   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5185                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5186                    UNSPEC_ST4))]
5187   "TARGET_SIMD"
5188   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5189   [(set_attr "type" "neon_store4_4reg<q>")]
5190 )
5191
5192 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5193 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5194   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5195         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5196                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5197                      (match_operand:SI 2 "immediate_operand" "i")]
5198                     UNSPEC_ST4_LANE))]
5199   "TARGET_SIMD"
5200   {
5201     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5202     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5203   }
5204   [(set_attr "type" "neon_store4_one_lane<q>")]
5205 )
5206
5207 (define_expand "vec_store_lanesxi<mode>"
5208   [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5209         (unspec:XI [(match_operand:XI 1 "register_operand")
5210                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5211                    UNSPEC_ST4))]
5212   "TARGET_SIMD"
5213 {
5214   if (BYTES_BIG_ENDIAN)
5215     {
5216       rtx tmp = gen_reg_rtx (XImode);
5217       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5218       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5219       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5220     }
5221   else
5222     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5223   DONE;
5224 })
5225
5226 (define_insn_and_split "aarch64_rev_reglist<mode>"
5227 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5228         (unspec:VSTRUCT
5229                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5230                     (match_operand:V16QI 2 "register_operand" "w")]
5231                    UNSPEC_REV_REGLIST))]
5232   "TARGET_SIMD"
5233   "#"
5234   "&& reload_completed"
5235   [(const_int 0)]
5236 {
5237   int i;
5238   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5239   for (i = 0; i < nregs; i++)
5240     {
5241       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5242       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5243       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5244     }
5245   DONE;
5246 }
5247   [(set_attr "type" "neon_tbl1_q")
5248    (set_attr "length" "<insn_count>")]
5249 )
5250
5251 ;; Reload patterns for AdvSIMD register list operands.
5252
5253 (define_expand "mov<mode>"
5254   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5255         (match_operand:VSTRUCT 1 "general_operand"))]
5256   "TARGET_SIMD"
5257 {
5258   if (can_create_pseudo_p ())
5259     {
5260       if (GET_CODE (operands[0]) != REG)
5261         operands[1] = force_reg (<MODE>mode, operands[1]);
5262     }
5263 })
5264
5265
5266 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5267   [(match_operand:CI 0 "register_operand")
5268    (match_operand:DI 1 "register_operand")
5269    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270   "TARGET_SIMD"
5271 {
5272   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5273   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5274   DONE;
5275 })
5276
5277 (define_insn "aarch64_ld1_x3_<mode>"
5278   [(set (match_operand:CI 0 "register_operand" "=w")
5279         (unspec:CI
5280           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5281            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5282   "TARGET_SIMD"
5283   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5284   [(set_attr "type" "neon_load1_3reg<q>")]
5285 )
5286
5287 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5288   [(match_operand:XI 0 "register_operand" "=w")
5289    (match_operand:DI 1 "register_operand" "r")
5290    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291   "TARGET_SIMD"
5292 {
5293   rtx mem = gen_rtx_MEM (XImode, operands[1]);
5294   emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5295   DONE;
5296 })
5297
5298 (define_insn "aarch64_ld1_x4_<mode>"
5299   [(set (match_operand:XI 0 "register_operand" "=w")
5300         (unspec:XI
5301           [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5302            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5303         UNSPEC_LD1))]
5304   "TARGET_SIMD"
5305   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5306   [(set_attr "type" "neon_load1_4reg<q>")]
5307 )
5308
5309 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5310   [(match_operand:DI 0 "register_operand")
5311    (match_operand:OI 1 "register_operand")
5312    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5313   "TARGET_SIMD"
5314 {
5315   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5316   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5317   DONE;
5318 })
5319
5320 (define_insn "aarch64_st1_x2_<mode>"
5321    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5322          (unspec:OI
5323           [(match_operand:OI 1 "register_operand" "w")
5324           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5325   "TARGET_SIMD"
5326   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5327   [(set_attr "type" "neon_store1_2reg<q>")]
5328 )
5329
5330 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5331   [(match_operand:DI 0 "register_operand")
5332    (match_operand:CI 1 "register_operand")
5333    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5334   "TARGET_SIMD"
5335 {
5336   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5337   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5338   DONE;
5339 })
5340
5341 (define_insn "aarch64_st1_x3_<mode>"
5342    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5343         (unspec:CI
5344          [(match_operand:CI 1 "register_operand" "w")
5345           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5346   "TARGET_SIMD"
5347   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5348   [(set_attr "type" "neon_store1_3reg<q>")]
5349 )
5350
5351 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5352   [(match_operand:DI 0 "register_operand" "")
5353    (match_operand:XI 1 "register_operand" "")
5354    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5355   "TARGET_SIMD"
5356 {
5357   rtx mem = gen_rtx_MEM (XImode, operands[0]);
5358   emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5359   DONE;
5360 })
5361
5362 (define_insn "aarch64_st1_x4_<mode>"
5363   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5364         (unspec:XI
5365            [(match_operand:XI 1 "register_operand" "w")
5366            (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5367         UNSPEC_ST1))]
5368   "TARGET_SIMD"
5369   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5370   [(set_attr "type" "neon_store1_4reg<q>")]
5371 )
5372
5373 (define_insn "*aarch64_mov<mode>"
5374   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5375         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5376   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5377    && (register_operand (operands[0], <MODE>mode)
5378        || register_operand (operands[1], <MODE>mode))"
5379   "@
5380    #
5381    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5382    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5383   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5384                      neon_load<nregs>_<nregs>reg_q")
5385    (set_attr "length" "<insn_count>,4,4")]
5386 )
5387
5388 (define_insn "aarch64_be_ld1<mode>"
5389   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5390         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5391                              "aarch64_simd_struct_operand" "Utv")]
5392         UNSPEC_LD1))]
5393   "TARGET_SIMD"
5394   "ld1\\t{%0<Vmtype>}, %1"
5395   [(set_attr "type" "neon_load1_1reg<q>")]
5396 )
5397
5398 (define_insn "aarch64_be_st1<mode>"
5399   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5400         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5401         UNSPEC_ST1))]
5402   "TARGET_SIMD"
5403   "st1\\t{%1<Vmtype>}, %0"
5404   [(set_attr "type" "neon_store1_1reg<q>")]
5405 )
5406
5407 (define_insn "*aarch64_be_movoi"
5408   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5409         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5410   "TARGET_SIMD && BYTES_BIG_ENDIAN
5411    && (register_operand (operands[0], OImode)
5412        || register_operand (operands[1], OImode))"
5413   "@
5414    #
5415    stp\\t%q1, %R1, %0
5416    ldp\\t%q0, %R0, %1"
5417   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5418    (set_attr "length" "8,4,4")]
5419 )
5420
5421 (define_insn "*aarch64_be_movci"
5422   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5423         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5424   "TARGET_SIMD && BYTES_BIG_ENDIAN
5425    && (register_operand (operands[0], CImode)
5426        || register_operand (operands[1], CImode))"
5427   "#"
5428   [(set_attr "type" "multiple")
5429    (set_attr "length" "12,4,4")]
5430 )
5431
5432 (define_insn "*aarch64_be_movxi"
5433   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5434         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5435   "TARGET_SIMD && BYTES_BIG_ENDIAN
5436    && (register_operand (operands[0], XImode)
5437        || register_operand (operands[1], XImode))"
5438   "#"
5439   [(set_attr "type" "multiple")
5440    (set_attr "length" "16,4,4")]
5441 )
5442
5443 (define_split
5444   [(set (match_operand:OI 0 "register_operand")
5445         (match_operand:OI 1 "register_operand"))]
5446   "TARGET_SIMD && reload_completed"
5447   [(const_int 0)]
5448 {
5449   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5450   DONE;
5451 })
5452
5453 (define_split
5454   [(set (match_operand:CI 0 "nonimmediate_operand")
5455         (match_operand:CI 1 "general_operand"))]
5456   "TARGET_SIMD && reload_completed"
5457   [(const_int 0)]
5458 {
5459   if (register_operand (operands[0], CImode)
5460       && register_operand (operands[1], CImode))
5461     {
5462       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5463       DONE;
5464     }
5465   else if (BYTES_BIG_ENDIAN)
5466     {
5467       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5468                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5469       emit_move_insn (gen_lowpart (V16QImode,
5470                                    simplify_gen_subreg (TImode, operands[0],
5471                                                         CImode, 32)),
5472                       gen_lowpart (V16QImode,
5473                                    simplify_gen_subreg (TImode, operands[1],
5474                                                         CImode, 32)));
5475       DONE;
5476     }
5477   else
5478     FAIL;
5479 })
5480
5481 (define_split
5482   [(set (match_operand:XI 0 "nonimmediate_operand")
5483         (match_operand:XI 1 "general_operand"))]
5484   "TARGET_SIMD && reload_completed"
5485   [(const_int 0)]
5486 {
5487   if (register_operand (operands[0], XImode)
5488       && register_operand (operands[1], XImode))
5489     {
5490       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5491       DONE;
5492     }
5493   else if (BYTES_BIG_ENDIAN)
5494     {
5495       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5496                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5497       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5498                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5499       DONE;
5500     }
5501   else
5502     FAIL;
5503 })
5504
5505 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5506   [(match_operand:VSTRUCT 0 "register_operand")
5507    (match_operand:DI 1 "register_operand")
5508    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5509   "TARGET_SIMD"
5510 {
5511   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5512   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5513                      * <VSTRUCT:nregs>);
5514
5515   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5516                                                                 mem));
5517   DONE;
5518 })
5519
5520 (define_insn "aarch64_ld2<mode>_dreg"
5521   [(set (match_operand:OI 0 "register_operand" "=w")
5522         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5523                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5524                    UNSPEC_LD2_DREG))]
5525   "TARGET_SIMD"
5526   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5527   [(set_attr "type" "neon_load2_2reg<q>")]
5528 )
5529
5530 (define_insn "aarch64_ld2<mode>_dreg"
5531   [(set (match_operand:OI 0 "register_operand" "=w")
5532         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5533                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5534                    UNSPEC_LD2_DREG))]
5535   "TARGET_SIMD"
5536   "ld1\\t{%S0.1d - %T0.1d}, %1"
5537   [(set_attr "type" "neon_load1_2reg<q>")]
5538 )
5539
5540 (define_insn "aarch64_ld3<mode>_dreg"
5541   [(set (match_operand:CI 0 "register_operand" "=w")
5542         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5543                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5544                    UNSPEC_LD3_DREG))]
5545   "TARGET_SIMD"
5546   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5547   [(set_attr "type" "neon_load3_3reg<q>")]
5548 )
5549
5550 (define_insn "aarch64_ld3<mode>_dreg"
5551   [(set (match_operand:CI 0 "register_operand" "=w")
5552         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5553                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5554                    UNSPEC_LD3_DREG))]
5555   "TARGET_SIMD"
5556   "ld1\\t{%S0.1d - %U0.1d}, %1"
5557   [(set_attr "type" "neon_load1_3reg<q>")]
5558 )
5559
5560 (define_insn "aarch64_ld4<mode>_dreg"
5561   [(set (match_operand:XI 0 "register_operand" "=w")
5562         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5563                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564                    UNSPEC_LD4_DREG))]
5565   "TARGET_SIMD"
5566   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5567   [(set_attr "type" "neon_load4_4reg<q>")]
5568 )
5569
5570 (define_insn "aarch64_ld4<mode>_dreg"
5571   [(set (match_operand:XI 0 "register_operand" "=w")
5572         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5573                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5574                    UNSPEC_LD4_DREG))]
5575   "TARGET_SIMD"
5576   "ld1\\t{%S0.1d - %V0.1d}, %1"
5577   [(set_attr "type" "neon_load1_4reg<q>")]
5578 )
5579
5580 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5581  [(match_operand:VSTRUCT 0 "register_operand")
5582   (match_operand:DI 1 "register_operand")
5583   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5584   "TARGET_SIMD"
5585 {
5586   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5587   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5588
5589   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5590   DONE;
5591 })
5592
5593 (define_expand "aarch64_ld1<VALL_F16:mode>"
5594  [(match_operand:VALL_F16 0 "register_operand")
5595   (match_operand:DI 1 "register_operand")]
5596   "TARGET_SIMD"
5597 {
5598   machine_mode mode = <VALL_F16:MODE>mode;
5599   rtx mem = gen_rtx_MEM (mode, operands[1]);
5600
5601   if (BYTES_BIG_ENDIAN)
5602     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5603   else
5604     emit_move_insn (operands[0], mem);
5605   DONE;
5606 })
5607
5608 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5609  [(match_operand:VSTRUCT 0 "register_operand")
5610   (match_operand:DI 1 "register_operand")
5611   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5612   "TARGET_SIMD"
5613 {
5614   machine_mode mode = <VSTRUCT:MODE>mode;
5615   rtx mem = gen_rtx_MEM (mode, operands[1]);
5616
5617   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5618   DONE;
5619 })
5620
5621 (define_expand "aarch64_ld1x2<VQ:mode>"
5622  [(match_operand:OI 0 "register_operand")
5623   (match_operand:DI 1 "register_operand")
5624   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5625   "TARGET_SIMD"
5626 {
5627   machine_mode mode = OImode;
5628   rtx mem = gen_rtx_MEM (mode, operands[1]);
5629
5630   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5631   DONE;
5632 })
5633
5634 (define_expand "aarch64_ld1x2<VDC:mode>"
5635  [(match_operand:OI 0 "register_operand")
5636   (match_operand:DI 1 "register_operand")
5637   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5638   "TARGET_SIMD"
5639 {
5640   machine_mode mode = OImode;
5641   rtx mem = gen_rtx_MEM (mode, operands[1]);
5642
5643   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5644   DONE;
5645 })
5646
5647
5648 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5649   [(match_operand:VSTRUCT 0 "register_operand")
5650         (match_operand:DI 1 "register_operand")
5651         (match_operand:VSTRUCT 2 "register_operand")
5652         (match_operand:SI 3 "immediate_operand")
5653         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5654   "TARGET_SIMD"
5655 {
5656   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5657   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5658                      * <VSTRUCT:nregs>);
5659
5660   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5661   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5662         operands[0], mem, operands[2], operands[3]));
5663   DONE;
5664 })
5665
5666 ;; Expanders for builtins to extract vector registers from large
5667 ;; opaque integer modes.
5668
5669 ;; D-register list.
5670
5671 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5672  [(match_operand:VDC 0 "register_operand")
5673   (match_operand:VSTRUCT 1 "register_operand")
5674   (match_operand:SI 2 "immediate_operand")]
5675   "TARGET_SIMD"
5676 {
5677   int part = INTVAL (operands[2]);
5678   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5679   int offset = part * 16;
5680
5681   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5682   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5683   DONE;
5684 })
5685
5686 ;; Q-register list.
5687
5688 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5689  [(match_operand:VQ 0 "register_operand")
5690   (match_operand:VSTRUCT 1 "register_operand")
5691   (match_operand:SI 2 "immediate_operand")]
5692   "TARGET_SIMD"
5693 {
5694   int part = INTVAL (operands[2]);
5695   int offset = part * 16;
5696
5697   emit_move_insn (operands[0],
5698                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5699   DONE;
5700 })
5701
5702 ;; Permuted-store expanders for neon intrinsics.
5703
5704 ;; Permute instructions
5705
5706 ;; vec_perm support
5707
5708 (define_expand "vec_perm<mode>"
5709   [(match_operand:VB 0 "register_operand")
5710    (match_operand:VB 1 "register_operand")
5711    (match_operand:VB 2 "register_operand")
5712    (match_operand:VB 3 "register_operand")]
5713   "TARGET_SIMD"
5714 {
5715   aarch64_expand_vec_perm (operands[0], operands[1],
5716                            operands[2], operands[3], <nunits>);
5717   DONE;
5718 })
5719
5720 (define_insn "aarch64_tbl1<mode>"
5721   [(set (match_operand:VB 0 "register_operand" "=w")
5722         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5723                     (match_operand:VB 2 "register_operand" "w")]
5724                    UNSPEC_TBL))]
5725   "TARGET_SIMD"
5726   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5727   [(set_attr "type" "neon_tbl1<q>")]
5728 )
5729
5730 ;; Two source registers.
5731
5732 (define_insn "aarch64_tbl2v16qi"
5733   [(set (match_operand:V16QI 0 "register_operand" "=w")
5734         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5735                        (match_operand:V16QI 2 "register_operand" "w")]
5736                       UNSPEC_TBL))]
5737   "TARGET_SIMD"
5738   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5739   [(set_attr "type" "neon_tbl2_q")]
5740 )
5741
5742 (define_insn "aarch64_tbl3<mode>"
5743   [(set (match_operand:VB 0 "register_operand" "=w")
5744         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5745                       (match_operand:VB 2 "register_operand" "w")]
5746                       UNSPEC_TBL))]
5747   "TARGET_SIMD"
5748   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5749   [(set_attr "type" "neon_tbl3")]
5750 )
5751
5752 (define_insn "aarch64_tbx4<mode>"
5753   [(set (match_operand:VB 0 "register_operand" "=w")
5754         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5755                       (match_operand:OI 2 "register_operand" "w")
5756                       (match_operand:VB 3 "register_operand" "w")]
5757                       UNSPEC_TBX))]
5758   "TARGET_SIMD"
5759   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5760   [(set_attr "type" "neon_tbl4")]
5761 )
5762
5763 ;; Three source registers.
5764
5765 (define_insn "aarch64_qtbl3<mode>"
5766   [(set (match_operand:VB 0 "register_operand" "=w")
5767         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5768                       (match_operand:VB 2 "register_operand" "w")]
5769                       UNSPEC_TBL))]
5770   "TARGET_SIMD"
5771   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5772   [(set_attr "type" "neon_tbl3")]
5773 )
5774
5775 (define_insn "aarch64_qtbx3<mode>"
5776   [(set (match_operand:VB 0 "register_operand" "=w")
5777         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5778                       (match_operand:CI 2 "register_operand" "w")
5779                       (match_operand:VB 3 "register_operand" "w")]
5780                       UNSPEC_TBX))]
5781   "TARGET_SIMD"
5782   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5783   [(set_attr "type" "neon_tbl3")]
5784 )
5785
5786 ;; Four source registers.
5787
5788 (define_insn "aarch64_qtbl4<mode>"
5789   [(set (match_operand:VB 0 "register_operand" "=w")
5790         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5791                       (match_operand:VB 2 "register_operand" "w")]
5792                       UNSPEC_TBL))]
5793   "TARGET_SIMD"
5794   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5795   [(set_attr "type" "neon_tbl4")]
5796 )
5797
5798 (define_insn "aarch64_qtbx4<mode>"
5799   [(set (match_operand:VB 0 "register_operand" "=w")
5800         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5801                       (match_operand:XI 2 "register_operand" "w")
5802                       (match_operand:VB 3 "register_operand" "w")]
5803                       UNSPEC_TBX))]
5804   "TARGET_SIMD"
5805   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5806   [(set_attr "type" "neon_tbl4")]
5807 )
5808
5809 (define_insn_and_split "aarch64_combinev16qi"
5810   [(set (match_operand:OI 0 "register_operand" "=w")
5811         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5812                     (match_operand:V16QI 2 "register_operand" "w")]
5813                    UNSPEC_CONCAT))]
5814   "TARGET_SIMD"
5815   "#"
5816   "&& reload_completed"
5817   [(const_int 0)]
5818 {
5819   aarch64_split_combinev16qi (operands);
5820   DONE;
5821 }
5822 [(set_attr "type" "multiple")]
5823 )
5824
5825 ;; This instruction's pattern is generated directly by
5826 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5827 ;; need corresponding changes there.
5828 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5829   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5830         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5831                           (match_operand:VALL_F16 2 "register_operand" "w")]
5832          PERMUTE))]
5833   "TARGET_SIMD"
5834   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5835   [(set_attr "type" "neon_permute<q>")]
5836 )
5837
5838 ;; This instruction's pattern is generated directly by
5839 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5840 ;; need corresponding changes there.  Note that the immediate (third)
5841 ;; operand is a lane index not a byte index.
5842 (define_insn "aarch64_ext<mode>"
5843   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5844         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5845                           (match_operand:VALL_F16 2 "register_operand" "w")
5846                           (match_operand:SI 3 "immediate_operand" "i")]
5847          UNSPEC_EXT))]
5848   "TARGET_SIMD"
5849 {
5850   operands[3] = GEN_INT (INTVAL (operands[3])
5851       * GET_MODE_UNIT_SIZE (<MODE>mode));
5852   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5853 }
5854   [(set_attr "type" "neon_ext<q>")]
5855 )
5856
5857 ;; This instruction's pattern is generated directly by
5858 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5859 ;; need corresponding changes there.
5860 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5861   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5862         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5863                     REVERSE))]
5864   "TARGET_SIMD"
5865   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5866   [(set_attr "type" "neon_rev<q>")]
5867 )
5868
5869 (define_insn "aarch64_st2<mode>_dreg"
5870   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5871         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5872                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5873                    UNSPEC_ST2))]
5874   "TARGET_SIMD"
5875   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5876   [(set_attr "type" "neon_store2_2reg")]
5877 )
5878
5879 (define_insn "aarch64_st2<mode>_dreg"
5880   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5881         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5882                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5883                    UNSPEC_ST2))]
5884   "TARGET_SIMD"
5885   "st1\\t{%S1.1d - %T1.1d}, %0"
5886   [(set_attr "type" "neon_store1_2reg")]
5887 )
5888
5889 (define_insn "aarch64_st3<mode>_dreg"
5890   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5891         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5892                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5893                    UNSPEC_ST3))]
5894   "TARGET_SIMD"
5895   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5896   [(set_attr "type" "neon_store3_3reg")]
5897 )
5898
5899 (define_insn "aarch64_st3<mode>_dreg"
5900   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5901         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5902                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5903                    UNSPEC_ST3))]
5904   "TARGET_SIMD"
5905   "st1\\t{%S1.1d - %U1.1d}, %0"
5906   [(set_attr "type" "neon_store1_3reg")]
5907 )
5908
5909 (define_insn "aarch64_st4<mode>_dreg"
5910   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5911         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5912                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5913                    UNSPEC_ST4))]
5914   "TARGET_SIMD"
5915   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5916   [(set_attr "type" "neon_store4_4reg")]
5917 )
5918
5919 (define_insn "aarch64_st4<mode>_dreg"
5920   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5921         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5922                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5923                    UNSPEC_ST4))]
5924   "TARGET_SIMD"
5925   "st1\\t{%S1.1d - %V1.1d}, %0"
5926   [(set_attr "type" "neon_store1_4reg")]
5927 )
5928
5929 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5930  [(match_operand:DI 0 "register_operand")
5931   (match_operand:VSTRUCT 1 "register_operand")
5932   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5933   "TARGET_SIMD"
5934 {
5935   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5936   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5937
5938   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5939   DONE;
5940 })
5941
5942 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5943  [(match_operand:DI 0 "register_operand")
5944   (match_operand:VSTRUCT 1 "register_operand")
5945   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5946   "TARGET_SIMD"
5947 {
5948   machine_mode mode = <VSTRUCT:MODE>mode;
5949   rtx mem = gen_rtx_MEM (mode, operands[0]);
5950
5951   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5952   DONE;
5953 })
5954
5955 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5956  [(match_operand:DI 0 "register_operand")
5957   (match_operand:VSTRUCT 1 "register_operand")
5958   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5959   (match_operand:SI 2 "immediate_operand")]
5960   "TARGET_SIMD"
5961 {
5962   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5963   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5964                      * <VSTRUCT:nregs>);
5965
5966   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5967                 mem, operands[1], operands[2]));
5968   DONE;
5969 })
5970
5971 (define_expand "aarch64_st1<VALL_F16:mode>"
5972  [(match_operand:DI 0 "register_operand")
5973   (match_operand:VALL_F16 1 "register_operand")]
5974   "TARGET_SIMD"
5975 {
5976   machine_mode mode = <VALL_F16:MODE>mode;
5977   rtx mem = gen_rtx_MEM (mode, operands[0]);
5978
5979   if (BYTES_BIG_ENDIAN)
5980     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5981   else
5982     emit_move_insn (mem, operands[1]);
5983   DONE;
5984 })
5985
5986 ;; Expander for builtins to insert vector registers into large
5987 ;; opaque integer modes.
5988
5989 ;; Q-register list.  We don't need a D-reg inserter as we zero
5990 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5991
5992 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5993  [(match_operand:VSTRUCT 0 "register_operand")
5994   (match_operand:VSTRUCT 1 "register_operand")
5995   (match_operand:VQ 2 "register_operand")
5996   (match_operand:SI 3 "immediate_operand")]
5997   "TARGET_SIMD"
5998 {
5999   int part = INTVAL (operands[3]);
6000   int offset = part * 16;
6001
6002   emit_move_insn (operands[0], operands[1]);
6003   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6004                   operands[2]);
6005   DONE;
6006 })
6007
6008 ;; Standard pattern name vec_init<mode><Vel>.
6009
6010 (define_expand "vec_init<mode><Vel>"
6011   [(match_operand:VALL_F16 0 "register_operand")
6012    (match_operand 1 "" "")]
6013   "TARGET_SIMD"
6014 {
6015   aarch64_expand_vector_init (operands[0], operands[1]);
6016   DONE;
6017 })
6018
6019 (define_expand "vec_init<mode><Vhalf>"
6020   [(match_operand:VQ_NO2E 0 "register_operand")
6021    (match_operand 1 "" "")]
6022   "TARGET_SIMD"
6023 {
6024   aarch64_expand_vector_init (operands[0], operands[1]);
6025   DONE;
6026 })
6027
6028 (define_insn "*aarch64_simd_ld1r<mode>"
6029   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6030         (vec_duplicate:VALL_F16
6031           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6032   "TARGET_SIMD"
6033   "ld1r\\t{%0.<Vtype>}, %1"
6034   [(set_attr "type" "neon_load1_all_lanes")]
6035 )
6036
6037 (define_insn "aarch64_simd_ld1<mode>_x2"
6038   [(set (match_operand:OI 0 "register_operand" "=w")
6039         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6040                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6041                    UNSPEC_LD1))]
6042   "TARGET_SIMD"
6043   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6044   [(set_attr "type" "neon_load1_2reg<q>")]
6045 )
6046
6047 (define_insn "aarch64_simd_ld1<mode>_x2"
6048   [(set (match_operand:OI 0 "register_operand" "=w")
6049         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6050                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6051                    UNSPEC_LD1))]
6052   "TARGET_SIMD"
6053   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6054   [(set_attr "type" "neon_load1_2reg<q>")]
6055 )
6056
6057
6058 (define_insn "@aarch64_frecpe<mode>"
6059   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6060         (unspec:VHSDF_HSDF
6061          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6062          UNSPEC_FRECPE))]
6063   "TARGET_SIMD"
6064   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6065   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6066 )
6067
6068 (define_insn "aarch64_frecpx<mode>"
6069   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6070         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6071          UNSPEC_FRECPX))]
6072   "TARGET_SIMD"
6073   "frecpx\t%<s>0, %<s>1"
6074   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6075 )
6076
6077 (define_insn "@aarch64_frecps<mode>"
6078   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6079         (unspec:VHSDF_HSDF
6080           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6081           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6082           UNSPEC_FRECPS))]
6083   "TARGET_SIMD"
6084   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6085   [(set_attr "type" "neon_fp_recps_<stype><q>")]
6086 )
6087
6088 (define_insn "aarch64_urecpe<mode>"
6089   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6090         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6091                 UNSPEC_URECPE))]
6092  "TARGET_SIMD"
6093  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6094   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6095
6096 ;; Standard pattern name vec_extract<mode><Vel>.
6097
6098 (define_expand "vec_extract<mode><Vel>"
6099   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6100    (match_operand:VALL_F16 1 "register_operand")
6101    (match_operand:SI 2 "immediate_operand")]
6102   "TARGET_SIMD"
6103 {
6104     emit_insn
6105       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6106     DONE;
6107 })
6108
6109 ;; aes
6110
6111 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6112   [(set (match_operand:V16QI 0 "register_operand" "=w")
6113         (unspec:V16QI
6114                 [(xor:V16QI
6115                  (match_operand:V16QI 1 "register_operand" "%0")
6116                  (match_operand:V16QI 2 "register_operand" "w"))]
6117          CRYPTO_AES))]
6118   "TARGET_SIMD && TARGET_AES"
6119   "aes<aes_op>\\t%0.16b, %2.16b"
6120   [(set_attr "type" "crypto_aese")]
6121 )
6122
6123 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6124   [(set (match_operand:V16QI 0 "register_operand" "=w")
6125         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6126          CRYPTO_AESMC))]
6127   "TARGET_SIMD && TARGET_AES"
6128   "aes<aesmc_op>\\t%0.16b, %1.16b"
6129   [(set_attr "type" "crypto_aesmc")]
6130 )
6131
6132 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6133 ;; and enforce the register dependency without scheduling or register
6134 ;; allocation messing up the order or introducing moves inbetween.
6135 ;;  Mash the two together during combine.
6136
6137 (define_insn "*aarch64_crypto_aese_fused"
6138   [(set (match_operand:V16QI 0 "register_operand" "=w")
6139         (unspec:V16QI
6140           [(unspec:V16QI
6141            [(xor:V16QI
6142                 (match_operand:V16QI 1 "register_operand" "%0")
6143                 (match_operand:V16QI 2 "register_operand" "w"))]
6144              UNSPEC_AESE)]
6145         UNSPEC_AESMC))]
6146   "TARGET_SIMD && TARGET_AES
6147    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6148   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6149   [(set_attr "type" "crypto_aese")
6150    (set_attr "length" "8")]
6151 )
6152
6153 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6154 ;; and enforce the register dependency without scheduling or register
6155 ;; allocation messing up the order or introducing moves inbetween.
6156 ;;  Mash the two together during combine.
6157
6158 (define_insn "*aarch64_crypto_aesd_fused"
6159   [(set (match_operand:V16QI 0 "register_operand" "=w")
6160         (unspec:V16QI
6161           [(unspec:V16QI
6162                     [(xor:V16QI
6163                         (match_operand:V16QI 1 "register_operand" "%0")
6164                         (match_operand:V16QI 2 "register_operand" "w"))]
6165                 UNSPEC_AESD)]
6166           UNSPEC_AESIMC))]
6167   "TARGET_SIMD && TARGET_AES
6168    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6169   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6170   [(set_attr "type" "crypto_aese")
6171    (set_attr "length" "8")]
6172 )
6173
6174 ;; sha1
6175
6176 (define_insn "aarch64_crypto_sha1hsi"
6177   [(set (match_operand:SI 0 "register_operand" "=w")
6178         (unspec:SI [(match_operand:SI 1
6179                        "register_operand" "w")]
6180          UNSPEC_SHA1H))]
6181   "TARGET_SIMD && TARGET_SHA2"
6182   "sha1h\\t%s0, %s1"
6183   [(set_attr "type" "crypto_sha1_fast")]
6184 )
6185
6186 (define_insn "aarch64_crypto_sha1hv4si"
6187   [(set (match_operand:SI 0 "register_operand" "=w")
6188         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6189                      (parallel [(const_int 0)]))]
6190          UNSPEC_SHA1H))]
6191   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6192   "sha1h\\t%s0, %s1"
6193   [(set_attr "type" "crypto_sha1_fast")]
6194 )
6195
6196 (define_insn "aarch64_be_crypto_sha1hv4si"
6197   [(set (match_operand:SI 0 "register_operand" "=w")
6198         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6199                      (parallel [(const_int 3)]))]
6200          UNSPEC_SHA1H))]
6201   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6202   "sha1h\\t%s0, %s1"
6203   [(set_attr "type" "crypto_sha1_fast")]
6204 )
6205
6206 (define_insn "aarch64_crypto_sha1su1v4si"
6207   [(set (match_operand:V4SI 0 "register_operand" "=w")
6208         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6209                       (match_operand:V4SI 2 "register_operand" "w")]
6210          UNSPEC_SHA1SU1))]
6211   "TARGET_SIMD && TARGET_SHA2"
6212   "sha1su1\\t%0.4s, %2.4s"
6213   [(set_attr "type" "crypto_sha1_fast")]
6214 )
6215
6216 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6217   [(set (match_operand:V4SI 0 "register_operand" "=w")
6218         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219                       (match_operand:SI 2 "register_operand" "w")
6220                       (match_operand:V4SI 3 "register_operand" "w")]
6221          CRYPTO_SHA1))]
6222   "TARGET_SIMD && TARGET_SHA2"
6223   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6224   [(set_attr "type" "crypto_sha1_slow")]
6225 )
6226
6227 (define_insn "aarch64_crypto_sha1su0v4si"
6228   [(set (match_operand:V4SI 0 "register_operand" "=w")
6229         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6230                       (match_operand:V4SI 2 "register_operand" "w")
6231                       (match_operand:V4SI 3 "register_operand" "w")]
6232          UNSPEC_SHA1SU0))]
6233   "TARGET_SIMD && TARGET_SHA2"
6234   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6235   [(set_attr "type" "crypto_sha1_xor")]
6236 )
6237
6238 ;; sha256
6239
6240 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6241   [(set (match_operand:V4SI 0 "register_operand" "=w")
6242         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6243                       (match_operand:V4SI 2 "register_operand" "w")
6244                       (match_operand:V4SI 3 "register_operand" "w")]
6245          CRYPTO_SHA256))]
6246   "TARGET_SIMD && TARGET_SHA2"
6247   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6248   [(set_attr "type" "crypto_sha256_slow")]
6249 )
6250
6251 (define_insn "aarch64_crypto_sha256su0v4si"
6252   [(set (match_operand:V4SI 0 "register_operand" "=w")
6253         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6254                       (match_operand:V4SI 2 "register_operand" "w")]
6255          UNSPEC_SHA256SU0))]
6256   "TARGET_SIMD && TARGET_SHA2"
6257   "sha256su0\\t%0.4s, %2.4s"
6258   [(set_attr "type" "crypto_sha256_fast")]
6259 )
6260
6261 (define_insn "aarch64_crypto_sha256su1v4si"
6262   [(set (match_operand:V4SI 0 "register_operand" "=w")
6263         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6264                       (match_operand:V4SI 2 "register_operand" "w")
6265                       (match_operand:V4SI 3 "register_operand" "w")]
6266          UNSPEC_SHA256SU1))]
6267   "TARGET_SIMD && TARGET_SHA2"
6268   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6269   [(set_attr "type" "crypto_sha256_slow")]
6270 )
6271
6272 ;; sha512
6273
6274 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6275   [(set (match_operand:V2DI 0 "register_operand" "=w")
6276         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6277                       (match_operand:V2DI 2 "register_operand" "w")
6278                       (match_operand:V2DI 3 "register_operand" "w")]
6279          CRYPTO_SHA512))]
6280   "TARGET_SIMD && TARGET_SHA3"
6281   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6282   [(set_attr "type" "crypto_sha512")]
6283 )
6284
6285 (define_insn "aarch64_crypto_sha512su0qv2di"
6286   [(set (match_operand:V2DI 0 "register_operand" "=w")
6287         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6288                       (match_operand:V2DI 2 "register_operand" "w")]
6289          UNSPEC_SHA512SU0))]
6290   "TARGET_SIMD && TARGET_SHA3"
6291   "sha512su0\\t%0.2d, %2.2d"
6292   [(set_attr "type" "crypto_sha512")]
6293 )
6294
6295 (define_insn "aarch64_crypto_sha512su1qv2di"
6296   [(set (match_operand:V2DI 0 "register_operand" "=w")
6297         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6298                       (match_operand:V2DI 2 "register_operand" "w")
6299                       (match_operand:V2DI 3 "register_operand" "w")]
6300          UNSPEC_SHA512SU1))]
6301   "TARGET_SIMD && TARGET_SHA3"
6302   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6303   [(set_attr "type" "crypto_sha512")]
6304 )
6305
6306 ;; sha3
6307
6308 (define_insn "eor3q<mode>4"
6309   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6310         (xor:VQ_I
6311          (xor:VQ_I
6312           (match_operand:VQ_I 2 "register_operand" "w")
6313           (match_operand:VQ_I 3 "register_operand" "w"))
6314          (match_operand:VQ_I 1 "register_operand" "w")))]
6315   "TARGET_SIMD && TARGET_SHA3"
6316   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6317   [(set_attr "type" "crypto_sha3")]
6318 )
6319
6320 (define_insn "aarch64_rax1qv2di"
6321   [(set (match_operand:V2DI 0 "register_operand" "=w")
6322         (xor:V2DI
6323          (rotate:V2DI
6324           (match_operand:V2DI 2 "register_operand" "w")
6325           (const_int 1))
6326          (match_operand:V2DI 1 "register_operand" "w")))]
6327   "TARGET_SIMD && TARGET_SHA3"
6328   "rax1\\t%0.2d, %1.2d, %2.2d"
6329   [(set_attr "type" "crypto_sha3")]
6330 )
6331
6332 (define_insn "aarch64_xarqv2di"
6333   [(set (match_operand:V2DI 0 "register_operand" "=w")
6334         (rotatert:V2DI
6335          (xor:V2DI
6336           (match_operand:V2DI 1 "register_operand" "%w")
6337           (match_operand:V2DI 2 "register_operand" "w"))
6338          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6339   "TARGET_SIMD && TARGET_SHA3"
6340   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6341   [(set_attr "type" "crypto_sha3")]
6342 )
6343
6344 (define_insn "bcaxq<mode>4"
6345   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6346         (xor:VQ_I
6347          (and:VQ_I
6348           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6349           (match_operand:VQ_I 2 "register_operand" "w"))
6350          (match_operand:VQ_I 1 "register_operand" "w")))]
6351   "TARGET_SIMD && TARGET_SHA3"
6352   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6353   [(set_attr "type" "crypto_sha3")]
6354 )
6355
6356 ;; SM3
6357
6358 (define_insn "aarch64_sm3ss1qv4si"
6359   [(set (match_operand:V4SI 0 "register_operand" "=w")
6360         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6361                       (match_operand:V4SI 2 "register_operand" "w")
6362                       (match_operand:V4SI 3 "register_operand" "w")]
6363          UNSPEC_SM3SS1))]
6364   "TARGET_SIMD && TARGET_SM4"
6365   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6366   [(set_attr "type" "crypto_sm3")]
6367 )
6368
6369
6370 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6371   [(set (match_operand:V4SI 0 "register_operand" "=w")
6372         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6373                       (match_operand:V4SI 2 "register_operand" "w")
6374                       (match_operand:V4SI 3 "register_operand" "w")
6375                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6376          CRYPTO_SM3TT))]
6377   "TARGET_SIMD && TARGET_SM4"
6378   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6379   [(set_attr "type" "crypto_sm3")]
6380 )
6381
6382 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6383   [(set (match_operand:V4SI 0 "register_operand" "=w")
6384         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6385                       (match_operand:V4SI 2 "register_operand" "w")
6386                       (match_operand:V4SI 3 "register_operand" "w")]
6387          CRYPTO_SM3PART))]
6388   "TARGET_SIMD && TARGET_SM4"
6389   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6390   [(set_attr "type" "crypto_sm3")]
6391 )
6392
6393 ;; SM4
6394
6395 (define_insn "aarch64_sm4eqv4si"
6396   [(set (match_operand:V4SI 0 "register_operand" "=w")
6397         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6398                       (match_operand:V4SI 2 "register_operand" "w")]
6399          UNSPEC_SM4E))]
6400   "TARGET_SIMD && TARGET_SM4"
6401   "sm4e\\t%0.4s, %2.4s"
6402   [(set_attr "type" "crypto_sm4")]
6403 )
6404
6405 (define_insn "aarch64_sm4ekeyqv4si"
6406   [(set (match_operand:V4SI 0 "register_operand" "=w")
6407         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6408                       (match_operand:V4SI 2 "register_operand" "w")]
6409          UNSPEC_SM4EKEY))]
6410   "TARGET_SIMD && TARGET_SM4"
6411   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6412   [(set_attr "type" "crypto_sm4")]
6413 )
6414
6415 ;; fp16fml
6416
6417 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6418   [(set (match_operand:VDQSF 0 "register_operand")
6419         (unspec:VDQSF
6420          [(match_operand:VDQSF 1 "register_operand")
6421           (match_operand:<VFMLA_W> 2 "register_operand")
6422           (match_operand:<VFMLA_W> 3 "register_operand")]
6423          VFMLA16_LOW))]
6424   "TARGET_F16FML"
6425 {
6426   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6427                                             <nunits> * 2, false);
6428   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6429                                             <nunits> * 2, false);
6430
6431   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6432                                                                 operands[1],
6433                                                                 operands[2],
6434                                                                 operands[3],
6435                                                                 p1, p2));
6436   DONE;
6437
6438 })
6439
6440 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6441   [(set (match_operand:VDQSF 0 "register_operand")
6442         (unspec:VDQSF
6443          [(match_operand:VDQSF 1 "register_operand")
6444           (match_operand:<VFMLA_W> 2 "register_operand")
6445           (match_operand:<VFMLA_W> 3 "register_operand")]
6446          VFMLA16_HIGH))]
6447   "TARGET_F16FML"
6448 {
6449   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6450   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6451
6452   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6453                                                                  operands[1],
6454                                                                  operands[2],
6455                                                                  operands[3],
6456                                                                  p1, p2));
6457   DONE;
6458 })
6459
6460 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6461   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6462         (fma:VDQSF
6463          (float_extend:VDQSF
6464           (vec_select:<VFMLA_SEL_W>
6465            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6466            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6467          (float_extend:VDQSF
6468           (vec_select:<VFMLA_SEL_W>
6469            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6470            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6471          (match_operand:VDQSF 1 "register_operand" "0")))]
6472   "TARGET_F16FML"
6473   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6474   [(set_attr "type" "neon_fp_mul_s")]
6475 )
6476
6477 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6478   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6479         (fma:VDQSF
6480          (float_extend:VDQSF
6481           (neg:<VFMLA_SEL_W>
6482            (vec_select:<VFMLA_SEL_W>
6483             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6484             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6485          (float_extend:VDQSF
6486           (vec_select:<VFMLA_SEL_W>
6487            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6488            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6489          (match_operand:VDQSF 1 "register_operand" "0")))]
6490   "TARGET_F16FML"
6491   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6492   [(set_attr "type" "neon_fp_mul_s")]
6493 )
6494
6495 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6496   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6497         (fma:VDQSF
6498          (float_extend:VDQSF
6499           (vec_select:<VFMLA_SEL_W>
6500            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6501            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6502          (float_extend:VDQSF
6503           (vec_select:<VFMLA_SEL_W>
6504            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6505            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6506          (match_operand:VDQSF 1 "register_operand" "0")))]
6507   "TARGET_F16FML"
6508   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6509   [(set_attr "type" "neon_fp_mul_s")]
6510 )
6511
6512 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6513   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6514         (fma:VDQSF
6515          (float_extend:VDQSF
6516           (neg:<VFMLA_SEL_W>
6517            (vec_select:<VFMLA_SEL_W>
6518             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6519             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6520          (float_extend:VDQSF
6521           (vec_select:<VFMLA_SEL_W>
6522            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6523            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6524          (match_operand:VDQSF 1 "register_operand" "0")))]
6525   "TARGET_F16FML"
6526   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6527   [(set_attr "type" "neon_fp_mul_s")]
6528 )
6529
6530 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6531   [(set (match_operand:V2SF 0 "register_operand")
6532         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6533                            (match_operand:V4HF 2 "register_operand")
6534                            (match_operand:V4HF 3 "register_operand")
6535                            (match_operand:SI 4 "aarch64_imm2")]
6536          VFMLA16_LOW))]
6537   "TARGET_F16FML"
6538 {
6539     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6540     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6541
6542     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6543                                                             operands[1],
6544                                                             operands[2],
6545                                                             operands[3],
6546                                                             p1, lane));
6547     DONE;
6548 }
6549 )
6550
6551 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6552   [(set (match_operand:V2SF 0 "register_operand")
6553         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6554                            (match_operand:V4HF 2 "register_operand")
6555                            (match_operand:V4HF 3 "register_operand")
6556                            (match_operand:SI 4 "aarch64_imm2")]
6557          VFMLA16_HIGH))]
6558   "TARGET_F16FML"
6559 {
6560     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6561     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6562
6563     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6564                                                              operands[1],
6565                                                              operands[2],
6566                                                              operands[3],
6567                                                              p1, lane));
6568     DONE;
6569 })
6570
6571 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6572   [(set (match_operand:V2SF 0 "register_operand" "=w")
6573         (fma:V2SF
6574          (float_extend:V2SF
6575            (vec_select:V2HF
6576             (match_operand:V4HF 2 "register_operand" "w")
6577             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6578          (float_extend:V2SF
6579            (vec_duplicate:V2HF
6580             (vec_select:HF
6581              (match_operand:V4HF 3 "register_operand" "x")
6582              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6583          (match_operand:V2SF 1 "register_operand" "0")))]
6584   "TARGET_F16FML"
6585   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6586   [(set_attr "type" "neon_fp_mul_s")]
6587 )
6588
6589 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6590   [(set (match_operand:V2SF 0 "register_operand" "=w")
6591         (fma:V2SF
6592          (float_extend:V2SF
6593           (neg:V2HF
6594            (vec_select:V2HF
6595             (match_operand:V4HF 2 "register_operand" "w")
6596             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6597          (float_extend:V2SF
6598           (vec_duplicate:V2HF
6599            (vec_select:HF
6600             (match_operand:V4HF 3 "register_operand" "x")
6601             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6602          (match_operand:V2SF 1 "register_operand" "0")))]
6603   "TARGET_F16FML"
6604   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6605   [(set_attr "type" "neon_fp_mul_s")]
6606 )
6607
6608 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6609   [(set (match_operand:V2SF 0 "register_operand" "=w")
6610         (fma:V2SF
6611          (float_extend:V2SF
6612            (vec_select:V2HF
6613             (match_operand:V4HF 2 "register_operand" "w")
6614             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6615          (float_extend:V2SF
6616            (vec_duplicate:V2HF
6617             (vec_select:HF
6618              (match_operand:V4HF 3 "register_operand" "x")
6619              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6620          (match_operand:V2SF 1 "register_operand" "0")))]
6621   "TARGET_F16FML"
6622   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6623   [(set_attr "type" "neon_fp_mul_s")]
6624 )
6625
6626 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6627   [(set (match_operand:V2SF 0 "register_operand" "=w")
6628         (fma:V2SF
6629          (float_extend:V2SF
6630            (neg:V2HF
6631             (vec_select:V2HF
6632              (match_operand:V4HF 2 "register_operand" "w")
6633              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6634          (float_extend:V2SF
6635            (vec_duplicate:V2HF
6636             (vec_select:HF
6637              (match_operand:V4HF 3 "register_operand" "x")
6638              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6639          (match_operand:V2SF 1 "register_operand" "0")))]
6640   "TARGET_F16FML"
6641   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6642   [(set_attr "type" "neon_fp_mul_s")]
6643 )
6644
6645 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6646   [(set (match_operand:V4SF 0 "register_operand")
6647         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6648                            (match_operand:V8HF 2 "register_operand")
6649                            (match_operand:V8HF 3 "register_operand")
6650                            (match_operand:SI 4 "aarch64_lane_imm3")]
6651          VFMLA16_LOW))]
6652   "TARGET_F16FML"
6653 {
6654     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6655     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6656
6657     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6658                                                               operands[1],
6659                                                               operands[2],
6660                                                               operands[3],
6661                                                               p1, lane));
6662     DONE;
6663 })
6664
6665 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6666   [(set (match_operand:V4SF 0 "register_operand")
6667         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6668                            (match_operand:V8HF 2 "register_operand")
6669                            (match_operand:V8HF 3 "register_operand")
6670                            (match_operand:SI 4 "aarch64_lane_imm3")]
6671          VFMLA16_HIGH))]
6672   "TARGET_F16FML"
6673 {
6674     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6675     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6676
6677     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6678                                                                operands[1],
6679                                                                operands[2],
6680                                                                operands[3],
6681                                                                p1, lane));
6682     DONE;
6683 })
6684
6685 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6686   [(set (match_operand:V4SF 0 "register_operand" "=w")
6687         (fma:V4SF
6688          (float_extend:V4SF
6689           (vec_select:V4HF
6690             (match_operand:V8HF 2 "register_operand" "w")
6691             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6692          (float_extend:V4SF
6693           (vec_duplicate:V4HF
6694            (vec_select:HF
6695             (match_operand:V8HF 3 "register_operand" "x")
6696             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6697          (match_operand:V4SF 1 "register_operand" "0")))]
6698   "TARGET_F16FML"
6699   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6700   [(set_attr "type" "neon_fp_mul_s")]
6701 )
6702
6703 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6704   [(set (match_operand:V4SF 0 "register_operand" "=w")
6705         (fma:V4SF
6706           (float_extend:V4SF
6707            (neg:V4HF
6708             (vec_select:V4HF
6709              (match_operand:V8HF 2 "register_operand" "w")
6710              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6711          (float_extend:V4SF
6712           (vec_duplicate:V4HF
6713            (vec_select:HF
6714             (match_operand:V8HF 3 "register_operand" "x")
6715             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6716          (match_operand:V4SF 1 "register_operand" "0")))]
6717   "TARGET_F16FML"
6718   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6719   [(set_attr "type" "neon_fp_mul_s")]
6720 )
6721
6722 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6723   [(set (match_operand:V4SF 0 "register_operand" "=w")
6724         (fma:V4SF
6725          (float_extend:V4SF
6726           (vec_select:V4HF
6727             (match_operand:V8HF 2 "register_operand" "w")
6728             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6729          (float_extend:V4SF
6730           (vec_duplicate:V4HF
6731            (vec_select:HF
6732             (match_operand:V8HF 3 "register_operand" "x")
6733             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6734          (match_operand:V4SF 1 "register_operand" "0")))]
6735   "TARGET_F16FML"
6736   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6737   [(set_attr "type" "neon_fp_mul_s")]
6738 )
6739
6740 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6741   [(set (match_operand:V4SF 0 "register_operand" "=w")
6742         (fma:V4SF
6743          (float_extend:V4SF
6744           (neg:V4HF
6745            (vec_select:V4HF
6746             (match_operand:V8HF 2 "register_operand" "w")
6747             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6748          (float_extend:V4SF
6749           (vec_duplicate:V4HF
6750            (vec_select:HF
6751             (match_operand:V8HF 3 "register_operand" "x")
6752             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6753          (match_operand:V4SF 1 "register_operand" "0")))]
6754   "TARGET_F16FML"
6755   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6756   [(set_attr "type" "neon_fp_mul_s")]
6757 )
6758
6759 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6760   [(set (match_operand:V2SF 0 "register_operand")
6761         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6762                       (match_operand:V4HF 2 "register_operand")
6763                       (match_operand:V8HF 3 "register_operand")
6764                       (match_operand:SI 4 "aarch64_lane_imm3")]
6765          VFMLA16_LOW))]
6766   "TARGET_F16FML"
6767 {
6768     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6769     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6770
6771     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6772                                                              operands[1],
6773                                                              operands[2],
6774                                                              operands[3],
6775                                                              p1, lane));
6776     DONE;
6777
6778 })
6779
6780 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6781   [(set (match_operand:V2SF 0 "register_operand")
6782         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6783                       (match_operand:V4HF 2 "register_operand")
6784                       (match_operand:V8HF 3 "register_operand")
6785                       (match_operand:SI 4 "aarch64_lane_imm3")]
6786          VFMLA16_HIGH))]
6787   "TARGET_F16FML"
6788 {
6789     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6790     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6791
6792     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6793                                                               operands[1],
6794                                                               operands[2],
6795                                                               operands[3],
6796                                                               p1, lane));
6797     DONE;
6798
6799 })
6800
6801 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6802   [(set (match_operand:V2SF 0 "register_operand" "=w")
6803         (fma:V2SF
6804          (float_extend:V2SF
6805            (vec_select:V2HF
6806             (match_operand:V4HF 2 "register_operand" "w")
6807             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6808          (float_extend:V2SF
6809           (vec_duplicate:V2HF
6810            (vec_select:HF
6811             (match_operand:V8HF 3 "register_operand" "x")
6812             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6813          (match_operand:V2SF 1 "register_operand" "0")))]
6814   "TARGET_F16FML"
6815   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6816   [(set_attr "type" "neon_fp_mul_s")]
6817 )
6818
6819 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6820   [(set (match_operand:V2SF 0 "register_operand" "=w")
6821         (fma:V2SF
6822          (float_extend:V2SF
6823           (neg:V2HF
6824            (vec_select:V2HF
6825             (match_operand:V4HF 2 "register_operand" "w")
6826             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6827          (float_extend:V2SF
6828           (vec_duplicate:V2HF
6829            (vec_select:HF
6830             (match_operand:V8HF 3 "register_operand" "x")
6831             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6832          (match_operand:V2SF 1 "register_operand" "0")))]
6833   "TARGET_F16FML"
6834   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6835   [(set_attr "type" "neon_fp_mul_s")]
6836 )
6837
6838 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6839   [(set (match_operand:V2SF 0 "register_operand" "=w")
6840         (fma:V2SF
6841          (float_extend:V2SF
6842            (vec_select:V2HF
6843             (match_operand:V4HF 2 "register_operand" "w")
6844             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6845          (float_extend:V2SF
6846           (vec_duplicate:V2HF
6847            (vec_select:HF
6848             (match_operand:V8HF 3 "register_operand" "x")
6849             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6850          (match_operand:V2SF 1 "register_operand" "0")))]
6851   "TARGET_F16FML"
6852   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6853   [(set_attr "type" "neon_fp_mul_s")]
6854 )
6855
6856 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6857   [(set (match_operand:V2SF 0 "register_operand" "=w")
6858         (fma:V2SF
6859          (float_extend:V2SF
6860           (neg:V2HF
6861            (vec_select:V2HF
6862             (match_operand:V4HF 2 "register_operand" "w")
6863             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6864          (float_extend:V2SF
6865           (vec_duplicate:V2HF
6866            (vec_select:HF
6867             (match_operand:V8HF 3 "register_operand" "x")
6868             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6869          (match_operand:V2SF 1 "register_operand" "0")))]
6870   "TARGET_F16FML"
6871   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6872   [(set_attr "type" "neon_fp_mul_s")]
6873 )
6874
6875 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6876   [(set (match_operand:V4SF 0 "register_operand")
6877         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6878                       (match_operand:V8HF 2 "register_operand")
6879                       (match_operand:V4HF 3 "register_operand")
6880                       (match_operand:SI 4 "aarch64_imm2")]
6881          VFMLA16_LOW))]
6882   "TARGET_F16FML"
6883 {
6884     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6885     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6886
6887     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6888                                                              operands[1],
6889                                                              operands[2],
6890                                                              operands[3],
6891                                                              p1, lane));
6892     DONE;
6893 })
6894
6895 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6896   [(set (match_operand:V4SF 0 "register_operand")
6897         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6898                       (match_operand:V8HF 2 "register_operand")
6899                       (match_operand:V4HF 3 "register_operand")
6900                       (match_operand:SI 4 "aarch64_imm2")]
6901          VFMLA16_HIGH))]
6902   "TARGET_F16FML"
6903 {
6904     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6905     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6906
6907     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6908                                                               operands[1],
6909                                                               operands[2],
6910                                                               operands[3],
6911                                                               p1, lane));
6912     DONE;
6913 })
6914
6915 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6916   [(set (match_operand:V4SF 0 "register_operand" "=w")
6917         (fma:V4SF
6918          (float_extend:V4SF
6919           (vec_select:V4HF
6920            (match_operand:V8HF 2 "register_operand" "w")
6921            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6922          (float_extend:V4SF
6923           (vec_duplicate:V4HF
6924            (vec_select:HF
6925             (match_operand:V4HF 3 "register_operand" "x")
6926             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6927          (match_operand:V4SF 1 "register_operand" "0")))]
6928   "TARGET_F16FML"
6929   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6930   [(set_attr "type" "neon_fp_mul_s")]
6931 )
6932
6933 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6934   [(set (match_operand:V4SF 0 "register_operand" "=w")
6935         (fma:V4SF
6936          (float_extend:V4SF
6937           (neg:V4HF
6938            (vec_select:V4HF
6939             (match_operand:V8HF 2 "register_operand" "w")
6940             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6941          (float_extend:V4SF
6942           (vec_duplicate:V4HF
6943            (vec_select:HF
6944             (match_operand:V4HF 3 "register_operand" "x")
6945             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6946          (match_operand:V4SF 1 "register_operand" "0")))]
6947   "TARGET_F16FML"
6948   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6949   [(set_attr "type" "neon_fp_mul_s")]
6950 )
6951
6952 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6953   [(set (match_operand:V4SF 0 "register_operand" "=w")
6954         (fma:V4SF
6955          (float_extend:V4SF
6956           (vec_select:V4HF
6957            (match_operand:V8HF 2 "register_operand" "w")
6958            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6959          (float_extend:V4SF
6960           (vec_duplicate:V4HF
6961            (vec_select:HF
6962             (match_operand:V4HF 3 "register_operand" "x")
6963             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6964          (match_operand:V4SF 1 "register_operand" "0")))]
6965   "TARGET_F16FML"
6966   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6967   [(set_attr "type" "neon_fp_mul_s")]
6968 )
6969
6970 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6971   [(set (match_operand:V4SF 0 "register_operand" "=w")
6972         (fma:V4SF
6973          (float_extend:V4SF
6974           (neg:V4HF
6975            (vec_select:V4HF
6976             (match_operand:V8HF 2 "register_operand" "w")
6977             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6978          (float_extend:V4SF
6979           (vec_duplicate:V4HF
6980            (vec_select:HF
6981             (match_operand:V4HF 3 "register_operand" "x")
6982             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6983          (match_operand:V4SF 1 "register_operand" "0")))]
6984   "TARGET_F16FML"
6985   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6986   [(set_attr "type" "neon_fp_mul_s")]
6987 )
6988
6989 ;; pmull
6990
6991 (define_insn "aarch64_crypto_pmulldi"
6992   [(set (match_operand:TI 0 "register_operand" "=w")
6993         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6994                      (match_operand:DI 2 "register_operand" "w")]
6995                     UNSPEC_PMULL))]
6996  "TARGET_SIMD && TARGET_AES"
6997  "pmull\\t%0.1q, %1.1d, %2.1d"
6998   [(set_attr "type" "crypto_pmull")]
6999 )
7000
7001 (define_insn "aarch64_crypto_pmullv2di"
7002  [(set (match_operand:TI 0 "register_operand" "=w")
7003        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7004                    (match_operand:V2DI 2 "register_operand" "w")]
7005                   UNSPEC_PMULL2))]
7006   "TARGET_SIMD && TARGET_AES"
7007   "pmull2\\t%0.1q, %1.2d, %2.2d"
7008   [(set_attr "type" "crypto_pmull")]
7009 )
7010
7011 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
7012 (define_insn "<optab><Vnarrowq><mode>2"
7013   [(set (match_operand:VQN 0 "register_operand" "=w")
7014         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7015   "TARGET_SIMD"
7016   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7017   [(set_attr "type" "neon_shift_imm_long")]
7018 )
7019
7020 ;; Truncate a 128-bit integer vector to a 64-bit vector.
7021 (define_insn "trunc<mode><Vnarrowq>2"
7022   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7023         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7024   "TARGET_SIMD"
7025   "xtn\t%0.<Vntype>, %1.<Vtype>"
7026   [(set_attr "type" "neon_shift_imm_narrow_q")]
7027 )