gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
  23         (match_operand:VALL_F16 1 "general_operand"))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand")
  43         (match_operand:VALL 1 "general_operand"))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 423 ;; fact that their usage need to guarantee that the source vectors are
 424 ;; contiguous.  It would be wrong to describe the operation without being able
 425 ;; to describe the permute that is also required, but even if that is done
 426 ;; the permute would have been created as a LOAD_LANES which means the values
 427 ;; in the registers are in the wrong order.
 428 (define_insn "aarch64_fcadd<rot><mode>"
 429   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 430         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 431                        (match_operand:VHSDF 2 "register_operand" "w")]
 432                        FCADD))]
 433   "TARGET_COMPLEX"
 434   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 435   [(set_attr "type" "neon_fcadd")]
 436 )
 437
 438 (define_insn "aarch64_fcmla<rot><mode>"
 439   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 440         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 441                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 442                                    (match_operand:VHSDF 3 "register_operand" "w")]
 443                                    FCMLA)))]
 444   "TARGET_COMPLEX"
 445   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 446   [(set_attr "type" "neon_fcmla")]
 447 )
 448
 449
 450 (define_insn "aarch64_fcmla_lane<rot><mode>"
 451   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 452         (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
 453                     (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 454                                    (match_operand:VHSDF 3 "register_operand" "w")
 455                                    (match_operand:SI 4 "const_int_operand" "n")]
 456                                    FCMLA)))]
 457   "TARGET_COMPLEX"
 458 {
 459   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 460   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 461 }
 462   [(set_attr "type" "neon_fcmla")]
 463 )
 464
 465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
 466   [(set (match_operand:V4HF 0 "register_operand" "=w")
 467         (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
 468                    (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 469                                  (match_operand:V8HF 3 "register_operand" "w")
 470                                  (match_operand:SI 4 "const_int_operand" "n")]
 471                                  FCMLA)))]
 472   "TARGET_COMPLEX"
 473 {
 474   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 475   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 476 }
 477   [(set_attr "type" "neon_fcmla")]
 478 )
 479
 480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 481   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 482         (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
 483                      (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 484                                      (match_operand:<VHALF> 3 "register_operand" "w")
 485                                      (match_operand:SI 4 "const_int_operand" "n")]
 486                                      FCMLA)))]
 487   "TARGET_COMPLEX"
 488 {
 489   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 490   operands[4]
 491     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 492   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 493 }
 494   [(set_attr "type" "neon_fcmla")]
 495 )
 496
 497 ;; These instructions map to the __builtins for the Dot Product operations.
 498 (define_insn "aarch64_<sur>dot<vsi2qi>"
 499   [(set (match_operand:VS 0 "register_operand" "=w")
 500         (plus:VS (match_operand:VS 1 "register_operand" "0")
 501                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 502                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 503                 DOTPROD)))]
 504   "TARGET_DOTPROD"
 505   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 506   [(set_attr "type" "neon_dot<q>")]
 507 )
 508
 509 ;; These expands map to the Dot Product optab the vectorizer checks for.
 510 ;; The auto-vectorizer expects a dot product builtin that also does an
 511 ;; accumulation into the provided register.
 512 ;; Given the following pattern
 513 ;;
 514 ;; for (i=0; i<len; i++) {
 515 ;;     c = a[i] * b[i];
 516 ;;     r += c;
 517 ;; }
 518 ;; return result;
 519 ;;
 520 ;; This can be auto-vectorized to
 521 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 522 ;;
 523 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 526 ;; ...
 527 ;;
 528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 529 (define_expand "<sur>dot_prod<vsi2qi>"
 530   [(set (match_operand:VS 0 "register_operand")
 531         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 532                             (match_operand:<VSI2QI> 2 "register_operand")]
 533                  DOTPROD)
 534                 (match_operand:VS 3 "register_operand")))]
 535   "TARGET_DOTPROD"
 536 {
 537   emit_insn (
 538     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 539                                     operands[2]));
 540   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 541   DONE;
 542 })
 543
 544 ;; These instructions map to the __builtins for the Dot Product
 545 ;; indexed operations.
 546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 547   [(set (match_operand:VS 0 "register_operand" "=w")
 548         (plus:VS (match_operand:VS 1 "register_operand" "0")
 549                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 550                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 551                             (match_operand:SI 4 "immediate_operand" "i")]
 552                 DOTPROD)))]
 553   "TARGET_DOTPROD"
 554   {
 555     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 556     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 557   }
 558   [(set_attr "type" "neon_dot<q>")]
 559 )
 560
 561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 562   [(set (match_operand:VS 0 "register_operand" "=w")
 563         (plus:VS (match_operand:VS 1 "register_operand" "0")
 564                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 565                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 566                             (match_operand:SI 4 "immediate_operand" "i")]
 567                 DOTPROD)))]
 568   "TARGET_DOTPROD"
 569   {
 570     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 571     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 572   }
 573   [(set_attr "type" "neon_dot<q>")]
 574 )
 575
 576 (define_expand "copysign<mode>3"
 577   [(match_operand:VHSDF 0 "register_operand")
 578    (match_operand:VHSDF 1 "register_operand")
 579    (match_operand:VHSDF 2 "register_operand")]
 580   "TARGET_FLOAT && TARGET_SIMD"
 581 {
 582   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 583   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 584
 585   emit_move_insn (v_bitmask,
 586                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 587                                                      HOST_WIDE_INT_M1U << bits));
 588   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 589                                          operands[2], operands[1]));
 590   DONE;
 591 }
 592 )
 593
 594 (define_insn "*aarch64_mul3_elt<mode>"
 595  [(set (match_operand:VMUL 0 "register_operand" "=w")
 596     (mult:VMUL
 597       (vec_duplicate:VMUL
 598           (vec_select:<VEL>
 599             (match_operand:VMUL 1 "register_operand" "<h_con>")
 600             (parallel [(match_operand:SI 2 "immediate_operand")])))
 601       (match_operand:VMUL 3 "register_operand" "w")))]
 602   "TARGET_SIMD"
 603   {
 604     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 605     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 606   }
 607   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 608 )
 609
 610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 611   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 612      (mult:VMUL_CHANGE_NLANES
 613        (vec_duplicate:VMUL_CHANGE_NLANES
 614           (vec_select:<VEL>
 615             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 616             (parallel [(match_operand:SI 2 "immediate_operand")])))
 617       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 618   "TARGET_SIMD"
 619   {
 620     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 621     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 622   }
 623   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 624 )
 625
 626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 627  [(set (match_operand:VMUL 0 "register_operand" "=w")
 628     (mult:VMUL
 629       (vec_duplicate:VMUL
 630             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 631       (match_operand:VMUL 2 "register_operand" "w")))]
 632   "TARGET_SIMD"
 633   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 634   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 635 )
 636
 637 (define_insn "@aarch64_rsqrte<mode>"
 638   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 639         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 640                      UNSPEC_RSQRTE))]
 641   "TARGET_SIMD"
 642   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 643   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 644
 645 (define_insn "@aarch64_rsqrts<mode>"
 646   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 647         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 648                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 649          UNSPEC_RSQRTS))]
 650   "TARGET_SIMD"
 651   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 652   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 653
 654 (define_expand "rsqrt<mode>2"
 655   [(set (match_operand:VALLF 0 "register_operand")
 656         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 657                      UNSPEC_RSQRT))]
 658   "TARGET_SIMD"
 659 {
 660   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 661   DONE;
 662 })
 663
 664 (define_insn "*aarch64_mul3_elt_to_64v2df"
 665   [(set (match_operand:DF 0 "register_operand" "=w")
 666      (mult:DF
 667        (vec_select:DF
 668          (match_operand:V2DF 1 "register_operand" "w")
 669          (parallel [(match_operand:SI 2 "immediate_operand")]))
 670        (match_operand:DF 3 "register_operand" "w")))]
 671   "TARGET_SIMD"
 672   {
 673     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 674     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 675   }
 676   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 677 )
 678
 679 (define_insn "neg<mode>2"
 680   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 681         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 682   "TARGET_SIMD"
 683   "neg\t%0.<Vtype>, %1.<Vtype>"
 684   [(set_attr "type" "neon_neg<q>")]
 685 )
 686
 687 (define_insn "abs<mode>2"
 688   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 689         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 690   "TARGET_SIMD"
 691   "abs\t%0.<Vtype>, %1.<Vtype>"
 692   [(set_attr "type" "neon_abs<q>")]
 693 )
 694
 695 ;; The intrinsic version of integer ABS must not be allowed to
 696 ;; combine with any operation with an integerated ABS step, such
 697 ;; as SABD.
 698 (define_insn "aarch64_abs<mode>"
 699   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 700           (unspec:VSDQ_I_DI
 701             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 702            UNSPEC_ABS))]
 703   "TARGET_SIMD"
 704   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 705   [(set_attr "type" "neon_abs<q>")]
 706 )
 707
 708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 709 ;; This isn't accurate as ABS treats always its input as a signed value.
 710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 713 (define_insn "aarch64_<su>abd<mode>_3"
 714   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 715         (minus:VDQ_BHSI
 716           (USMAX:VDQ_BHSI
 717             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 718             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 719           (<max_opp>:VDQ_BHSI
 720             (match_dup 1)
 721             (match_dup 2))))]
 722   "TARGET_SIMD"
 723   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 724   [(set_attr "type" "neon_abd<q>")]
 725 )
 726
 727 (define_insn "aarch64_<sur>abdl2<mode>_3"
 728   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 729         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 730                           (match_operand:VDQV_S 2 "register_operand" "w")]
 731         ABDL2))]
 732   "TARGET_SIMD"
 733   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 734   [(set_attr "type" "neon_abd<q>")]
 735 )
 736
 737 (define_insn "aarch64_<sur>abal<mode>_4"
 738   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 739         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 740                           (match_operand:VDQV_S 2 "register_operand" "w")
 741                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 742         ABAL))]
 743   "TARGET_SIMD"
 744   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 745   [(set_attr "type" "neon_arith_acc<q>")]
 746 )
 747
 748 (define_insn "aarch64_<sur>adalp<mode>_3"
 749   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 750         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 751                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 752         ADALP))]
 753   "TARGET_SIMD"
 754   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 755   [(set_attr "type" "neon_reduc_add<q>")]
 756 )
 757
 758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 759 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 760 ;; reduction of the difference into a V4SI vector and accumulate that into
 761 ;; operand 3 before copying that into the result operand 0.
 762 ;; Perform that with a sequence of:
 763 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 764 ;; UABAL        tmp.8h, op1.16b, op2.16b
 765 ;; UADALP       op3.4s, tmp.8h
 766 ;; MOV          op0, op3 // should be eliminated in later passes.
 767 ;;
 768 ;; For TARGET_DOTPROD we do:
 769 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
 770 ;; UABD tmp2.16b, op1.16b, op2.16b
 771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
 772 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
 773 ;;
 774 ;; The signed version just uses the signed variants of the above instructions
 775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
 776 ;; unsigned.
 777
 778 (define_expand "<sur>sadv16qi"
 779   [(use (match_operand:V4SI 0 "register_operand"))
 780    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 781                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 782    (use (match_operand:V4SI 3 "register_operand"))]
 783   "TARGET_SIMD"
 784   {
 785     if (TARGET_DOTPROD)
 786       {
 787         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
 788         rtx abd = gen_reg_rtx (V16QImode);
 789         emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
 790         emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
 791                                           abd, ones));
 792         DONE;
 793       }
 794     rtx reduc = gen_reg_rtx (V8HImode);
 795     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 796                                                operands[2]));
 797     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 798                                               operands[2], reduc));
 799     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 800                                               operands[3]));
 801     emit_move_insn (operands[0], operands[3]);
 802     DONE;
 803   }
 804 )
 805
 806 (define_insn "aba<mode>_3"
 807   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 808         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 809                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 810                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 811                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 812   "TARGET_SIMD"
 813   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 814   [(set_attr "type" "neon_arith_acc<q>")]
 815 )
 816
 817 (define_insn "fabd<mode>3"
 818   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 819         (abs:VHSDF_HSDF
 820           (minus:VHSDF_HSDF
 821             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 822             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 823   "TARGET_SIMD"
 824   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 825   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 826 )
 827
 828 ;; For AND (vector, register) and BIC (vector, immediate)
 829 (define_insn "and<mode>3"
 830   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 831         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 832                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 833   "TARGET_SIMD"
 834   {
 835     switch (which_alternative)
 836       {
 837       case 0:
 838         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 839       case 1:
 840         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 841                                                   AARCH64_CHECK_BIC);
 842       default:
 843         gcc_unreachable ();
 844       }
 845   }
 846   [(set_attr "type" "neon_logic<q>")]
 847 )
 848
 849 ;; For ORR (vector, register) and ORR (vector, immediate)
 850 (define_insn "ior<mode>3"
 851   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 852         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 853                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 854   "TARGET_SIMD"
 855   {
 856     switch (which_alternative)
 857       {
 858       case 0:
 859         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 860       case 1:
 861         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 862                                                   AARCH64_CHECK_ORR);
 863       default:
 864         gcc_unreachable ();
 865       }
 866   }
 867   [(set_attr "type" "neon_logic<q>")]
 868 )
 869
 870 (define_insn "xor<mode>3"
 871   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 872         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 873                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 874   "TARGET_SIMD"
 875   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 876   [(set_attr "type" "neon_logic<q>")]
 877 )
 878
 879 (define_insn "one_cmpl<mode>2"
 880   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 881         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 882   "TARGET_SIMD"
 883   "not\t%0.<Vbtype>, %1.<Vbtype>"
 884   [(set_attr "type" "neon_logic<q>")]
 885 )
 886
 887 (define_insn "aarch64_simd_vec_set<mode>"
 888   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 889         (vec_merge:VALL_F16
 890             (vec_duplicate:VALL_F16
 891                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 892             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 893             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 894   "TARGET_SIMD"
 895   {
 896    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 897    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 898    switch (which_alternative)
 899      {
 900      case 0:
 901         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 902      case 1:
 903         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 904      case 2:
 905         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 906      default:
 907         gcc_unreachable ();
 908      }
 909   }
 910   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 911 )
 912
 913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 914   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 915         (vec_merge:VALL_F16
 916             (vec_duplicate:VALL_F16
 917               (vec_select:<VEL>
 918                 (match_operand:VALL_F16 3 "register_operand" "w")
 919                 (parallel
 920                   [(match_operand:SI 4 "immediate_operand" "i")])))
 921             (match_operand:VALL_F16 1 "register_operand" "0")
 922             (match_operand:SI 2 "immediate_operand" "i")))]
 923   "TARGET_SIMD"
 924   {
 925     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 926     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 927     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 928
 929     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 930   }
 931   [(set_attr "type" "neon_ins<q>")]
 932 )
 933
 934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 935   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 936         (vec_merge:VALL_F16_NO_V2Q
 937             (vec_duplicate:VALL_F16_NO_V2Q
 938               (vec_select:<VEL>
 939                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 940                 (parallel
 941                   [(match_operand:SI 4 "immediate_operand" "i")])))
 942             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 943             (match_operand:SI 2 "immediate_operand" "i")))]
 944   "TARGET_SIMD"
 945   {
 946     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 947     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 948     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 949                                            INTVAL (operands[4]));
 950
 951     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 952   }
 953   [(set_attr "type" "neon_ins<q>")]
 954 )
 955
 956 (define_expand "signbit<mode>2"
 957   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
 958    (use (match_operand:VDQSF 1 "register_operand"))]
 959   "TARGET_SIMD"
 960 {
 961   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
 962   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 963                                                         shift_amount);
 964   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
 965
 966   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
 967                                                  shift_vector));
 968   DONE;
 969 })
 970
 971 (define_insn "aarch64_simd_lshr<mode>"
 972  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 973        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 974                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 975  "TARGET_SIMD"
 976  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 977   [(set_attr "type" "neon_shift_imm<q>")]
 978 )
 979
 980 (define_insn "aarch64_simd_ashr<mode>"
 981  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 982        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 983                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 984  "TARGET_SIMD"
 985  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 986   [(set_attr "type" "neon_shift_imm<q>")]
 987 )
 988
 989 (define_insn "aarch64_simd_imm_shl<mode>"
 990  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 991        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 992                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 993  "TARGET_SIMD"
 994   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 995   [(set_attr "type" "neon_shift_imm<q>")]
 996 )
 997
 998 (define_insn "aarch64_simd_reg_sshl<mode>"
 999  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1000        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1001                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1002  "TARGET_SIMD"
1003  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1004   [(set_attr "type" "neon_shift_reg<q>")]
1005 )
1006
1007 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1008  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1009        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1010                     (match_operand:VDQ_I 2 "register_operand" "w")]
1011                    UNSPEC_ASHIFT_UNSIGNED))]
1012  "TARGET_SIMD"
1013  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1014   [(set_attr "type" "neon_shift_reg<q>")]
1015 )
1016
1017 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1018  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1019        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1020                     (match_operand:VDQ_I 2 "register_operand" "w")]
1021                    UNSPEC_ASHIFT_SIGNED))]
1022  "TARGET_SIMD"
1023  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024   [(set_attr "type" "neon_shift_reg<q>")]
1025 )
1026
1027 (define_expand "ashl<mode>3"
1028   [(match_operand:VDQ_I 0 "register_operand")
1029    (match_operand:VDQ_I 1 "register_operand")
1030    (match_operand:SI  2 "general_operand")]
1031  "TARGET_SIMD"
1032 {
1033   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1034   int shift_amount;
1035
1036   if (CONST_INT_P (operands[2]))
1037     {
1038       shift_amount = INTVAL (operands[2]);
1039       if (shift_amount >= 0 && shift_amount < bit_width)
1040         {
1041           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1042                                                        shift_amount);
1043           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1044                                                      operands[1],
1045                                                      tmp));
1046           DONE;
1047         }
1048       else
1049         {
1050           operands[2] = force_reg (SImode, operands[2]);
1051         }
1052     }
1053   else if (MEM_P (operands[2]))
1054     {
1055       operands[2] = force_reg (SImode, operands[2]);
1056     }
1057
1058   if (REG_P (operands[2]))
1059     {
1060       rtx tmp = gen_reg_rtx (<MODE>mode);
1061       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1062                                              convert_to_mode (<VEL>mode,
1063                                                               operands[2],
1064                                                               0)));
1065       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1066                                                   tmp));
1067       DONE;
1068     }
1069   else
1070     FAIL;
1071 }
1072 )
1073
1074 (define_expand "lshr<mode>3"
1075   [(match_operand:VDQ_I 0 "register_operand")
1076    (match_operand:VDQ_I 1 "register_operand")
1077    (match_operand:SI  2 "general_operand")]
1078  "TARGET_SIMD"
1079 {
1080   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1081   int shift_amount;
1082
1083   if (CONST_INT_P (operands[2]))
1084     {
1085       shift_amount = INTVAL (operands[2]);
1086       if (shift_amount > 0 && shift_amount <= bit_width)
1087         {
1088           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1089                                                        shift_amount);
1090           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1091                                                   operands[1],
1092                                                   tmp));
1093           DONE;
1094         }
1095       else
1096         operands[2] = force_reg (SImode, operands[2]);
1097     }
1098   else if (MEM_P (operands[2]))
1099     {
1100       operands[2] = force_reg (SImode, operands[2]);
1101     }
1102
1103   if (REG_P (operands[2]))
1104     {
1105       rtx tmp = gen_reg_rtx (SImode);
1106       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1107       emit_insn (gen_negsi2 (tmp, operands[2]));
1108       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1109                                              convert_to_mode (<VEL>mode,
1110                                                               tmp, 0)));
1111       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
1112                                                           operands[1],
1113                                                           tmp1));
1114       DONE;
1115     }
1116   else
1117     FAIL;
1118 }
1119 )
1120
1121 (define_expand "ashr<mode>3"
1122   [(match_operand:VDQ_I 0 "register_operand")
1123    (match_operand:VDQ_I 1 "register_operand")
1124    (match_operand:SI  2 "general_operand")]
1125  "TARGET_SIMD"
1126 {
1127   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1128   int shift_amount;
1129
1130   if (CONST_INT_P (operands[2]))
1131     {
1132       shift_amount = INTVAL (operands[2]);
1133       if (shift_amount > 0 && shift_amount <= bit_width)
1134         {
1135           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1136                                                        shift_amount);
1137           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1138                                                   operands[1],
1139                                                   tmp));
1140           DONE;
1141         }
1142       else
1143         operands[2] = force_reg (SImode, operands[2]);
1144     }
1145   else if (MEM_P (operands[2]))
1146     {
1147       operands[2] = force_reg (SImode, operands[2]);
1148     }
1149
1150   if (REG_P (operands[2]))
1151     {
1152       rtx tmp = gen_reg_rtx (SImode);
1153       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1154       emit_insn (gen_negsi2 (tmp, operands[2]));
1155       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1156                                              convert_to_mode (<VEL>mode,
1157                                                               tmp, 0)));
1158       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1159                                                         operands[1],
1160                                                         tmp1));
1161       DONE;
1162     }
1163   else
1164     FAIL;
1165 }
1166 )
1167
1168 (define_expand "vashl<mode>3"
1169  [(match_operand:VDQ_I 0 "register_operand")
1170   (match_operand:VDQ_I 1 "register_operand")
1171   (match_operand:VDQ_I 2 "register_operand")]
1172  "TARGET_SIMD"
1173 {
1174   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1175                                               operands[2]));
1176   DONE;
1177 })
1178
1179 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1180 ;; Negating individual lanes most certainly offsets the
1181 ;; gain from vectorization.
1182 (define_expand "vashr<mode>3"
1183  [(match_operand:VDQ_BHSI 0 "register_operand")
1184   (match_operand:VDQ_BHSI 1 "register_operand")
1185   (match_operand:VDQ_BHSI 2 "register_operand")]
1186  "TARGET_SIMD"
1187 {
1188   rtx neg = gen_reg_rtx (<MODE>mode);
1189   emit (gen_neg<mode>2 (neg, operands[2]));
1190   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1191                                                     neg));
1192   DONE;
1193 })
1194
1195 ;; DI vector shift
1196 (define_expand "aarch64_ashr_simddi"
1197   [(match_operand:DI 0 "register_operand")
1198    (match_operand:DI 1 "register_operand")
1199    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1200   "TARGET_SIMD"
1201   {
1202     /* An arithmetic shift right by 64 fills the result with copies of the sign
1203        bit, just like asr by 63 - however the standard pattern does not handle
1204        a shift by 64.  */
1205     if (INTVAL (operands[2]) == 64)
1206       operands[2] = GEN_INT (63);
1207     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1208     DONE;
1209   }
1210 )
1211
1212 (define_expand "vlshr<mode>3"
1213  [(match_operand:VDQ_BHSI 0 "register_operand")
1214   (match_operand:VDQ_BHSI 1 "register_operand")
1215   (match_operand:VDQ_BHSI 2 "register_operand")]
1216  "TARGET_SIMD"
1217 {
1218   rtx neg = gen_reg_rtx (<MODE>mode);
1219   emit (gen_neg<mode>2 (neg, operands[2]));
1220   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1221                                                       neg));
1222   DONE;
1223 })
1224
1225 (define_expand "aarch64_lshr_simddi"
1226   [(match_operand:DI 0 "register_operand")
1227    (match_operand:DI 1 "register_operand")
1228    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1229   "TARGET_SIMD"
1230   {
1231     if (INTVAL (operands[2]) == 64)
1232       emit_move_insn (operands[0], const0_rtx);
1233     else
1234       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1235     DONE;
1236   }
1237 )
1238
1239 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1240 (define_insn "vec_shr_<mode>"
1241   [(set (match_operand:VD 0 "register_operand" "=w")
1242         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1243                     (match_operand:SI 2 "immediate_operand" "i")]
1244                    UNSPEC_VEC_SHR))]
1245   "TARGET_SIMD"
1246   {
1247     if (BYTES_BIG_ENDIAN)
1248       return "shl %d0, %d1, %2";
1249     else
1250       return "ushr %d0, %d1, %2";
1251   }
1252   [(set_attr "type" "neon_shift_imm")]
1253 )
1254
1255 (define_expand "vec_set<mode>"
1256   [(match_operand:VALL_F16 0 "register_operand")
1257    (match_operand:<VEL> 1 "register_operand")
1258    (match_operand:SI 2 "immediate_operand")]
1259   "TARGET_SIMD"
1260   {
1261     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1262     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1263                                           GEN_INT (elem), operands[0]));
1264     DONE;
1265   }
1266 )
1267
1268
1269 (define_insn "aarch64_mla<mode>"
1270  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1271        (plus:VDQ_BHSI (mult:VDQ_BHSI
1272                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1273                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1274                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1275  "TARGET_SIMD"
1276  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1277   [(set_attr "type" "neon_mla_<Vetype><q>")]
1278 )
1279
1280 (define_insn "*aarch64_mla_elt<mode>"
1281  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1282        (plus:VDQHS
1283          (mult:VDQHS
1284            (vec_duplicate:VDQHS
1285               (vec_select:<VEL>
1286                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1287                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1288            (match_operand:VDQHS 3 "register_operand" "w"))
1289          (match_operand:VDQHS 4 "register_operand" "0")))]
1290  "TARGET_SIMD"
1291   {
1292     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1293     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1294   }
1295   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1296 )
1297
1298 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1299  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1300        (plus:VDQHS
1301          (mult:VDQHS
1302            (vec_duplicate:VDQHS
1303               (vec_select:<VEL>
1304                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1305                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1306            (match_operand:VDQHS 3 "register_operand" "w"))
1307          (match_operand:VDQHS 4 "register_operand" "0")))]
1308  "TARGET_SIMD"
1309   {
1310     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1311     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1312   }
1313   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1314 )
1315
1316 (define_insn "*aarch64_mla_elt_merge<mode>"
1317   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1318         (plus:VDQHS
1319           (mult:VDQHS (vec_duplicate:VDQHS
1320                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1321                 (match_operand:VDQHS 2 "register_operand" "w"))
1322           (match_operand:VDQHS 3 "register_operand" "0")))]
1323  "TARGET_SIMD"
1324  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1325   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1326 )
1327
1328 (define_insn "aarch64_mls<mode>"
1329  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1330        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1331                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1332                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1333  "TARGET_SIMD"
1334  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1335   [(set_attr "type" "neon_mla_<Vetype><q>")]
1336 )
1337
1338 (define_insn "*aarch64_mls_elt<mode>"
1339  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1340        (minus:VDQHS
1341          (match_operand:VDQHS 4 "register_operand" "0")
1342          (mult:VDQHS
1343            (vec_duplicate:VDQHS
1344               (vec_select:<VEL>
1345                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1346                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1347            (match_operand:VDQHS 3 "register_operand" "w"))))]
1348  "TARGET_SIMD"
1349   {
1350     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1351     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1352   }
1353   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1354 )
1355
1356 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1357  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1358        (minus:VDQHS
1359          (match_operand:VDQHS 4 "register_operand" "0")
1360          (mult:VDQHS
1361            (vec_duplicate:VDQHS
1362               (vec_select:<VEL>
1363                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1364                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1365            (match_operand:VDQHS 3 "register_operand" "w"))))]
1366  "TARGET_SIMD"
1367   {
1368     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1369     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1370   }
1371   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1372 )
1373
1374 (define_insn "*aarch64_mls_elt_merge<mode>"
1375   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1376         (minus:VDQHS
1377           (match_operand:VDQHS 1 "register_operand" "0")
1378           (mult:VDQHS (vec_duplicate:VDQHS
1379                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1380                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1381   "TARGET_SIMD"
1382   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1383   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1384 )
1385
1386 ;; Max/Min operations.
1387 (define_insn "<su><maxmin><mode>3"
1388  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1389        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1390                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1391  "TARGET_SIMD"
1392  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1393   [(set_attr "type" "neon_minmax<q>")]
1394 )
1395
1396 (define_expand "<su><maxmin>v2di3"
1397  [(set (match_operand:V2DI 0 "register_operand")
1398        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1399                     (match_operand:V2DI 2 "register_operand")))]
1400  "TARGET_SIMD"
1401 {
1402   enum rtx_code cmp_operator;
1403   rtx cmp_fmt;
1404
1405   switch (<CODE>)
1406     {
1407     case UMIN:
1408       cmp_operator = LTU;
1409       break;
1410     case SMIN:
1411       cmp_operator = LT;
1412       break;
1413     case UMAX:
1414       cmp_operator = GTU;
1415       break;
1416     case SMAX:
1417       cmp_operator = GT;
1418       break;
1419     default:
1420       gcc_unreachable ();
1421     }
1422
1423   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1424   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1425               operands[2], cmp_fmt, operands[1], operands[2]));
1426   DONE;
1427 })
1428
1429 ;; Pairwise Integer Max/Min operations.
1430 (define_insn "aarch64_<maxmin_uns>p<mode>"
1431  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1432        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1433                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1434                         MAXMINV))]
1435  "TARGET_SIMD"
1436  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437   [(set_attr "type" "neon_minmax<q>")]
1438 )
1439
1440 ;; Pairwise FP Max/Min operations.
1441 (define_insn "aarch64_<maxmin_uns>p<mode>"
1442  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1443        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1444                       (match_operand:VHSDF 2 "register_operand" "w")]
1445                       FMAXMINV))]
1446  "TARGET_SIMD"
1447  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1448   [(set_attr "type" "neon_minmax<q>")]
1449 )
1450
1451 ;; vec_concat gives a new vector with the low elements from operand 1, and
1452 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1453 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1454 ;; What that means, is that the RTL descriptions of the below patterns
1455 ;; need to change depending on endianness.
1456
1457 ;; Move to the low architectural bits of the register.
1458 ;; On little-endian this is { operand, zeroes }
1459 ;; On big-endian this is { zeroes, operand }
1460
1461 (define_insn "move_lo_quad_internal_<mode>"
1462   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1463         (vec_concat:VQ_NO2E
1464           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1465           (vec_duplicate:<VHALF> (const_int 0))))]
1466   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1467   "@
1468    dup\\t%d0, %1.d[0]
1469    fmov\\t%d0, %1
1470    dup\\t%d0, %1"
1471   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1472    (set_attr "length" "4")
1473    (set_attr "arch" "simd,fp,simd")]
1474 )
1475
1476 (define_insn "move_lo_quad_internal_<mode>"
1477   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1478         (vec_concat:VQ_2E
1479           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1480           (const_int 0)))]
1481   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1482   "@
1483    dup\\t%d0, %1.d[0]
1484    fmov\\t%d0, %1
1485    dup\\t%d0, %1"
1486   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1487    (set_attr "length" "4")
1488    (set_attr "arch" "simd,fp,simd")]
1489 )
1490
1491 (define_insn "move_lo_quad_internal_be_<mode>"
1492   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1493         (vec_concat:VQ_NO2E
1494           (vec_duplicate:<VHALF> (const_int 0))
1495           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1496   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1497   "@
1498    dup\\t%d0, %1.d[0]
1499    fmov\\t%d0, %1
1500    dup\\t%d0, %1"
1501   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1502    (set_attr "length" "4")
1503    (set_attr "arch" "simd,fp,simd")]
1504 )
1505
1506 (define_insn "move_lo_quad_internal_be_<mode>"
1507   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1508         (vec_concat:VQ_2E
1509           (const_int 0)
1510           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1511   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1512   "@
1513    dup\\t%d0, %1.d[0]
1514    fmov\\t%d0, %1
1515    dup\\t%d0, %1"
1516   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1517    (set_attr "length" "4")
1518    (set_attr "arch" "simd,fp,simd")]
1519 )
1520
1521 (define_expand "move_lo_quad_<mode>"
1522   [(match_operand:VQ 0 "register_operand")
1523    (match_operand:VQ 1 "register_operand")]
1524   "TARGET_SIMD"
1525 {
1526   if (BYTES_BIG_ENDIAN)
1527     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1528   else
1529     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1530   DONE;
1531 }
1532 )
1533
1534 ;; Move operand1 to the high architectural bits of the register, keeping
1535 ;; the low architectural bits of operand2.
1536 ;; For little-endian this is { operand2, operand1 }
1537 ;; For big-endian this is { operand1, operand2 }
1538
1539 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1540   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1541         (vec_concat:VQ
1542           (vec_select:<VHALF>
1543                 (match_dup 0)
1544                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1545           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1546   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1547   "@
1548    ins\\t%0.d[1], %1.d[0]
1549    ins\\t%0.d[1], %1"
1550   [(set_attr "type" "neon_ins")]
1551 )
1552
1553 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1554   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1555         (vec_concat:VQ
1556           (match_operand:<VHALF> 1 "register_operand" "w,r")
1557           (vec_select:<VHALF>
1558                 (match_dup 0)
1559                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1560   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1561   "@
1562    ins\\t%0.d[1], %1.d[0]
1563    ins\\t%0.d[1], %1"
1564   [(set_attr "type" "neon_ins")]
1565 )
1566
1567 (define_expand "move_hi_quad_<mode>"
1568  [(match_operand:VQ 0 "register_operand")
1569   (match_operand:<VHALF> 1 "register_operand")]
1570  "TARGET_SIMD"
1571 {
1572   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1573   if (BYTES_BIG_ENDIAN)
1574     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1575                     operands[1], p));
1576   else
1577     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1578                     operands[1], p));
1579   DONE;
1580 })
1581
1582 ;; Narrowing operations.
1583
1584 ;; For doubles.
1585 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1586  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1587        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1588  "TARGET_SIMD"
1589  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1590   [(set_attr "type" "neon_shift_imm_narrow_q")]
1591 )
1592
1593 (define_expand "vec_pack_trunc_<mode>"
1594  [(match_operand:<VNARROWD> 0 "register_operand")
1595   (match_operand:VDN 1 "register_operand")
1596   (match_operand:VDN 2 "register_operand")]
1597  "TARGET_SIMD"
1598 {
1599   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1602
1603   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1606   DONE;
1607 })
1608
1609 ;; For quads.
1610
1611 (define_insn "vec_pack_trunc_<mode>"
1612  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613        (vec_concat:<VNARROWQ2>
1614          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1616  "TARGET_SIMD"
1617  {
1618    if (BYTES_BIG_ENDIAN)
1619      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1620    else
1621      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1622  }
1623   [(set_attr "type" "multiple")
1624    (set_attr "length" "8")]
1625 )
1626
1627 ;; Widening operations.
1628
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632                                (match_operand:VQW 1 "register_operand" "w")
1633                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1634                             )))]
1635   "TARGET_SIMD"
1636   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637   [(set_attr "type" "neon_shift_imm_long")]
1638 )
1639
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643                                (match_operand:VQW 1 "register_operand" "w")
1644                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1645                             )))]
1646   "TARGET_SIMD"
1647   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648   [(set_attr "type" "neon_shift_imm_long")]
1649 )
1650
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652   [(match_operand:<VWIDE> 0 "register_operand")
1653    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1654   "TARGET_SIMD"
1655   {
1656     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1658                                                           operands[1], p));
1659     DONE;
1660   }
1661 )
1662
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664   [(match_operand:<VWIDE> 0 "register_operand")
1665    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1666   "TARGET_SIMD"
1667   {
1668     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1670                                                           operands[1], p));
1671     DONE;
1672   }
1673 )
1674
1675 ;; Widening arithmetic.
1676
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1679         (plus:<VWIDE>
1680           (mult:<VWIDE>
1681               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682                  (match_operand:VQW 2 "register_operand" "w")
1683                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685                  (match_operand:VQW 4 "register_operand" "w")
1686                  (match_dup 3))))
1687           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1688   "TARGET_SIMD"
1689   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690   [(set_attr "type" "neon_mla_<Vetype>_long")]
1691 )
1692
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1695         (plus:<VWIDE>
1696           (mult:<VWIDE>
1697               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698                  (match_operand:VQW 2 "register_operand" "w")
1699                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701                  (match_operand:VQW 4 "register_operand" "w")
1702                  (match_dup 3))))
1703           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1704   "TARGET_SIMD"
1705   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706   [(set_attr "type" "neon_mla_<Vetype>_long")]
1707 )
1708
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1711         (minus:<VWIDE>
1712           (match_operand:<VWIDE> 1 "register_operand" "0")
1713           (mult:<VWIDE>
1714               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715                  (match_operand:VQW 2 "register_operand" "w")
1716                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718                  (match_operand:VQW 4 "register_operand" "w")
1719                  (match_dup 3))))))]
1720   "TARGET_SIMD"
1721   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722   [(set_attr "type" "neon_mla_<Vetype>_long")]
1723 )
1724
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1727         (minus:<VWIDE>
1728           (match_operand:<VWIDE> 1 "register_operand" "0")
1729           (mult:<VWIDE>
1730               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731                  (match_operand:VQW 2 "register_operand" "w")
1732                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734                  (match_operand:VQW 4 "register_operand" "w")
1735                  (match_dup 3))))))]
1736   "TARGET_SIMD"
1737   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738   [(set_attr "type" "neon_mla_<Vetype>_long")]
1739 )
1740
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1743         (plus:<VWIDE>
1744           (mult:<VWIDE>
1745             (ANY_EXTEND:<VWIDE>
1746               (match_operand:VD_BHSI 1 "register_operand" "w"))
1747             (ANY_EXTEND:<VWIDE>
1748               (match_operand:VD_BHSI 2 "register_operand" "w")))
1749           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1750   "TARGET_SIMD"
1751   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752   [(set_attr "type" "neon_mla_<Vetype>_long")]
1753 )
1754
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1757         (minus:<VWIDE>
1758           (match_operand:<VWIDE> 1 "register_operand" "0")
1759           (mult:<VWIDE>
1760             (ANY_EXTEND:<VWIDE>
1761               (match_operand:VD_BHSI 2 "register_operand" "w"))
1762             (ANY_EXTEND:<VWIDE>
1763               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1764   "TARGET_SIMD"
1765   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766   [(set_attr "type" "neon_mla_<Vetype>_long")]
1767 )
1768
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772                            (match_operand:VQW 1 "register_operand" "w")
1773                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775                            (match_operand:VQW 2 "register_operand" "w")
1776                            (match_dup 3)))))]
1777   "TARGET_SIMD"
1778   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779   [(set_attr "type" "neon_mul_<Vetype>_long")]
1780 )
1781
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783   [(match_operand:<VWIDE> 0 "register_operand")
1784    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1785    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1786  "TARGET_SIMD"
1787  {
1788    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1790                                                        operands[1],
1791                                                        operands[2], p));
1792    DONE;
1793  }
1794 )
1795
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799                             (match_operand:VQW 1 "register_operand" "w")
1800                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802                             (match_operand:VQW 2 "register_operand" "w")
1803                             (match_dup 3)))))]
1804   "TARGET_SIMD"
1805   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806   [(set_attr "type" "neon_mul_<Vetype>_long")]
1807 )
1808
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810   [(match_operand:<VWIDE> 0 "register_operand")
1811    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1812    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1813  "TARGET_SIMD"
1814  {
1815    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1817                                                        operands[1],
1818                                                        operands[2], p));
1819    DONE;
1820
1821  }
1822 )
1823
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1829 ;;
1830 ;; Floating-point operations can raise an exception.  Vectorizing such
1831 ;; operations are safe because of reasons explained below.
1832 ;;
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature.  In the
1835 ;; event of a floating-point exception being raised by vectorised
1836 ;; code then:
1837 ;; 1.  If trapped floating-point exceptions are available, then a trap
1838 ;;     will be taken when any lane raises an enabled exception.  A trap
1839 ;;     handler may determine which lane raised the exception.
1840 ;; 2.  Alternatively a sticky exception flag is set in the
1841 ;;     floating-point status register (FPSR).  Software may explicitly
1842 ;;     test the exception flags, in which case the tests will either
1843 ;;     prevent vectorisation, allowing precise identification of the
1844 ;;     failing operation, or if tested outside of vectorisable regions
1845 ;;     then the specific operation and lane are not of interest.
1846
1847 ;; FP arithmetic operations.
1848
1849 (define_insn "add<mode>3"
1850  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852                    (match_operand:VHSDF 2 "register_operand" "w")))]
1853  "TARGET_SIMD"
1854  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1856 )
1857
1858 (define_insn "sub<mode>3"
1859  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861                     (match_operand:VHSDF 2 "register_operand" "w")))]
1862  "TARGET_SIMD"
1863  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1865 )
1866
1867 (define_insn "mul<mode>3"
1868  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870                    (match_operand:VHSDF 2 "register_operand" "w")))]
1871  "TARGET_SIMD"
1872  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1874 )
1875
1876 (define_expand "div<mode>3"
1877  [(set (match_operand:VHSDF 0 "register_operand")
1878        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1879                   (match_operand:VHSDF 2 "register_operand")))]
1880  "TARGET_SIMD"
1881 {
1882   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1883     DONE;
1884
1885   operands[1] = force_reg (<MODE>mode, operands[1]);
1886 })
1887
1888 (define_insn "*div<mode>3"
1889  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891                  (match_operand:VHSDF 2 "register_operand" "w")))]
1892  "TARGET_SIMD"
1893  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894   [(set_attr "type" "neon_fp_div_<stype><q>")]
1895 )
1896
1897 (define_insn "neg<mode>2"
1898  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1900  "TARGET_SIMD"
1901  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1903 )
1904
1905 (define_insn "abs<mode>2"
1906  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1908  "TARGET_SIMD"
1909  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1911 )
1912
1913 (define_insn "fma<mode>4"
1914   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916                   (match_operand:VHSDF 2 "register_operand" "w")
1917                   (match_operand:VHSDF 3 "register_operand" "0")))]
1918   "TARGET_SIMD"
1919  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1921 )
1922
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924   [(set (match_operand:VDQF 0 "register_operand" "=w")
1925     (fma:VDQF
1926       (vec_duplicate:VDQF
1927         (vec_select:<VEL>
1928           (match_operand:VDQF 1 "register_operand" "<h_con>")
1929           (parallel [(match_operand:SI 2 "immediate_operand")])))
1930       (match_operand:VDQF 3 "register_operand" "w")
1931       (match_operand:VDQF 4 "register_operand" "0")))]
1932   "TARGET_SIMD"
1933   {
1934     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1936   }
1937   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1938 )
1939
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1942     (fma:VDQSF
1943       (vec_duplicate:VDQSF
1944         (vec_select:<VEL>
1945           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946           (parallel [(match_operand:SI 2 "immediate_operand")])))
1947       (match_operand:VDQSF 3 "register_operand" "w")
1948       (match_operand:VDQSF 4 "register_operand" "0")))]
1949   "TARGET_SIMD"
1950   {
1951     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1953   }
1954   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1955 )
1956
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958   [(set (match_operand:VMUL 0 "register_operand" "=w")
1959     (fma:VMUL
1960       (vec_duplicate:VMUL
1961           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962       (match_operand:VMUL 2 "register_operand" "w")
1963       (match_operand:VMUL 3 "register_operand" "0")))]
1964   "TARGET_SIMD"
1965   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1967 )
1968
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970   [(set (match_operand:DF 0 "register_operand" "=w")
1971     (fma:DF
1972         (vec_select:DF
1973           (match_operand:V2DF 1 "register_operand" "w")
1974           (parallel [(match_operand:SI 2 "immediate_operand")]))
1975       (match_operand:DF 3 "register_operand" "w")
1976       (match_operand:DF 4 "register_operand" "0")))]
1977   "TARGET_SIMD"
1978   {
1979     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1981   }
1982   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1983 )
1984
1985 (define_insn "fnma<mode>4"
1986   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1987         (fma:VHSDF
1988           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989           (match_operand:VHSDF 2 "register_operand" "w")
1990           (match_operand:VHSDF 3 "register_operand" "0")))]
1991   "TARGET_SIMD"
1992   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1994 )
1995
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997   [(set (match_operand:VDQF 0 "register_operand" "=w")
1998     (fma:VDQF
1999       (neg:VDQF
2000         (match_operand:VDQF 3 "register_operand" "w"))
2001       (vec_duplicate:VDQF
2002         (vec_select:<VEL>
2003           (match_operand:VDQF 1 "register_operand" "<h_con>")
2004           (parallel [(match_operand:SI 2 "immediate_operand")])))
2005       (match_operand:VDQF 4 "register_operand" "0")))]
2006   "TARGET_SIMD"
2007   {
2008     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2010   }
2011   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2012 )
2013
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2016     (fma:VDQSF
2017       (neg:VDQSF
2018         (match_operand:VDQSF 3 "register_operand" "w"))
2019       (vec_duplicate:VDQSF
2020         (vec_select:<VEL>
2021           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022           (parallel [(match_operand:SI 2 "immediate_operand")])))
2023       (match_operand:VDQSF 4 "register_operand" "0")))]
2024   "TARGET_SIMD"
2025   {
2026     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2028   }
2029   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2030 )
2031
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033   [(set (match_operand:VMUL 0 "register_operand" "=w")
2034     (fma:VMUL
2035       (neg:VMUL
2036         (match_operand:VMUL 2 "register_operand" "w"))
2037       (vec_duplicate:VMUL
2038         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039       (match_operand:VMUL 3 "register_operand" "0")))]
2040   "TARGET_SIMD"
2041   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2043 )
2044
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046   [(set (match_operand:DF 0 "register_operand" "=w")
2047     (fma:DF
2048       (vec_select:DF
2049         (match_operand:V2DF 1 "register_operand" "w")
2050         (parallel [(match_operand:SI 2 "immediate_operand")]))
2051       (neg:DF
2052         (match_operand:DF 3 "register_operand" "w"))
2053       (match_operand:DF 4 "register_operand" "0")))]
2054   "TARGET_SIMD"
2055   {
2056     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2058   }
2059   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2060 )
2061
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2064 (define_insn "<frint_pattern><mode>2"
2065   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2067                        FRINT))]
2068   "TARGET_SIMD"
2069   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070   [(set_attr "type" "neon_fp_round_<stype><q>")]
2071 )
2072
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078                                [(match_operand:VHSDF 1 "register_operand" "w")]
2079                                FCVT)))]
2080   "TARGET_SIMD"
2081   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2083 )
2084
2085 ;; HF Scalar variants of related SIMD instructions.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087   [(set (match_operand:HI 0 "register_operand" "=w")
2088         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2089                       FCVT)))]
2090   "TARGET_SIMD_F16INST"
2091   "fcvt<frint_suffix><su>\t%h0, %h1"
2092   [(set_attr "type" "neon_fp_to_int_s")]
2093 )
2094
2095 (define_insn "<optab>_trunchfhi2"
2096   [(set (match_operand:HI 0 "register_operand" "=w")
2097         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098   "TARGET_SIMD_F16INST"
2099   "fcvtz<su>\t%h0, %h1"
2100   [(set_attr "type" "neon_fp_to_int_s")]
2101 )
2102
2103 (define_insn "<optab>hihf2"
2104   [(set (match_operand:HF 0 "register_operand" "=w")
2105         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106   "TARGET_SIMD_F16INST"
2107   "<su_optab>cvtf\t%h0, %h1"
2108   [(set_attr "type" "neon_int_to_fp_s")]
2109 )
2110
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2114                                [(mult:VDQF
2115          (match_operand:VDQF 1 "register_operand" "w")
2116          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2117                                UNSPEC_FRINTZ)))]
2118   "TARGET_SIMD
2119    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2121   {
2122     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2123     char buf[64];
2124     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125     output_asm_insn (buf, operands);
2126     return "";
2127   }
2128   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2129 )
2130
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134                                [(match_operand:VHSDF 1 "register_operand")]
2135                                 UNSPEC_FRINTZ)))]
2136   "TARGET_SIMD"
2137   {})
2138
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142                                [(match_operand:VHSDF 1 "register_operand")]
2143                                 UNSPEC_FRINTZ)))]
2144   "TARGET_SIMD"
2145   {})
2146
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148   [(set (match_operand:VHSDF 0 "register_operand")
2149         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2150                        UNSPEC_FRINTZ))]
2151   "TARGET_SIMD"
2152   {})
2153
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2156         (FLOATUORS:VHSDF
2157           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2158   "TARGET_SIMD"
2159   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2161 )
2162
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2166
2167 ;; Float widening operations.
2168
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171         (float_extend:<VWIDE> (vec_select:<VHALF>
2172                                (match_operand:VQ_HSF 1 "register_operand" "w")
2173                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2174                             )))]
2175   "TARGET_SIMD"
2176   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177   [(set_attr "type" "neon_fp_cvt_widen_s")]
2178 )
2179
2180 ;; Convert between fixed-point and floating-point (vector modes)
2181
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184         (unspec:<VHSDF:FCVT_TARGET>
2185           [(match_operand:VHSDF 1 "register_operand" "w")
2186            (match_operand:SI 2 "immediate_operand" "i")]
2187          FCVT_F2FIXED))]
2188   "TARGET_SIMD"
2189   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2191 )
2192
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195         (unspec:<VDQ_HSDI:FCVT_TARGET>
2196           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197            (match_operand:SI 2 "immediate_operand" "i")]
2198          FCVT_FIXED2F))]
2199   "TARGET_SIMD"
2200   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2202 )
2203
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns their behavior is as required.
2212
2213 (define_expand "vec_unpacks_lo_<mode>"
2214   [(match_operand:<VWIDE> 0 "register_operand")
2215    (match_operand:VQ_HSF 1 "register_operand")]
2216   "TARGET_SIMD"
2217   {
2218     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2220                                                        operands[1], p));
2221     DONE;
2222   }
2223 )
2224
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227         (float_extend:<VWIDE> (vec_select:<VHALF>
2228                                (match_operand:VQ_HSF 1 "register_operand" "w")
2229                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2230                             )))]
2231   "TARGET_SIMD"
2232   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233   [(set_attr "type" "neon_fp_cvt_widen_s")]
2234 )
2235
2236 (define_expand "vec_unpacks_hi_<mode>"
2237   [(match_operand:<VWIDE> 0 "register_operand")
2238    (match_operand:VQ_HSF 1 "register_operand")]
2239   "TARGET_SIMD"
2240   {
2241     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2243                                                        operands[1], p));
2244     DONE;
2245   }
2246 )
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249         (float_extend:<VWIDE>
2250           (match_operand:VDF 1 "register_operand" "w")))]
2251   "TARGET_SIMD"
2252   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253   [(set_attr "type" "neon_fp_cvt_widen_s")]
2254 )
2255
2256 ;; Float narrowing operations.
2257
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259   [(set (match_operand:VDF 0 "register_operand" "=w")
2260       (float_truncate:VDF
2261         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2262   "TARGET_SIMD"
2263   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2265 )
2266
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2269     (vec_concat:<VDBL>
2270       (match_operand:VDF 1 "register_operand" "0")
2271       (float_truncate:VDF
2272         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2276 )
2277
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2280     (vec_concat:<VDBL>
2281       (float_truncate:VDF
2282         (match_operand:<VWIDE> 2 "register_operand" "w"))
2283       (match_operand:VDF 1 "register_operand" "0")))]
2284   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2287 )
2288
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290   [(match_operand:<VDBL> 0 "register_operand")
2291    (match_operand:VDF 1 "register_operand")
2292    (match_operand:<VWIDE> 2 "register_operand")]
2293   "TARGET_SIMD"
2294 {
2295   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298   emit_insn (gen (operands[0], operands[1], operands[2]));
2299   DONE;
2300 }
2301 )
2302
2303 (define_expand "vec_pack_trunc_v2df"
2304   [(set (match_operand:V4SF 0 "register_operand")
2305       (vec_concat:V4SF
2306         (float_truncate:V2SF
2307             (match_operand:V2DF 1 "register_operand"))
2308         (float_truncate:V2SF
2309             (match_operand:V2DF 2 "register_operand"))
2310           ))]
2311   "TARGET_SIMD"
2312   {
2313     rtx tmp = gen_reg_rtx (V2SFmode);
2314     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2316
2317     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319                                                    tmp, operands[hi]));
2320     DONE;
2321   }
2322 )
2323
2324 (define_expand "vec_pack_trunc_df"
2325   [(set (match_operand:V2SF 0 "register_operand")
2326       (vec_concat:V2SF
2327         (float_truncate:SF
2328             (match_operand:DF 1 "register_operand"))
2329         (float_truncate:SF
2330             (match_operand:DF 2 "register_operand"))
2331           ))]
2332   "TARGET_SIMD"
2333   {
2334     rtx tmp = gen_reg_rtx (V2SFmode);
2335     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2337
2338     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2341     DONE;
2342   }
2343 )
2344
2345 ;; FP Max/Min
2346 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2347 ;; expression like:
2348 ;;      a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2351 ;; -ffast-math.
2352 ;;
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2358 ;; NaNs.
2359
2360 (define_insn "<su><maxmin><mode>3"
2361   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363                        (match_operand:VHSDF 2 "register_operand" "w")))]
2364   "TARGET_SIMD"
2365   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2367 )
2368
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375                       (match_operand:VHSDF 2 "register_operand" "w")]
2376                       FMAXMIN_UNS))]
2377   "TARGET_SIMD"
2378   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2380 )
2381
2382 ;; 'across lanes' add.
2383
2384 (define_expand "reduc_plus_scal_<mode>"
2385   [(match_operand:<VEL> 0 "register_operand")
2386    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2387                UNSPEC_ADDV)]
2388   "TARGET_SIMD"
2389   {
2390     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391     rtx scratch = gen_reg_rtx (<MODE>mode);
2392     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2394     DONE;
2395   }
2396 )
2397
2398 (define_insn "aarch64_faddp<mode>"
2399  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401                       (match_operand:VHSDF 2 "register_operand" "w")]
2402         UNSPEC_FADDV))]
2403  "TARGET_SIMD"
2404  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2406 )
2407
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409  [(set (match_operand:VDQV 0 "register_operand" "=w")
2410        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2411                     UNSPEC_ADDV))]
2412  "TARGET_SIMD"
2413  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414   [(set_attr "type" "neon_reduc_add<q>")]
2415 )
2416
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418  [(set (match_operand:V2SI 0 "register_operand" "=w")
2419        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2420                     UNSPEC_ADDV))]
2421  "TARGET_SIMD"
2422  "addp\\t%0.2s, %1.2s, %1.2s"
2423   [(set_attr "type" "neon_reduc_add")]
2424 )
2425
2426 (define_insn "reduc_plus_scal_<mode>"
2427  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2429                    UNSPEC_FADDV))]
2430  "TARGET_SIMD"
2431  "faddp\\t%<Vetype>0, %1.<Vtype>"
2432   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2433 )
2434
2435 (define_expand "reduc_plus_scal_v4sf"
2436  [(set (match_operand:SF 0 "register_operand")
2437        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2438                     UNSPEC_FADDV))]
2439  "TARGET_SIMD"
2440 {
2441   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442   rtx scratch = gen_reg_rtx (V4SFmode);
2443   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2446   DONE;
2447 })
2448
2449 (define_insn "clrsb<mode>2"
2450   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2452   "TARGET_SIMD"
2453   "cls\\t%0.<Vtype>, %1.<Vtype>"
2454   [(set_attr "type" "neon_cls<q>")]
2455 )
2456
2457 (define_insn "clz<mode>2"
2458  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2460  "TARGET_SIMD"
2461  "clz\\t%0.<Vtype>, %1.<Vtype>"
2462   [(set_attr "type" "neon_cls<q>")]
2463 )
2464
2465 (define_insn "popcount<mode>2"
2466   [(set (match_operand:VB 0 "register_operand" "=w")
2467         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2468   "TARGET_SIMD"
2469   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470   [(set_attr "type" "neon_cnt<q>")]
2471 )
2472
2473 ;; 'across lanes' max and min ops.
2474
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478   [(match_operand:<VEL> 0 "register_operand")
2479    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2480                   FMAXMINV)]
2481   "TARGET_SIMD"
2482   {
2483     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484     rtx scratch = gen_reg_rtx (<MODE>mode);
2485     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2486                                                               operands[1]));
2487     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2488     DONE;
2489   }
2490 )
2491
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494   [(match_operand:<VEL> 0 "register_operand")
2495    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2496                     MAXMINV)]
2497   "TARGET_SIMD"
2498   {
2499     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500     rtx scratch = gen_reg_rtx (<MODE>mode);
2501     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2502                                                               operands[1]));
2503     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2504     DONE;
2505   }
2506 )
2507
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2511                     MAXMINV))]
2512  "TARGET_SIMD"
2513  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514   [(set_attr "type" "neon_reduc_minmax<q>")]
2515 )
2516
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518  [(set (match_operand:V2SI 0 "register_operand" "=w")
2519        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2520                     MAXMINV))]
2521  "TARGET_SIMD"
2522  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523   [(set_attr "type" "neon_reduc_minmax")]
2524 )
2525
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2529                       FMAXMINV))]
2530  "TARGET_SIMD"
2531  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2533 )
2534
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2536 ;; allocation.
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2538 ;; to select.
2539 ;;
2540 ;; Thus our BSL is of the form:
2541 ;;   op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2543 ;;
2544 ;;   if (op0 = mask)
2545 ;;     bsl mask, op1, op2
2546 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2547 ;;     bit op0, op2, mask
2548 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2549 ;;     bif op0, op1, mask
2550 ;;
2551 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
2554
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2557         (xor:VDQ_I
2558            (and:VDQ_I
2559              (xor:VDQ_I
2560                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563           (match_dup:<V_INT_EQUIV> 3)
2564         ))]
2565   "TARGET_SIMD"
2566   "@
2567   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570   [(set_attr "type" "neon_bsl<q>")]
2571 )
2572
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first.  The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2578
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2581         (xor:VDQ_I
2582            (and:VDQ_I
2583              (xor:VDQ_I
2584                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587           (match_dup:<V_INT_EQUIV> 2)))]
2588   "TARGET_SIMD"
2589   "@
2590   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593   [(set_attr "type" "neon_bsl<q>")]
2594 )
2595
2596 ;; DImode is special, we want to avoid computing operations which are
2597 ;; more naturally computed in general purpose registers in the vector
2598 ;; registers.  If we do that, we need to move all three operands from general
2599 ;; purpose registers to vector registers, then back again.  However, we
2600 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2601 ;; optimizations based on the component operations of a BSL.
2602 ;;
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
2605
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2608         (xor:DI
2609            (and:DI
2610              (xor:DI
2611                (match_operand:DI 3 "register_operand" "w,0,w,r")
2612                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2614           (match_dup:DI 3)
2615         ))]
2616   "TARGET_SIMD"
2617   "@
2618   bsl\\t%0.8b, %2.8b, %3.8b
2619   bit\\t%0.8b, %2.8b, %1.8b
2620   bif\\t%0.8b, %3.8b, %1.8b
2621   #"
2622   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2624 {
2625   /* Split back to individual operations.  If we're before reload, and
2626      able to create a temporary register, do so.  If we're after reload,
2627      we've got an early-clobber destination register, so use that.
2628      Otherwise, we can't create pseudos and we can't yet guarantee that
2629      operands[0] is safe to write, so FAIL to split.  */
2630
2631   rtx scratch;
2632   if (reload_completed)
2633     scratch = operands[0];
2634   else if (can_create_pseudo_p ())
2635     scratch = gen_reg_rtx (DImode);
2636   else
2637     FAIL;
2638
2639   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2642   DONE;
2643 }
2644   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645    (set_attr "length" "4,4,4,12")]
2646 )
2647
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2650         (xor:DI
2651            (and:DI
2652              (xor:DI
2653                (match_operand:DI 3 "register_operand" "w,w,0,r")
2654                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2656           (match_dup:DI 2)
2657         ))]
2658   "TARGET_SIMD"
2659   "@
2660   bsl\\t%0.8b, %3.8b, %2.8b
2661   bit\\t%0.8b, %3.8b, %1.8b
2662   bif\\t%0.8b, %2.8b, %1.8b
2663   #"
2664   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2666 {
2667   /* Split back to individual operations.  If we're before reload, and
2668      able to create a temporary register, do so.  If we're after reload,
2669      we've got an early-clobber destination register, so use that.
2670      Otherwise, we can't create pseudos and we can't yet guarantee that
2671      operands[0] is safe to write, so FAIL to split.  */
2672
2673   rtx scratch;
2674   if (reload_completed)
2675     scratch = operands[0];
2676   else if (can_create_pseudo_p ())
2677     scratch = gen_reg_rtx (DImode);
2678   else
2679     FAIL;
2680
2681   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2684   DONE;
2685 }
2686   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687    (set_attr "length" "4,4,4,12")]
2688 )
2689
2690 (define_expand "aarch64_simd_bsl<mode>"
2691   [(match_operand:VALLDIF 0 "register_operand")
2692    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2693    (match_operand:VALLDIF 2 "register_operand")
2694    (match_operand:VALLDIF 3 "register_operand")]
2695  "TARGET_SIMD"
2696 {
2697   /* We can't alias operands together if they have different modes.  */
2698   rtx tmp = operands[0];
2699   if (FLOAT_MODE_P (<MODE>mode))
2700     {
2701       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2702       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2703       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2704     }
2705   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2706   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2707                                                          operands[1],
2708                                                          operands[2],
2709                                                          operands[3]));
2710   if (tmp != operands[0])
2711     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2712
2713   DONE;
2714 })
2715
2716 (define_expand "vcond_mask_<mode><v_int_equiv>"
2717   [(match_operand:VALLDI 0 "register_operand")
2718    (match_operand:VALLDI 1 "nonmemory_operand")
2719    (match_operand:VALLDI 2 "nonmemory_operand")
2720    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2721   "TARGET_SIMD"
2722 {
2723   /* If we have (a = (P) ? -1 : 0);
2724      Then we can simply move the generated mask (result must be int).  */
2725   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2726       && operands[2] == CONST0_RTX (<MODE>mode))
2727     emit_move_insn (operands[0], operands[3]);
2728   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2729   else if (operands[1] == CONST0_RTX (<MODE>mode)
2730            && operands[2] == CONSTM1_RTX (<MODE>mode))
2731     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2732   else
2733     {
2734       if (!REG_P (operands[1]))
2735         operands[1] = force_reg (<MODE>mode, operands[1]);
2736       if (!REG_P (operands[2]))
2737         operands[2] = force_reg (<MODE>mode, operands[2]);
2738       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2739                                              operands[1], operands[2]));
2740     }
2741
2742   DONE;
2743 })
2744
2745 ;; Patterns comparing two vectors to produce a mask.
2746
2747 (define_expand "vec_cmp<mode><mode>"
2748   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2749           (match_operator 1 "comparison_operator"
2750             [(match_operand:VSDQ_I_DI 2 "register_operand")
2751              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2752   "TARGET_SIMD"
2753 {
2754   rtx mask = operands[0];
2755   enum rtx_code code = GET_CODE (operands[1]);
2756
2757   switch (code)
2758     {
2759     case NE:
2760     case LE:
2761     case LT:
2762     case GE:
2763     case GT:
2764     case EQ:
2765       if (operands[3] == CONST0_RTX (<MODE>mode))
2766         break;
2767
2768       /* Fall through.  */
2769     default:
2770       if (!REG_P (operands[3]))
2771         operands[3] = force_reg (<MODE>mode, operands[3]);
2772
2773       break;
2774     }
2775
2776   switch (code)
2777     {
2778     case LT:
2779       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2780       break;
2781
2782     case GE:
2783       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2784       break;
2785
2786     case LE:
2787       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2788       break;
2789
2790     case GT:
2791       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2792       break;
2793
2794     case LTU:
2795       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2796       break;
2797
2798     case GEU:
2799       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2800       break;
2801
2802     case LEU:
2803       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2804       break;
2805
2806     case GTU:
2807       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2808       break;
2809
2810     case NE:
2811       /* Handle NE as !EQ.  */
2812       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2813       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2814       break;
2815
2816     case EQ:
2817       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2818       break;
2819
2820     default:
2821       gcc_unreachable ();
2822     }
2823
2824   DONE;
2825 })
2826
2827 (define_expand "vec_cmp<mode><v_int_equiv>"
2828   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2829         (match_operator 1 "comparison_operator"
2830             [(match_operand:VDQF 2 "register_operand")
2831              (match_operand:VDQF 3 "nonmemory_operand")]))]
2832   "TARGET_SIMD"
2833 {
2834   int use_zero_form = 0;
2835   enum rtx_code code = GET_CODE (operands[1]);
2836   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2837
2838   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2839
2840   switch (code)
2841     {
2842     case LE:
2843     case LT:
2844     case GE:
2845     case GT:
2846     case EQ:
2847       if (operands[3] == CONST0_RTX (<MODE>mode))
2848         {
2849           use_zero_form = 1;
2850           break;
2851         }
2852       /* Fall through.  */
2853     default:
2854       if (!REG_P (operands[3]))
2855         operands[3] = force_reg (<MODE>mode, operands[3]);
2856
2857       break;
2858     }
2859
2860   switch (code)
2861     {
2862     case LT:
2863       if (use_zero_form)
2864         {
2865           comparison = gen_aarch64_cmlt<mode>;
2866           break;
2867         }
2868       /* Fall through.  */
2869     case UNLT:
2870       std::swap (operands[2], operands[3]);
2871       /* Fall through.  */
2872     case UNGT:
2873     case GT:
2874       comparison = gen_aarch64_cmgt<mode>;
2875       break;
2876     case LE:
2877       if (use_zero_form)
2878         {
2879           comparison = gen_aarch64_cmle<mode>;
2880           break;
2881         }
2882       /* Fall through.  */
2883     case UNLE:
2884       std::swap (operands[2], operands[3]);
2885       /* Fall through.  */
2886     case UNGE:
2887     case GE:
2888       comparison = gen_aarch64_cmge<mode>;
2889       break;
2890     case NE:
2891     case EQ:
2892       comparison = gen_aarch64_cmeq<mode>;
2893       break;
2894     case UNEQ:
2895     case ORDERED:
2896     case UNORDERED:
2897     case LTGT:
2898       break;
2899     default:
2900       gcc_unreachable ();
2901     }
2902
2903   switch (code)
2904     {
2905     case UNGE:
2906     case UNGT:
2907     case UNLE:
2908     case UNLT:
2909       {
2910         /* All of the above must not raise any FP exceptions.  Thus we first
2911            check each operand for NaNs and force any elements containing NaN to
2912            zero before using them in the compare.
2913            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2914                                      (cm<cc> (isnan (a) ? 0.0 : a,
2915                                               isnan (b) ? 0.0 : b))
2916            We use the following transformations for doing the comparisions:
2917            a UNGE b -> a GE b
2918            a UNGT b -> a GT b
2919            a UNLE b -> b GE a
2920            a UNLT b -> b GT a.  */
2921
2922         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2923         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2924         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2925         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2926         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2927         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2928         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2929                                           lowpart_subreg (<V_INT_EQUIV>mode,
2930                                                           operands[2],
2931                                                           <MODE>mode)));
2932         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2933                                           lowpart_subreg (<V_INT_EQUIV>mode,
2934                                                           operands[3],
2935                                                           <MODE>mode)));
2936         gcc_assert (comparison != NULL);
2937         emit_insn (comparison (operands[0],
2938                                lowpart_subreg (<MODE>mode,
2939                                                tmp0, <V_INT_EQUIV>mode),
2940                                lowpart_subreg (<MODE>mode,
2941                                                tmp1, <V_INT_EQUIV>mode)));
2942         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2943       }
2944       break;
2945
2946     case LT:
2947     case LE:
2948     case GT:
2949     case GE:
2950     case EQ:
2951     case NE:
2952       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2953          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2954          a GE b -> a GE b
2955          a GT b -> a GT b
2956          a LE b -> b GE a
2957          a LT b -> b GT a
2958          a EQ b -> a EQ b
2959          a NE b -> ~(a EQ b)  */
2960       gcc_assert (comparison != NULL);
2961       emit_insn (comparison (operands[0], operands[2], operands[3]));
2962       if (code == NE)
2963         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2964       break;
2965
2966     case LTGT:
2967       /* LTGT is not guranteed to not generate a FP exception.  So let's
2968          go the faster way : ((a > b) || (b > a)).  */
2969       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2970                                          operands[2], operands[3]));
2971       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2972       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2973       break;
2974
2975     case ORDERED:
2976     case UNORDERED:
2977     case UNEQ:
2978       /* cmeq (a, a) & cmeq (b, b).  */
2979       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2980                                          operands[2], operands[2]));
2981       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2982       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2983
2984       if (code == UNORDERED)
2985         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2986       else if (code == UNEQ)
2987         {
2988           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2989           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2990         }
2991       break;
2992
2993     default:
2994       gcc_unreachable ();
2995     }
2996
2997   DONE;
2998 })
2999
3000 (define_expand "vec_cmpu<mode><mode>"
3001   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3002           (match_operator 1 "comparison_operator"
3003             [(match_operand:VSDQ_I_DI 2 "register_operand")
3004              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3005   "TARGET_SIMD"
3006 {
3007   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3008                                       operands[2], operands[3]));
3009   DONE;
3010 })
3011
3012 (define_expand "vcond<mode><mode>"
3013   [(set (match_operand:VALLDI 0 "register_operand")
3014         (if_then_else:VALLDI
3015           (match_operator 3 "comparison_operator"
3016             [(match_operand:VALLDI 4 "register_operand")
3017              (match_operand:VALLDI 5 "nonmemory_operand")])
3018           (match_operand:VALLDI 1 "nonmemory_operand")
3019           (match_operand:VALLDI 2 "nonmemory_operand")))]
3020   "TARGET_SIMD"
3021 {
3022   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3023   enum rtx_code code = GET_CODE (operands[3]);
3024
3025   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3026      it as well as switch operands 1/2 in order to avoid the additional
3027      NOT instruction.  */
3028   if (code == NE)
3029     {
3030       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3031                                     operands[4], operands[5]);
3032       std::swap (operands[1], operands[2]);
3033     }
3034   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3035                                              operands[4], operands[5]));
3036   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3037                                                  operands[2], mask));
3038
3039   DONE;
3040 })
3041
3042 (define_expand "vcond<v_cmp_mixed><mode>"
3043   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3044         (if_then_else:<V_cmp_mixed>
3045           (match_operator 3 "comparison_operator"
3046             [(match_operand:VDQF_COND 4 "register_operand")
3047              (match_operand:VDQF_COND 5 "nonmemory_operand")])
3048           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3049           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3050   "TARGET_SIMD"
3051 {
3052   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3053   enum rtx_code code = GET_CODE (operands[3]);
3054
3055   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3056      it as well as switch operands 1/2 in order to avoid the additional
3057      NOT instruction.  */
3058   if (code == NE)
3059     {
3060       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3061                                     operands[4], operands[5]);
3062       std::swap (operands[1], operands[2]);
3063     }
3064   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3065                                              operands[4], operands[5]));
3066   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3067                                                 operands[0], operands[1],
3068                                                 operands[2], mask));
3069
3070   DONE;
3071 })
3072
3073 (define_expand "vcondu<mode><mode>"
3074   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3075         (if_then_else:VSDQ_I_DI
3076           (match_operator 3 "comparison_operator"
3077             [(match_operand:VSDQ_I_DI 4 "register_operand")
3078              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3079           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3080           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3081   "TARGET_SIMD"
3082 {
3083   rtx mask = gen_reg_rtx (<MODE>mode);
3084   enum rtx_code code = GET_CODE (operands[3]);
3085
3086   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3087      it as well as switch operands 1/2 in order to avoid the additional
3088      NOT instruction.  */
3089   if (code == NE)
3090     {
3091       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3092                                     operands[4], operands[5]);
3093       std::swap (operands[1], operands[2]);
3094     }
3095   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3096                                       operands[4], operands[5]));
3097   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3098                                                  operands[2], mask));
3099   DONE;
3100 })
3101
3102 (define_expand "vcondu<mode><v_cmp_mixed>"
3103   [(set (match_operand:VDQF 0 "register_operand")
3104         (if_then_else:VDQF
3105           (match_operator 3 "comparison_operator"
3106             [(match_operand:<V_cmp_mixed> 4 "register_operand")
3107              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3108           (match_operand:VDQF 1 "nonmemory_operand")
3109           (match_operand:VDQF 2 "nonmemory_operand")))]
3110   "TARGET_SIMD"
3111 {
3112   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3113   enum rtx_code code = GET_CODE (operands[3]);
3114
3115   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3116      it as well as switch operands 1/2 in order to avoid the additional
3117      NOT instruction.  */
3118   if (code == NE)
3119     {
3120       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3121                                     operands[4], operands[5]);
3122       std::swap (operands[1], operands[2]);
3123     }
3124   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3125                                                   mask, operands[3],
3126                                                   operands[4], operands[5]));
3127   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3128                                                  operands[2], mask));
3129   DONE;
3130 })
3131
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3133
3134 ;; Lane extraction with sign extension to general purpose register.
3135 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3136   [(set (match_operand:GPI 0 "register_operand" "=r")
3137         (sign_extend:GPI
3138           (vec_select:<VDQQH:VEL>
3139             (match_operand:VDQQH 1 "register_operand" "w")
3140             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3141   "TARGET_SIMD"
3142   {
3143     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3144                                            INTVAL (operands[2]));
3145     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3146   }
3147   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3148 )
3149
3150 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3151   [(set (match_operand:GPI 0 "register_operand" "=r")
3152         (zero_extend:GPI
3153           (vec_select:<VDQQH:VEL>
3154             (match_operand:VDQQH 1 "register_operand" "w")
3155             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3156   "TARGET_SIMD"
3157   {
3158     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3159                                            INTVAL (operands[2]));
3160     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3161   }
3162   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3163 )
3164
3165 ;; Lane extraction of a value, neither sign nor zero extension
3166 ;; is guaranteed so upper bits should be considered undefined.
3167 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3168 (define_insn "aarch64_get_lane<mode>"
3169   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3170         (vec_select:<VEL>
3171           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3172           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3173   "TARGET_SIMD"
3174   {
3175     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3176     switch (which_alternative)
3177       {
3178         case 0:
3179           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3180         case 1:
3181           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3182         case 2:
3183           return "st1\\t{%1.<Vetype>}[%2], %0";
3184         default:
3185           gcc_unreachable ();
3186       }
3187   }
3188   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3189 )
3190
3191 (define_insn "load_pair_lanes<mode>"
3192   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3193         (vec_concat:<VDBL>
3194            (match_operand:VDC 1 "memory_operand" "Utq")
3195            (match_operand:VDC 2 "memory_operand" "m")))]
3196   "TARGET_SIMD && !STRICT_ALIGNMENT
3197    && rtx_equal_p (XEXP (operands[2], 0),
3198                    plus_constant (Pmode,
3199                                   XEXP (operands[1], 0),
3200                                   GET_MODE_SIZE (<MODE>mode)))"
3201   "ldr\\t%q0, %1"
3202   [(set_attr "type" "neon_load1_1reg_q")]
3203 )
3204
3205 (define_insn "store_pair_lanes<mode>"
3206   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3207         (vec_concat:<VDBL>
3208            (match_operand:VDC 1 "register_operand" "w, r")
3209            (match_operand:VDC 2 "register_operand" "w, r")))]
3210   "TARGET_SIMD"
3211   "@
3212    stp\\t%d1, %d2, %y0
3213    stp\\t%x1, %x2, %y0"
3214   [(set_attr "type" "neon_stp, store_16")]
3215 )
3216
3217 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3218 ;; dest vector.
3219
3220 (define_insn "@aarch64_combinez<mode>"
3221   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3222         (vec_concat:<VDBL>
3223           (match_operand:VDC 1 "general_operand" "w,?r,m")
3224           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3225   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3226   "@
3227    mov\\t%0.8b, %1.8b
3228    fmov\t%d0, %1
3229    ldr\\t%d0, %1"
3230   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3231    (set_attr "arch" "simd,fp,simd")]
3232 )
3233
3234 (define_insn "@aarch64_combinez_be<mode>"
3235   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3236         (vec_concat:<VDBL>
3237           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3238           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3239   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3240   "@
3241    mov\\t%0.8b, %1.8b
3242    fmov\t%d0, %1
3243    ldr\\t%d0, %1"
3244   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3245    (set_attr "arch" "simd,fp,simd")]
3246 )
3247
3248 (define_expand "aarch64_combine<mode>"
3249   [(match_operand:<VDBL> 0 "register_operand")
3250    (match_operand:VDC 1 "register_operand")
3251    (match_operand:VDC 2 "register_operand")]
3252   "TARGET_SIMD"
3253 {
3254   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3255
3256   DONE;
3257 }
3258 )
3259
3260 (define_expand "@aarch64_simd_combine<mode>"
3261   [(match_operand:<VDBL> 0 "register_operand")
3262    (match_operand:VDC 1 "register_operand")
3263    (match_operand:VDC 2 "register_operand")]
3264   "TARGET_SIMD"
3265   {
3266     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3267     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3268     DONE;
3269   }
3270 [(set_attr "type" "multiple")]
3271 )
3272
3273 ;; <su><addsub>l<q>.
3274
3275 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3276  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3277        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3278                            (match_operand:VQW 1 "register_operand" "w")
3279                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3280                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3281                            (match_operand:VQW 2 "register_operand" "w")
3282                            (match_dup 3)))))]
3283   "TARGET_SIMD"
3284   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3285   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3286 )
3287
3288 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3289  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3290        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3291                            (match_operand:VQW 1 "register_operand" "w")
3292                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3293                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3294                            (match_operand:VQW 2 "register_operand" "w")
3295                            (match_dup 3)))))]
3296   "TARGET_SIMD"
3297   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3298   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3299 )
3300
3301
3302 (define_expand "aarch64_saddl2<mode>"
3303   [(match_operand:<VWIDE> 0 "register_operand")
3304    (match_operand:VQW 1 "register_operand")
3305    (match_operand:VQW 2 "register_operand")]
3306   "TARGET_SIMD"
3307 {
3308   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3309   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3310                                                   operands[2], p));
3311   DONE;
3312 })
3313
3314 (define_expand "aarch64_uaddl2<mode>"
3315   [(match_operand:<VWIDE> 0 "register_operand")
3316    (match_operand:VQW 1 "register_operand")
3317    (match_operand:VQW 2 "register_operand")]
3318   "TARGET_SIMD"
3319 {
3320   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3322                                                   operands[2], p));
3323   DONE;
3324 })
3325
3326 (define_expand "aarch64_ssubl2<mode>"
3327   [(match_operand:<VWIDE> 0 "register_operand")
3328    (match_operand:VQW 1 "register_operand")
3329    (match_operand:VQW 2 "register_operand")]
3330   "TARGET_SIMD"
3331 {
3332   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3334                                                 operands[2], p));
3335   DONE;
3336 })
3337
3338 (define_expand "aarch64_usubl2<mode>"
3339   [(match_operand:<VWIDE> 0 "register_operand")
3340    (match_operand:VQW 1 "register_operand")
3341    (match_operand:VQW 2 "register_operand")]
3342   "TARGET_SIMD"
3343 {
3344   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3346                                                 operands[2], p));
3347   DONE;
3348 })
3349
3350 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3351  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3352        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3353                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3354                        (ANY_EXTEND:<VWIDE>
3355                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3356   "TARGET_SIMD"
3357   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3358   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3359 )
3360
3361 ;; <su><addsub>w<q>.
3362
3363 (define_expand "widen_ssum<mode>3"
3364   [(set (match_operand:<VDBLW> 0 "register_operand")
3365         (plus:<VDBLW> (sign_extend:<VDBLW>
3366                         (match_operand:VQW 1 "register_operand"))
3367                       (match_operand:<VDBLW> 2 "register_operand")))]
3368   "TARGET_SIMD"
3369   {
3370     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3371     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3372
3373     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3374                                                 operands[1], p));
3375     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3376     DONE;
3377   }
3378 )
3379
3380 (define_expand "widen_ssum<mode>3"
3381   [(set (match_operand:<VWIDE> 0 "register_operand")
3382         (plus:<VWIDE> (sign_extend:<VWIDE>
3383                         (match_operand:VD_BHSI 1 "register_operand"))
3384                       (match_operand:<VWIDE> 2 "register_operand")))]
3385   "TARGET_SIMD"
3386 {
3387   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3388   DONE;
3389 })
3390
3391 (define_expand "widen_usum<mode>3"
3392   [(set (match_operand:<VDBLW> 0 "register_operand")
3393         (plus:<VDBLW> (zero_extend:<VDBLW>
3394                         (match_operand:VQW 1 "register_operand"))
3395                       (match_operand:<VDBLW> 2 "register_operand")))]
3396   "TARGET_SIMD"
3397   {
3398     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3399     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3400
3401     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3402                                                  operands[1], p));
3403     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3404     DONE;
3405   }
3406 )
3407
3408 (define_expand "widen_usum<mode>3"
3409   [(set (match_operand:<VWIDE> 0 "register_operand")
3410         (plus:<VWIDE> (zero_extend:<VWIDE>
3411                         (match_operand:VD_BHSI 1 "register_operand"))
3412                       (match_operand:<VWIDE> 2 "register_operand")))]
3413   "TARGET_SIMD"
3414 {
3415   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3416   DONE;
3417 })
3418
3419 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3420   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3421         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3422           (ANY_EXTEND:<VWIDE>
3423             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3424   "TARGET_SIMD"
3425   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426   [(set_attr "type" "neon_sub_widen")]
3427 )
3428
3429 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3430   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3432           (ANY_EXTEND:<VWIDE>
3433             (vec_select:<VHALF>
3434               (match_operand:VQW 2 "register_operand" "w")
3435               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3436   "TARGET_SIMD"
3437   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3438   [(set_attr "type" "neon_sub_widen")]
3439 )
3440
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3442   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3444           (ANY_EXTEND:<VWIDE>
3445             (vec_select:<VHALF>
3446               (match_operand:VQW 2 "register_operand" "w")
3447               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3448   "TARGET_SIMD"
3449   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3450   [(set_attr "type" "neon_sub_widen")]
3451 )
3452
3453 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3454   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455         (plus:<VWIDE>
3456           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3457           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3458   "TARGET_SIMD"
3459   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3460   [(set_attr "type" "neon_add_widen")]
3461 )
3462
3463 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3464   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3465         (plus:<VWIDE>
3466           (ANY_EXTEND:<VWIDE>
3467             (vec_select:<VHALF>
3468               (match_operand:VQW 2 "register_operand" "w")
3469               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3470           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3471   "TARGET_SIMD"
3472   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3473   [(set_attr "type" "neon_add_widen")]
3474 )
3475
3476 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3477   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3478         (plus:<VWIDE>
3479           (ANY_EXTEND:<VWIDE>
3480             (vec_select:<VHALF>
3481               (match_operand:VQW 2 "register_operand" "w")
3482               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3483           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3484   "TARGET_SIMD"
3485   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3486   [(set_attr "type" "neon_add_widen")]
3487 )
3488
3489 (define_expand "aarch64_saddw2<mode>"
3490   [(match_operand:<VWIDE> 0 "register_operand")
3491    (match_operand:<VWIDE> 1 "register_operand")
3492    (match_operand:VQW 2 "register_operand")]
3493   "TARGET_SIMD"
3494 {
3495   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3496   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3497                                                 operands[2], p));
3498   DONE;
3499 })
3500
3501 (define_expand "aarch64_uaddw2<mode>"
3502   [(match_operand:<VWIDE> 0 "register_operand")
3503    (match_operand:<VWIDE> 1 "register_operand")
3504    (match_operand:VQW 2 "register_operand")]
3505   "TARGET_SIMD"
3506 {
3507   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3509                                                 operands[2], p));
3510   DONE;
3511 })
3512
3513
3514 (define_expand "aarch64_ssubw2<mode>"
3515   [(match_operand:<VWIDE> 0 "register_operand")
3516    (match_operand:<VWIDE> 1 "register_operand")
3517    (match_operand:VQW 2 "register_operand")]
3518   "TARGET_SIMD"
3519 {
3520   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3521   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3522                                                 operands[2], p));
3523   DONE;
3524 })
3525
3526 (define_expand "aarch64_usubw2<mode>"
3527   [(match_operand:<VWIDE> 0 "register_operand")
3528    (match_operand:<VWIDE> 1 "register_operand")
3529    (match_operand:VQW 2 "register_operand")]
3530   "TARGET_SIMD"
3531 {
3532   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3534                                                 operands[2], p));
3535   DONE;
3536 })
3537
3538 ;; <su><r>h<addsub>.
3539
3540 (define_expand "<u>avg<mode>3_floor"
3541   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3542         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3543                           (match_operand:VDQ_BHSI 2 "register_operand")]
3544                          HADD))]
3545   "TARGET_SIMD"
3546 )
3547
3548 (define_expand "<u>avg<mode>3_ceil"
3549   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3550         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3551                           (match_operand:VDQ_BHSI 2 "register_operand")]
3552                          RHADD))]
3553   "TARGET_SIMD"
3554 )
3555
3556 (define_insn "aarch64_<sur>h<addsub><mode>"
3557   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3558         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3559                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3560                      HADDSUB))]
3561   "TARGET_SIMD"
3562   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3563   [(set_attr "type" "neon_<addsub>_halve<q>")]
3564 )
3565
3566 ;; <r><addsub>hn<q>.
3567
3568 (define_insn "aarch64_<sur><addsub>hn<mode>"
3569   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3570         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3571                             (match_operand:VQN 2 "register_operand" "w")]
3572                            ADDSUBHN))]
3573   "TARGET_SIMD"
3574   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3575   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3576 )
3577
3578 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3579   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3580         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3581                              (match_operand:VQN 2 "register_operand" "w")
3582                              (match_operand:VQN 3 "register_operand" "w")]
3583                             ADDSUBHN2))]
3584   "TARGET_SIMD"
3585   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3586   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3587 )
3588
3589 ;; pmul.
3590
3591 (define_insn "aarch64_pmul<mode>"
3592   [(set (match_operand:VB 0 "register_operand" "=w")
3593         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3594                     (match_operand:VB 2 "register_operand" "w")]
3595                    UNSPEC_PMUL))]
3596  "TARGET_SIMD"
3597  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3598   [(set_attr "type" "neon_mul_<Vetype><q>")]
3599 )
3600
3601 ;; fmulx.
3602
3603 (define_insn "aarch64_fmulx<mode>"
3604   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3605         (unspec:VHSDF_HSDF
3606           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3607            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3608            UNSPEC_FMULX))]
3609  "TARGET_SIMD"
3610  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3611  [(set_attr "type" "neon_fp_mul_<stype>")]
3612 )
3613
3614 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3615
3616 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3617   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3618         (unspec:VDQSF
3619          [(match_operand:VDQSF 1 "register_operand" "w")
3620           (vec_duplicate:VDQSF
3621            (vec_select:<VEL>
3622             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3623             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3624          UNSPEC_FMULX))]
3625   "TARGET_SIMD"
3626   {
3627     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3628     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3629   }
3630   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3631 )
3632
3633 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3634
3635 (define_insn "*aarch64_mulx_elt<mode>"
3636   [(set (match_operand:VDQF 0 "register_operand" "=w")
3637         (unspec:VDQF
3638          [(match_operand:VDQF 1 "register_operand" "w")
3639           (vec_duplicate:VDQF
3640            (vec_select:<VEL>
3641             (match_operand:VDQF 2 "register_operand" "w")
3642             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3643          UNSPEC_FMULX))]
3644   "TARGET_SIMD"
3645   {
3646     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3647     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3648   }
3649   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3650 )
3651
3652 ;; vmulxq_lane
3653
3654 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3655   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3656         (unspec:VHSDF
3657          [(match_operand:VHSDF 1 "register_operand" "w")
3658           (vec_duplicate:VHSDF
3659             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3660          UNSPEC_FMULX))]
3661   "TARGET_SIMD"
3662   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3663   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3664 )
3665
3666 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3667 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3668 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3669
3670 (define_insn "*aarch64_vgetfmulx<mode>"
3671   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3672         (unspec:<VEL>
3673          [(match_operand:<VEL> 1 "register_operand" "w")
3674           (vec_select:<VEL>
3675            (match_operand:VDQF 2 "register_operand" "w")
3676             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3677          UNSPEC_FMULX))]
3678   "TARGET_SIMD"
3679   {
3680     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3681     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3682   }
3683   [(set_attr "type" "fmul<Vetype>")]
3684 )
3685 ;; <su>q<addsub>
3686
3687 (define_insn "aarch64_<su_optab><optab><mode>"
3688   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3689         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3690                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3691   "TARGET_SIMD"
3692   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3693   [(set_attr "type" "neon_<optab><q>")]
3694 )
3695
3696 ;; suqadd and usqadd
3697
3698 (define_insn "aarch64_<sur>qadd<mode>"
3699   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3700         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3701                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3702                        USSUQADD))]
3703   "TARGET_SIMD"
3704   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3705   [(set_attr "type" "neon_qadd<q>")]
3706 )
3707
3708 ;; sqmovun
3709
3710 (define_insn "aarch64_sqmovun<mode>"
3711   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3712         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3713                             UNSPEC_SQXTUN))]
3714    "TARGET_SIMD"
3715    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3716    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3717 )
3718
3719 ;; sqmovn and uqmovn
3720
3721 (define_insn "aarch64_<sur>qmovn<mode>"
3722   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3723         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3724                             SUQMOVN))]
3725   "TARGET_SIMD"
3726   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3727    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3728 )
3729
3730 ;; <su>q<absneg>
3731
3732 (define_insn "aarch64_s<optab><mode>"
3733   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3734         (UNQOPS:VSDQ_I
3735           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3736   "TARGET_SIMD"
3737   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3738   [(set_attr "type" "neon_<optab><q>")]
3739 )
3740
3741 ;; sq<r>dmulh.
3742
3743 (define_insn "aarch64_sq<r>dmulh<mode>"
3744   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3745         (unspec:VSDQ_HSI
3746           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3747            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3748          VQDMULH))]
3749   "TARGET_SIMD"
3750   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3751   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3752 )
3753
3754 ;; sq<r>dmulh_lane
3755
3756 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3757   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3758         (unspec:VDQHS
3759           [(match_operand:VDQHS 1 "register_operand" "w")
3760            (vec_select:<VEL>
3761              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3762              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3763          VQDMULH))]
3764   "TARGET_SIMD"
3765   "*
3766    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3767    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3768   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3769 )
3770
3771 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3772   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3773         (unspec:VDQHS
3774           [(match_operand:VDQHS 1 "register_operand" "w")
3775            (vec_select:<VEL>
3776              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3777              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3778          VQDMULH))]
3779   "TARGET_SIMD"
3780   "*
3781    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3782    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3783   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3784 )
3785
3786 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3787   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3788         (unspec:SD_HSI
3789           [(match_operand:SD_HSI 1 "register_operand" "w")
3790            (vec_select:<VEL>
3791              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3792              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3793          VQDMULH))]
3794   "TARGET_SIMD"
3795   "*
3796    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3797    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3798   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3799 )
3800
3801 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3802   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3803         (unspec:SD_HSI
3804           [(match_operand:SD_HSI 1 "register_operand" "w")
3805            (vec_select:<VEL>
3806              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3807              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3808          VQDMULH))]
3809   "TARGET_SIMD"
3810   "*
3811    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3812    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3813   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3814 )
3815
3816 ;; sqrdml[as]h.
3817
3818 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3819   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3820         (unspec:VSDQ_HSI
3821           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3822            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3823            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3824           SQRDMLH_AS))]
3825    "TARGET_SIMD_RDMA"
3826    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3827    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3828 )
3829
3830 ;; sqrdml[as]h_lane.
3831
3832 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3833   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3834         (unspec:VDQHS
3835           [(match_operand:VDQHS 1 "register_operand" "0")
3836            (match_operand:VDQHS 2 "register_operand" "w")
3837            (vec_select:<VEL>
3838              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3839              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3840           SQRDMLH_AS))]
3841    "TARGET_SIMD_RDMA"
3842    {
3843      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3844      return
3845       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3846    }
3847    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3848 )
3849
3850 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3851   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3852         (unspec:SD_HSI
3853           [(match_operand:SD_HSI 1 "register_operand" "0")
3854            (match_operand:SD_HSI 2 "register_operand" "w")
3855            (vec_select:<VEL>
3856              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3857              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3858           SQRDMLH_AS))]
3859    "TARGET_SIMD_RDMA"
3860    {
3861      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3862      return
3863       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3864    }
3865    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3866 )
3867
3868 ;; sqrdml[as]h_laneq.
3869
3870 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3871   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3872         (unspec:VDQHS
3873           [(match_operand:VDQHS 1 "register_operand" "0")
3874            (match_operand:VDQHS 2 "register_operand" "w")
3875            (vec_select:<VEL>
3876              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3877              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3878           SQRDMLH_AS))]
3879    "TARGET_SIMD_RDMA"
3880    {
3881      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3882      return
3883       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3884    }
3885    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3886 )
3887
3888 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3889   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3890         (unspec:SD_HSI
3891           [(match_operand:SD_HSI 1 "register_operand" "0")
3892            (match_operand:SD_HSI 2 "register_operand" "w")
3893            (vec_select:<VEL>
3894              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3895              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3896           SQRDMLH_AS))]
3897    "TARGET_SIMD_RDMA"
3898    {
3899      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3900      return
3901       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3902    }
3903    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3904 )
3905
3906 ;; vqdml[sa]l
3907
3908 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3909   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3910         (SBINQOPS:<VWIDE>
3911           (match_operand:<VWIDE> 1 "register_operand" "0")
3912           (ss_ashift:<VWIDE>
3913               (mult:<VWIDE>
3914                 (sign_extend:<VWIDE>
3915                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3916                 (sign_extend:<VWIDE>
3917                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3918               (const_int 1))))]
3919   "TARGET_SIMD"
3920   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3921   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3922 )
3923
3924 ;; vqdml[sa]l_lane
3925
3926 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3927   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3928         (SBINQOPS:<VWIDE>
3929           (match_operand:<VWIDE> 1 "register_operand" "0")
3930           (ss_ashift:<VWIDE>
3931             (mult:<VWIDE>
3932               (sign_extend:<VWIDE>
3933                 (match_operand:VD_HSI 2 "register_operand" "w"))
3934               (sign_extend:<VWIDE>
3935                 (vec_duplicate:VD_HSI
3936                   (vec_select:<VEL>
3937                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3938                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3939               ))
3940             (const_int 1))))]
3941   "TARGET_SIMD"
3942   {
3943     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3944     return
3945       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3946   }
3947   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3948 )
3949
3950 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3951   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3952         (SBINQOPS:<VWIDE>
3953           (match_operand:<VWIDE> 1 "register_operand" "0")
3954           (ss_ashift:<VWIDE>
3955             (mult:<VWIDE>
3956               (sign_extend:<VWIDE>
3957                 (match_operand:VD_HSI 2 "register_operand" "w"))
3958               (sign_extend:<VWIDE>
3959                 (vec_duplicate:VD_HSI
3960                   (vec_select:<VEL>
3961                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3962                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3963               ))
3964             (const_int 1))))]
3965   "TARGET_SIMD"
3966   {
3967     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3968     return
3969       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3970   }
3971   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3972 )
3973
3974 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3975   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3976         (SBINQOPS:<VWIDE>
3977           (match_operand:<VWIDE> 1 "register_operand" "0")
3978           (ss_ashift:<VWIDE>
3979             (mult:<VWIDE>
3980               (sign_extend:<VWIDE>
3981                 (match_operand:SD_HSI 2 "register_operand" "w"))
3982               (sign_extend:<VWIDE>
3983                 (vec_select:<VEL>
3984                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3986               )
3987             (const_int 1))))]
3988   "TARGET_SIMD"
3989   {
3990     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991     return
3992       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993   }
3994   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3995 )
3996
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3998   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999         (SBINQOPS:<VWIDE>
4000           (match_operand:<VWIDE> 1 "register_operand" "0")
4001           (ss_ashift:<VWIDE>
4002             (mult:<VWIDE>
4003               (sign_extend:<VWIDE>
4004                 (match_operand:SD_HSI 2 "register_operand" "w"))
4005               (sign_extend:<VWIDE>
4006                 (vec_select:<VEL>
4007                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4008                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4009               )
4010             (const_int 1))))]
4011   "TARGET_SIMD"
4012   {
4013     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4014     return
4015       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4016   }
4017   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4018 )
4019
4020 ;; vqdml[sa]l_n
4021
4022 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4023   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4024         (SBINQOPS:<VWIDE>
4025           (match_operand:<VWIDE> 1 "register_operand" "0")
4026           (ss_ashift:<VWIDE>
4027               (mult:<VWIDE>
4028                 (sign_extend:<VWIDE>
4029                       (match_operand:VD_HSI 2 "register_operand" "w"))
4030                 (sign_extend:<VWIDE>
4031                   (vec_duplicate:VD_HSI
4032                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4033               (const_int 1))))]
4034   "TARGET_SIMD"
4035   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4036   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4037 )
4038
4039 ;; sqdml[as]l2
4040
4041 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4042   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4043         (SBINQOPS:<VWIDE>
4044          (match_operand:<VWIDE> 1 "register_operand" "0")
4045          (ss_ashift:<VWIDE>
4046              (mult:<VWIDE>
4047                (sign_extend:<VWIDE>
4048                  (vec_select:<VHALF>
4049                      (match_operand:VQ_HSI 2 "register_operand" "w")
4050                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4051                (sign_extend:<VWIDE>
4052                  (vec_select:<VHALF>
4053                      (match_operand:VQ_HSI 3 "register_operand" "w")
4054                      (match_dup 4))))
4055              (const_int 1))))]
4056   "TARGET_SIMD"
4057   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4058   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4059 )
4060
4061 (define_expand "aarch64_sqdmlal2<mode>"
4062   [(match_operand:<VWIDE> 0 "register_operand")
4063    (match_operand:<VWIDE> 1 "register_operand")
4064    (match_operand:VQ_HSI 2 "register_operand")
4065    (match_operand:VQ_HSI 3 "register_operand")]
4066   "TARGET_SIMD"
4067 {
4068   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4069   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4070                                                   operands[2], operands[3], p));
4071   DONE;
4072 })
4073
4074 (define_expand "aarch64_sqdmlsl2<mode>"
4075   [(match_operand:<VWIDE> 0 "register_operand")
4076    (match_operand:<VWIDE> 1 "register_operand")
4077    (match_operand:VQ_HSI 2 "register_operand")
4078    (match_operand:VQ_HSI 3 "register_operand")]
4079   "TARGET_SIMD"
4080 {
4081   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4082   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4083                                                   operands[2], operands[3], p));
4084   DONE;
4085 })
4086
4087 ;; vqdml[sa]l2_lane
4088
4089 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4090   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4091         (SBINQOPS:<VWIDE>
4092           (match_operand:<VWIDE> 1 "register_operand" "0")
4093           (ss_ashift:<VWIDE>
4094               (mult:<VWIDE>
4095                 (sign_extend:<VWIDE>
4096                   (vec_select:<VHALF>
4097                     (match_operand:VQ_HSI 2 "register_operand" "w")
4098                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4099                 (sign_extend:<VWIDE>
4100                   (vec_duplicate:<VHALF>
4101                     (vec_select:<VEL>
4102                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4103                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4104                     ))))
4105               (const_int 1))))]
4106   "TARGET_SIMD"
4107   {
4108     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4109     return
4110      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4111   }
4112   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4113 )
4114
4115 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4116   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4117         (SBINQOPS:<VWIDE>
4118           (match_operand:<VWIDE> 1 "register_operand" "0")
4119           (ss_ashift:<VWIDE>
4120               (mult:<VWIDE>
4121                 (sign_extend:<VWIDE>
4122                   (vec_select:<VHALF>
4123                     (match_operand:VQ_HSI 2 "register_operand" "w")
4124                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4125                 (sign_extend:<VWIDE>
4126                   (vec_duplicate:<VHALF>
4127                     (vec_select:<VEL>
4128                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4129                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4130                     ))))
4131               (const_int 1))))]
4132   "TARGET_SIMD"
4133   {
4134     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4135     return
4136      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4137   }
4138   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4139 )
4140
4141 (define_expand "aarch64_sqdmlal2_lane<mode>"
4142   [(match_operand:<VWIDE> 0 "register_operand")
4143    (match_operand:<VWIDE> 1 "register_operand")
4144    (match_operand:VQ_HSI 2 "register_operand")
4145    (match_operand:<VCOND> 3 "register_operand")
4146    (match_operand:SI 4 "immediate_operand")]
4147   "TARGET_SIMD"
4148 {
4149   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4150   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4151                                                        operands[2], operands[3],
4152                                                        operands[4], p));
4153   DONE;
4154 })
4155
4156 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4157   [(match_operand:<VWIDE> 0 "register_operand")
4158    (match_operand:<VWIDE> 1 "register_operand")
4159    (match_operand:VQ_HSI 2 "register_operand")
4160    (match_operand:<VCONQ> 3 "register_operand")
4161    (match_operand:SI 4 "immediate_operand")]
4162   "TARGET_SIMD"
4163 {
4164   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4165   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4166                                                        operands[2], operands[3],
4167                                                        operands[4], p));
4168   DONE;
4169 })
4170
4171 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4172   [(match_operand:<VWIDE> 0 "register_operand")
4173    (match_operand:<VWIDE> 1 "register_operand")
4174    (match_operand:VQ_HSI 2 "register_operand")
4175    (match_operand:<VCOND> 3 "register_operand")
4176    (match_operand:SI 4 "immediate_operand")]
4177   "TARGET_SIMD"
4178 {
4179   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4180   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4181                                                        operands[2], operands[3],
4182                                                        operands[4], p));
4183   DONE;
4184 })
4185
4186 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4187   [(match_operand:<VWIDE> 0 "register_operand")
4188    (match_operand:<VWIDE> 1 "register_operand")
4189    (match_operand:VQ_HSI 2 "register_operand")
4190    (match_operand:<VCONQ> 3 "register_operand")
4191    (match_operand:SI 4 "immediate_operand")]
4192   "TARGET_SIMD"
4193 {
4194   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4195   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4196                                                        operands[2], operands[3],
4197                                                        operands[4], p));
4198   DONE;
4199 })
4200
4201 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4202   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203         (SBINQOPS:<VWIDE>
4204           (match_operand:<VWIDE> 1 "register_operand" "0")
4205           (ss_ashift:<VWIDE>
4206             (mult:<VWIDE>
4207               (sign_extend:<VWIDE>
4208                 (vec_select:<VHALF>
4209                   (match_operand:VQ_HSI 2 "register_operand" "w")
4210                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4211               (sign_extend:<VWIDE>
4212                 (vec_duplicate:<VHALF>
4213                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4214             (const_int 1))))]
4215   "TARGET_SIMD"
4216   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4217   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4218 )
4219
4220 (define_expand "aarch64_sqdmlal2_n<mode>"
4221   [(match_operand:<VWIDE> 0 "register_operand")
4222    (match_operand:<VWIDE> 1 "register_operand")
4223    (match_operand:VQ_HSI 2 "register_operand")
4224    (match_operand:<VEL> 3 "register_operand")]
4225   "TARGET_SIMD"
4226 {
4227   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4228   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4229                                                     operands[2], operands[3],
4230                                                     p));
4231   DONE;
4232 })
4233
4234 (define_expand "aarch64_sqdmlsl2_n<mode>"
4235   [(match_operand:<VWIDE> 0 "register_operand")
4236    (match_operand:<VWIDE> 1 "register_operand")
4237    (match_operand:VQ_HSI 2 "register_operand")
4238    (match_operand:<VEL> 3 "register_operand")]
4239   "TARGET_SIMD"
4240 {
4241   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4242   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4243                                                     operands[2], operands[3],
4244                                                     p));
4245   DONE;
4246 })
4247
4248 ;; vqdmull
4249
4250 (define_insn "aarch64_sqdmull<mode>"
4251   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4252         (ss_ashift:<VWIDE>
4253              (mult:<VWIDE>
4254                (sign_extend:<VWIDE>
4255                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4256                (sign_extend:<VWIDE>
4257                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4258              (const_int 1)))]
4259   "TARGET_SIMD"
4260   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4261   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4262 )
4263
4264 ;; vqdmull_lane
4265
4266 (define_insn "aarch64_sqdmull_lane<mode>"
4267   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4268         (ss_ashift:<VWIDE>
4269              (mult:<VWIDE>
4270                (sign_extend:<VWIDE>
4271                  (match_operand:VD_HSI 1 "register_operand" "w"))
4272                (sign_extend:<VWIDE>
4273                  (vec_duplicate:VD_HSI
4274                    (vec_select:<VEL>
4275                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4276                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4277                ))
4278              (const_int 1)))]
4279   "TARGET_SIMD"
4280   {
4281     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4282     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4283   }
4284   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4285 )
4286
4287 (define_insn "aarch64_sqdmull_laneq<mode>"
4288   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4289         (ss_ashift:<VWIDE>
4290              (mult:<VWIDE>
4291                (sign_extend:<VWIDE>
4292                  (match_operand:VD_HSI 1 "register_operand" "w"))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:VD_HSI
4295                    (vec_select:<VEL>
4296                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4297                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298                ))
4299              (const_int 1)))]
4300   "TARGET_SIMD"
4301   {
4302     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4303     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304   }
4305   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4306 )
4307
4308 (define_insn "aarch64_sqdmull_lane<mode>"
4309   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310         (ss_ashift:<VWIDE>
4311              (mult:<VWIDE>
4312                (sign_extend:<VWIDE>
4313                  (match_operand:SD_HSI 1 "register_operand" "w"))
4314                (sign_extend:<VWIDE>
4315                  (vec_select:<VEL>
4316                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4317                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4318                ))
4319              (const_int 1)))]
4320   "TARGET_SIMD"
4321   {
4322     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4323     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4324   }
4325   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4326 )
4327
4328 (define_insn "aarch64_sqdmull_laneq<mode>"
4329   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4330         (ss_ashift:<VWIDE>
4331              (mult:<VWIDE>
4332                (sign_extend:<VWIDE>
4333                  (match_operand:SD_HSI 1 "register_operand" "w"))
4334                (sign_extend:<VWIDE>
4335                  (vec_select:<VEL>
4336                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4337                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4338                ))
4339              (const_int 1)))]
4340   "TARGET_SIMD"
4341   {
4342     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4343     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4344   }
4345   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4346 )
4347
4348 ;; vqdmull_n
4349
4350 (define_insn "aarch64_sqdmull_n<mode>"
4351   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4352         (ss_ashift:<VWIDE>
4353              (mult:<VWIDE>
4354                (sign_extend:<VWIDE>
4355                  (match_operand:VD_HSI 1 "register_operand" "w"))
4356                (sign_extend:<VWIDE>
4357                  (vec_duplicate:VD_HSI
4358                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4359                )
4360              (const_int 1)))]
4361   "TARGET_SIMD"
4362   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4363   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4364 )
4365
4366 ;; vqdmull2
4367
4368
4369
4370 (define_insn "aarch64_sqdmull2<mode>_internal"
4371   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4372         (ss_ashift:<VWIDE>
4373              (mult:<VWIDE>
4374                (sign_extend:<VWIDE>
4375                  (vec_select:<VHALF>
4376                    (match_operand:VQ_HSI 1 "register_operand" "w")
4377                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4378                (sign_extend:<VWIDE>
4379                  (vec_select:<VHALF>
4380                    (match_operand:VQ_HSI 2 "register_operand" "w")
4381                    (match_dup 3)))
4382                )
4383              (const_int 1)))]
4384   "TARGET_SIMD"
4385   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4386   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4387 )
4388
4389 (define_expand "aarch64_sqdmull2<mode>"
4390   [(match_operand:<VWIDE> 0 "register_operand")
4391    (match_operand:VQ_HSI 1 "register_operand")
4392    (match_operand:VQ_HSI 2 "register_operand")]
4393   "TARGET_SIMD"
4394 {
4395   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4396   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4397                                                   operands[2], p));
4398   DONE;
4399 })
4400
4401 ;; vqdmull2_lane
4402
4403 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4404   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4405         (ss_ashift:<VWIDE>
4406              (mult:<VWIDE>
4407                (sign_extend:<VWIDE>
4408                  (vec_select:<VHALF>
4409                    (match_operand:VQ_HSI 1 "register_operand" "w")
4410                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4411                (sign_extend:<VWIDE>
4412                  (vec_duplicate:<VHALF>
4413                    (vec_select:<VEL>
4414                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4415                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4416                ))
4417              (const_int 1)))]
4418   "TARGET_SIMD"
4419   {
4420     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4421     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4422   }
4423   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4424 )
4425
4426 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4427   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4428         (ss_ashift:<VWIDE>
4429              (mult:<VWIDE>
4430                (sign_extend:<VWIDE>
4431                  (vec_select:<VHALF>
4432                    (match_operand:VQ_HSI 1 "register_operand" "w")
4433                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4434                (sign_extend:<VWIDE>
4435                  (vec_duplicate:<VHALF>
4436                    (vec_select:<VEL>
4437                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4438                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4439                ))
4440              (const_int 1)))]
4441   "TARGET_SIMD"
4442   {
4443     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4444     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4445   }
4446   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4447 )
4448
4449 (define_expand "aarch64_sqdmull2_lane<mode>"
4450   [(match_operand:<VWIDE> 0 "register_operand")
4451    (match_operand:VQ_HSI 1 "register_operand")
4452    (match_operand:<VCOND> 2 "register_operand")
4453    (match_operand:SI 3 "immediate_operand")]
4454   "TARGET_SIMD"
4455 {
4456   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4457   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4458                                                        operands[2], operands[3],
4459                                                        p));
4460   DONE;
4461 })
4462
4463 (define_expand "aarch64_sqdmull2_laneq<mode>"
4464   [(match_operand:<VWIDE> 0 "register_operand")
4465    (match_operand:VQ_HSI 1 "register_operand")
4466    (match_operand:<VCONQ> 2 "register_operand")
4467    (match_operand:SI 3 "immediate_operand")]
4468   "TARGET_SIMD"
4469 {
4470   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4471   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4472                                                        operands[2], operands[3],
4473                                                        p));
4474   DONE;
4475 })
4476
4477 ;; vqdmull2_n
4478
4479 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4480   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4481         (ss_ashift:<VWIDE>
4482              (mult:<VWIDE>
4483                (sign_extend:<VWIDE>
4484                  (vec_select:<VHALF>
4485                    (match_operand:VQ_HSI 1 "register_operand" "w")
4486                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4487                (sign_extend:<VWIDE>
4488                  (vec_duplicate:<VHALF>
4489                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4490                )
4491              (const_int 1)))]
4492   "TARGET_SIMD"
4493   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4494   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4495 )
4496
4497 (define_expand "aarch64_sqdmull2_n<mode>"
4498   [(match_operand:<VWIDE> 0 "register_operand")
4499    (match_operand:VQ_HSI 1 "register_operand")
4500    (match_operand:<VEL> 2 "register_operand")]
4501   "TARGET_SIMD"
4502 {
4503   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4504   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4505                                                     operands[2], p));
4506   DONE;
4507 })
4508
4509 ;; vshl
4510
4511 (define_insn "aarch64_<sur>shl<mode>"
4512   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4513         (unspec:VSDQ_I_DI
4514           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4515            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4516          VSHL))]
4517   "TARGET_SIMD"
4518   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4519   [(set_attr "type" "neon_shift_reg<q>")]
4520 )
4521
4522
4523 ;; vqshl
4524
4525 (define_insn "aarch64_<sur>q<r>shl<mode>"
4526   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4527         (unspec:VSDQ_I
4528           [(match_operand:VSDQ_I 1 "register_operand" "w")
4529            (match_operand:VSDQ_I 2 "register_operand" "w")]
4530          VQSHL))]
4531   "TARGET_SIMD"
4532   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4533   [(set_attr "type" "neon_sat_shift_reg<q>")]
4534 )
4535
4536 ;; vshll_n
4537
4538 (define_insn "aarch64_<sur>shll_n<mode>"
4539   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4540         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4541                          (match_operand:SI 2
4542                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4543                          VSHLL))]
4544   "TARGET_SIMD"
4545   {
4546     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4547       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4548     else
4549       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4550   }
4551   [(set_attr "type" "neon_shift_imm_long")]
4552 )
4553
4554 ;; vshll_high_n
4555
4556 (define_insn "aarch64_<sur>shll2_n<mode>"
4557   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4558         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4559                          (match_operand:SI 2 "immediate_operand" "i")]
4560                          VSHLL))]
4561   "TARGET_SIMD"
4562   {
4563     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4564       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4565     else
4566       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4567   }
4568   [(set_attr "type" "neon_shift_imm_long")]
4569 )
4570
4571 ;; vrshr_n
4572
4573 (define_insn "aarch64_<sur>shr_n<mode>"
4574   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4575         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4576                            (match_operand:SI 2
4577                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4578                           VRSHR_N))]
4579   "TARGET_SIMD"
4580   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4581   [(set_attr "type" "neon_sat_shift_imm<q>")]
4582 )
4583
4584 ;; v(r)sra_n
4585
4586 (define_insn "aarch64_<sur>sra_n<mode>"
4587   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4588         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4589                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4590                        (match_operand:SI 3
4591                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4592                       VSRA))]
4593   "TARGET_SIMD"
4594   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4595   [(set_attr "type" "neon_shift_acc<q>")]
4596 )
4597
4598 ;; vs<lr>i_n
4599
4600 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4601   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4602         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4603                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4604                        (match_operand:SI 3
4605                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4606                       VSLRI))]
4607   "TARGET_SIMD"
4608   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4609   [(set_attr "type" "neon_shift_imm<q>")]
4610 )
4611
4612 ;; vqshl(u)
4613
4614 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4615   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4616         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4617                        (match_operand:SI 2
4618                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4619                       VQSHL_N))]
4620   "TARGET_SIMD"
4621   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4622   [(set_attr "type" "neon_sat_shift_imm<q>")]
4623 )
4624
4625
4626 ;; vq(r)shr(u)n_n
4627
4628 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4629   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4630         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4631                             (match_operand:SI 2
4632                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4633                            VQSHRN_N))]
4634   "TARGET_SIMD"
4635   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4636   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4637 )
4638
4639
4640 ;; cm(eq|ge|gt|lt|le)
4641 ;; Note, we have constraints for Dz and Z as different expanders
4642 ;; have different ideas of what should be passed to this pattern.
4643
4644 (define_insn "aarch64_cm<optab><mode>"
4645   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4646         (neg:<V_INT_EQUIV>
4647           (COMPARISONS:<V_INT_EQUIV>
4648             (match_operand:VDQ_I 1 "register_operand" "w,w")
4649             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4650           )))]
4651   "TARGET_SIMD"
4652   "@
4653   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4654   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4655   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4656 )
4657
4658 (define_insn_and_split "aarch64_cm<optab>di"
4659   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4660         (neg:DI
4661           (COMPARISONS:DI
4662             (match_operand:DI 1 "register_operand" "w,w,r")
4663             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4664           )))
4665      (clobber (reg:CC CC_REGNUM))]
4666   "TARGET_SIMD"
4667   "#"
4668   "&& reload_completed"
4669   [(set (match_operand:DI 0 "register_operand")
4670         (neg:DI
4671           (COMPARISONS:DI
4672             (match_operand:DI 1 "register_operand")
4673             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4674           )))]
4675   {
4676     /* If we are in the general purpose register file,
4677        we split to a sequence of comparison and store.  */
4678     if (GP_REGNUM_P (REGNO (operands[0]))
4679         && GP_REGNUM_P (REGNO (operands[1])))
4680       {
4681         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4682         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4683         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4684         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4685         DONE;
4686       }
4687     /* Otherwise, we expand to a similar pattern which does not
4688        clobber CC_REGNUM.  */
4689   }
4690   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4691 )
4692
4693 (define_insn "*aarch64_cm<optab>di"
4694   [(set (match_operand:DI 0 "register_operand" "=w,w")
4695         (neg:DI
4696           (COMPARISONS:DI
4697             (match_operand:DI 1 "register_operand" "w,w")
4698             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4699           )))]
4700   "TARGET_SIMD && reload_completed"
4701   "@
4702   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4703   cm<optab>\t%d0, %d1, #0"
4704   [(set_attr "type" "neon_compare, neon_compare_zero")]
4705 )
4706
4707 ;; cm(hs|hi)
4708
4709 (define_insn "aarch64_cm<optab><mode>"
4710   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4711         (neg:<V_INT_EQUIV>
4712           (UCOMPARISONS:<V_INT_EQUIV>
4713             (match_operand:VDQ_I 1 "register_operand" "w")
4714             (match_operand:VDQ_I 2 "register_operand" "w")
4715           )))]
4716   "TARGET_SIMD"
4717   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4718   [(set_attr "type" "neon_compare<q>")]
4719 )
4720
4721 (define_insn_and_split "aarch64_cm<optab>di"
4722   [(set (match_operand:DI 0 "register_operand" "=w,r")
4723         (neg:DI
4724           (UCOMPARISONS:DI
4725             (match_operand:DI 1 "register_operand" "w,r")
4726             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4727           )))
4728     (clobber (reg:CC CC_REGNUM))]
4729   "TARGET_SIMD"
4730   "#"
4731   "&& reload_completed"
4732   [(set (match_operand:DI 0 "register_operand")
4733         (neg:DI
4734           (UCOMPARISONS:DI
4735             (match_operand:DI 1 "register_operand")
4736             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4737           )))]
4738   {
4739     /* If we are in the general purpose register file,
4740        we split to a sequence of comparison and store.  */
4741     if (GP_REGNUM_P (REGNO (operands[0]))
4742         && GP_REGNUM_P (REGNO (operands[1])))
4743       {
4744         machine_mode mode = CCmode;
4745         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4746         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4747         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4748         DONE;
4749       }
4750     /* Otherwise, we expand to a similar pattern which does not
4751        clobber CC_REGNUM.  */
4752   }
4753   [(set_attr "type" "neon_compare,multiple")]
4754 )
4755
4756 (define_insn "*aarch64_cm<optab>di"
4757   [(set (match_operand:DI 0 "register_operand" "=w")
4758         (neg:DI
4759           (UCOMPARISONS:DI
4760             (match_operand:DI 1 "register_operand" "w")
4761             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4762           )))]
4763   "TARGET_SIMD && reload_completed"
4764   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4765   [(set_attr "type" "neon_compare")]
4766 )
4767
4768 ;; cmtst
4769
4770 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4771 ;; we don't have any insns using ne, and aarch64_vcond outputs
4772 ;; not (neg (eq (and x y) 0))
4773 ;; which is rewritten by simplify_rtx as
4774 ;; plus (eq (and x y) 0) -1.
4775
4776 (define_insn "aarch64_cmtst<mode>"
4777   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4778         (plus:<V_INT_EQUIV>
4779           (eq:<V_INT_EQUIV>
4780             (and:VDQ_I
4781               (match_operand:VDQ_I 1 "register_operand" "w")
4782               (match_operand:VDQ_I 2 "register_operand" "w"))
4783             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4784           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4785   ]
4786   "TARGET_SIMD"
4787   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4788   [(set_attr "type" "neon_tst<q>")]
4789 )
4790
4791 (define_insn_and_split "aarch64_cmtstdi"
4792   [(set (match_operand:DI 0 "register_operand" "=w,r")
4793         (neg:DI
4794           (ne:DI
4795             (and:DI
4796               (match_operand:DI 1 "register_operand" "w,r")
4797               (match_operand:DI 2 "register_operand" "w,r"))
4798             (const_int 0))))
4799     (clobber (reg:CC CC_REGNUM))]
4800   "TARGET_SIMD"
4801   "#"
4802   "&& reload_completed"
4803   [(set (match_operand:DI 0 "register_operand")
4804         (neg:DI
4805           (ne:DI
4806             (and:DI
4807               (match_operand:DI 1 "register_operand")
4808               (match_operand:DI 2 "register_operand"))
4809             (const_int 0))))]
4810   {
4811     /* If we are in the general purpose register file,
4812        we split to a sequence of comparison and store.  */
4813     if (GP_REGNUM_P (REGNO (operands[0]))
4814         && GP_REGNUM_P (REGNO (operands[1])))
4815       {
4816         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4817         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4818         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4819         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4820         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4821         DONE;
4822       }
4823     /* Otherwise, we expand to a similar pattern which does not
4824        clobber CC_REGNUM.  */
4825   }
4826   [(set_attr "type" "neon_tst,multiple")]
4827 )
4828
4829 (define_insn "*aarch64_cmtstdi"
4830   [(set (match_operand:DI 0 "register_operand" "=w")
4831         (neg:DI
4832           (ne:DI
4833             (and:DI
4834               (match_operand:DI 1 "register_operand" "w")
4835               (match_operand:DI 2 "register_operand" "w"))
4836             (const_int 0))))]
4837   "TARGET_SIMD"
4838   "cmtst\t%d0, %d1, %d2"
4839   [(set_attr "type" "neon_tst")]
4840 )
4841
4842 ;; fcm(eq|ge|gt|le|lt)
4843
4844 (define_insn "aarch64_cm<optab><mode>"
4845   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4846         (neg:<V_INT_EQUIV>
4847           (COMPARISONS:<V_INT_EQUIV>
4848             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4849             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4850           )))]
4851   "TARGET_SIMD"
4852   "@
4853   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4854   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4855   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4856 )
4857
4858 ;; fac(ge|gt)
4859 ;; Note we can also handle what would be fac(le|lt) by
4860 ;; generating fac(ge|gt).
4861
4862 (define_insn "aarch64_fac<optab><mode>"
4863   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4864         (neg:<V_INT_EQUIV>
4865           (FAC_COMPARISONS:<V_INT_EQUIV>
4866             (abs:VHSDF_HSDF
4867               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4868             (abs:VHSDF_HSDF
4869               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4870   )))]
4871   "TARGET_SIMD"
4872   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4873   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4874 )
4875
4876 ;; addp
4877
4878 (define_insn "aarch64_addp<mode>"
4879   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4880         (unspec:VD_BHSI
4881           [(match_operand:VD_BHSI 1 "register_operand" "w")
4882            (match_operand:VD_BHSI 2 "register_operand" "w")]
4883           UNSPEC_ADDP))]
4884   "TARGET_SIMD"
4885   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4886   [(set_attr "type" "neon_reduc_add<q>")]
4887 )
4888
4889 (define_insn "aarch64_addpdi"
4890   [(set (match_operand:DI 0 "register_operand" "=w")
4891         (unspec:DI
4892           [(match_operand:V2DI 1 "register_operand" "w")]
4893           UNSPEC_ADDP))]
4894   "TARGET_SIMD"
4895   "addp\t%d0, %1.2d"
4896   [(set_attr "type" "neon_reduc_add")]
4897 )
4898
4899 ;; sqrt
4900
4901 (define_expand "sqrt<mode>2"
4902   [(set (match_operand:VHSDF 0 "register_operand")
4903         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4904   "TARGET_SIMD"
4905 {
4906   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4907     DONE;
4908 })
4909
4910 (define_insn "*sqrt<mode>2"
4911   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4912         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4913   "TARGET_SIMD"
4914   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4915   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4916 )
4917
4918 ;; Patterns for vector struct loads and stores.
4919
4920 (define_insn "aarch64_simd_ld2<mode>"
4921   [(set (match_operand:OI 0 "register_operand" "=w")
4922         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4923                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4924                    UNSPEC_LD2))]
4925   "TARGET_SIMD"
4926   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4927   [(set_attr "type" "neon_load2_2reg<q>")]
4928 )
4929
4930 (define_insn "aarch64_simd_ld2r<mode>"
4931   [(set (match_operand:OI 0 "register_operand" "=w")
4932        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4933                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4934                   UNSPEC_LD2_DUP))]
4935   "TARGET_SIMD"
4936   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4937   [(set_attr "type" "neon_load2_all_lanes<q>")]
4938 )
4939
4940 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4941   [(set (match_operand:OI 0 "register_operand" "=w")
4942         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4943                     (match_operand:OI 2 "register_operand" "0")
4944                     (match_operand:SI 3 "immediate_operand" "i")
4945                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4946                    UNSPEC_LD2_LANE))]
4947   "TARGET_SIMD"
4948   {
4949     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4950     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4951   }
4952   [(set_attr "type" "neon_load2_one_lane")]
4953 )
4954
4955 (define_expand "vec_load_lanesoi<mode>"
4956   [(set (match_operand:OI 0 "register_operand")
4957         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4958                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4959                    UNSPEC_LD2))]
4960   "TARGET_SIMD"
4961 {
4962   if (BYTES_BIG_ENDIAN)
4963     {
4964       rtx tmp = gen_reg_rtx (OImode);
4965       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4966       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4967       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4968     }
4969   else
4970     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4971   DONE;
4972 })
4973
4974 (define_insn "aarch64_simd_st2<mode>"
4975   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4976         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4977                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4978                    UNSPEC_ST2))]
4979   "TARGET_SIMD"
4980   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4981   [(set_attr "type" "neon_store2_2reg<q>")]
4982 )
4983
4984 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4985 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4986   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4987         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4988                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4989                     (match_operand:SI 2 "immediate_operand" "i")]
4990                    UNSPEC_ST2_LANE))]
4991   "TARGET_SIMD"
4992   {
4993     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4994     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4995   }
4996   [(set_attr "type" "neon_store2_one_lane<q>")]
4997 )
4998
4999 (define_expand "vec_store_lanesoi<mode>"
5000   [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5001         (unspec:OI [(match_operand:OI 1 "register_operand")
5002                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5003                    UNSPEC_ST2))]
5004   "TARGET_SIMD"
5005 {
5006   if (BYTES_BIG_ENDIAN)
5007     {
5008       rtx tmp = gen_reg_rtx (OImode);
5009       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5010       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5011       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5012     }
5013   else
5014     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
5015   DONE;
5016 })
5017
5018 (define_insn "aarch64_simd_ld3<mode>"
5019   [(set (match_operand:CI 0 "register_operand" "=w")
5020         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5021                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022                    UNSPEC_LD3))]
5023   "TARGET_SIMD"
5024   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5025   [(set_attr "type" "neon_load3_3reg<q>")]
5026 )
5027
5028 (define_insn "aarch64_simd_ld3r<mode>"
5029   [(set (match_operand:CI 0 "register_operand" "=w")
5030        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5031                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5032                   UNSPEC_LD3_DUP))]
5033   "TARGET_SIMD"
5034   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5035   [(set_attr "type" "neon_load3_all_lanes<q>")]
5036 )
5037
5038 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5039   [(set (match_operand:CI 0 "register_operand" "=w")
5040         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5041                     (match_operand:CI 2 "register_operand" "0")
5042                     (match_operand:SI 3 "immediate_operand" "i")
5043                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5044                    UNSPEC_LD3_LANE))]
5045   "TARGET_SIMD"
5046 {
5047     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5048     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5049 }
5050   [(set_attr "type" "neon_load3_one_lane")]
5051 )
5052
5053 (define_expand "vec_load_lanesci<mode>"
5054   [(set (match_operand:CI 0 "register_operand")
5055         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5056                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5057                    UNSPEC_LD3))]
5058   "TARGET_SIMD"
5059 {
5060   if (BYTES_BIG_ENDIAN)
5061     {
5062       rtx tmp = gen_reg_rtx (CImode);
5063       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5064       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5065       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5066     }
5067   else
5068     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5069   DONE;
5070 })
5071
5072 (define_insn "aarch64_simd_st3<mode>"
5073   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5074         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5075                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5076                    UNSPEC_ST3))]
5077   "TARGET_SIMD"
5078   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5079   [(set_attr "type" "neon_store3_3reg<q>")]
5080 )
5081
5082 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5083 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5084   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5085         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5086                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5087                      (match_operand:SI 2 "immediate_operand" "i")]
5088                     UNSPEC_ST3_LANE))]
5089   "TARGET_SIMD"
5090   {
5091     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5092     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5093   }
5094   [(set_attr "type" "neon_store3_one_lane<q>")]
5095 )
5096
5097 (define_expand "vec_store_lanesci<mode>"
5098   [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5099         (unspec:CI [(match_operand:CI 1 "register_operand")
5100                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5101                    UNSPEC_ST3))]
5102   "TARGET_SIMD"
5103 {
5104   if (BYTES_BIG_ENDIAN)
5105     {
5106       rtx tmp = gen_reg_rtx (CImode);
5107       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5108       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5109       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5110     }
5111   else
5112     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5113   DONE;
5114 })
5115
5116 (define_insn "aarch64_simd_ld4<mode>"
5117   [(set (match_operand:XI 0 "register_operand" "=w")
5118         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5119                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5120                    UNSPEC_LD4))]
5121   "TARGET_SIMD"
5122   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5123   [(set_attr "type" "neon_load4_4reg<q>")]
5124 )
5125
5126 (define_insn "aarch64_simd_ld4r<mode>"
5127   [(set (match_operand:XI 0 "register_operand" "=w")
5128        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5129                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5130                   UNSPEC_LD4_DUP))]
5131   "TARGET_SIMD"
5132   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5133   [(set_attr "type" "neon_load4_all_lanes<q>")]
5134 )
5135
5136 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5137   [(set (match_operand:XI 0 "register_operand" "=w")
5138         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5139                     (match_operand:XI 2 "register_operand" "0")
5140                     (match_operand:SI 3 "immediate_operand" "i")
5141                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5142                    UNSPEC_LD4_LANE))]
5143   "TARGET_SIMD"
5144 {
5145     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5146     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5147 }
5148   [(set_attr "type" "neon_load4_one_lane")]
5149 )
5150
5151 (define_expand "vec_load_lanesxi<mode>"
5152   [(set (match_operand:XI 0 "register_operand")
5153         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5154                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5155                    UNSPEC_LD4))]
5156   "TARGET_SIMD"
5157 {
5158   if (BYTES_BIG_ENDIAN)
5159     {
5160       rtx tmp = gen_reg_rtx (XImode);
5161       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5162       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5163       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5164     }
5165   else
5166     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5167   DONE;
5168 })
5169
5170 (define_insn "aarch64_simd_st4<mode>"
5171   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5172         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5173                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5174                    UNSPEC_ST4))]
5175   "TARGET_SIMD"
5176   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5177   [(set_attr "type" "neon_store4_4reg<q>")]
5178 )
5179
5180 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5181 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5182   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5183         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5184                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5185                      (match_operand:SI 2 "immediate_operand" "i")]
5186                     UNSPEC_ST4_LANE))]
5187   "TARGET_SIMD"
5188   {
5189     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5190     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5191   }
5192   [(set_attr "type" "neon_store4_one_lane<q>")]
5193 )
5194
5195 (define_expand "vec_store_lanesxi<mode>"
5196   [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5197         (unspec:XI [(match_operand:XI 1 "register_operand")
5198                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5199                    UNSPEC_ST4))]
5200   "TARGET_SIMD"
5201 {
5202   if (BYTES_BIG_ENDIAN)
5203     {
5204       rtx tmp = gen_reg_rtx (XImode);
5205       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5206       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5207       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5208     }
5209   else
5210     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5211   DONE;
5212 })
5213
5214 (define_insn_and_split "aarch64_rev_reglist<mode>"
5215 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5216         (unspec:VSTRUCT
5217                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5218                     (match_operand:V16QI 2 "register_operand" "w")]
5219                    UNSPEC_REV_REGLIST))]
5220   "TARGET_SIMD"
5221   "#"
5222   "&& reload_completed"
5223   [(const_int 0)]
5224 {
5225   int i;
5226   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5227   for (i = 0; i < nregs; i++)
5228     {
5229       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5230       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5231       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5232     }
5233   DONE;
5234 }
5235   [(set_attr "type" "neon_tbl1_q")
5236    (set_attr "length" "<insn_count>")]
5237 )
5238
5239 ;; Reload patterns for AdvSIMD register list operands.
5240
5241 (define_expand "mov<mode>"
5242   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5243         (match_operand:VSTRUCT 1 "general_operand"))]
5244   "TARGET_SIMD"
5245 {
5246   if (can_create_pseudo_p ())
5247     {
5248       if (GET_CODE (operands[0]) != REG)
5249         operands[1] = force_reg (<MODE>mode, operands[1]);
5250     }
5251 })
5252
5253
5254 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5255   [(match_operand:CI 0 "register_operand")
5256    (match_operand:DI 1 "register_operand")
5257    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5258   "TARGET_SIMD"
5259 {
5260   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5261   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5262   DONE;
5263 })
5264
5265 (define_insn "aarch64_ld1_x3_<mode>"
5266   [(set (match_operand:CI 0 "register_operand" "=w")
5267         (unspec:CI
5268           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5269            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5270   "TARGET_SIMD"
5271   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5272   [(set_attr "type" "neon_load1_3reg<q>")]
5273 )
5274
5275 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5276   [(match_operand:DI 0 "register_operand")
5277    (match_operand:OI 1 "register_operand")
5278    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5279   "TARGET_SIMD"
5280 {
5281   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5282   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5283   DONE;
5284 })
5285
5286 (define_insn "aarch64_st1_x2_<mode>"
5287    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5288          (unspec:OI
5289           [(match_operand:OI 1 "register_operand" "w")
5290           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5291   "TARGET_SIMD"
5292   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5293   [(set_attr "type" "neon_store1_2reg<q>")]
5294 )
5295
5296 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5297   [(match_operand:DI 0 "register_operand")
5298    (match_operand:CI 1 "register_operand")
5299    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5300   "TARGET_SIMD"
5301 {
5302   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5303   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5304   DONE;
5305 })
5306
5307 (define_insn "aarch64_st1_x3_<mode>"
5308    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5309         (unspec:CI
5310          [(match_operand:CI 1 "register_operand" "w")
5311           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5312   "TARGET_SIMD"
5313   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5314   [(set_attr "type" "neon_store1_3reg<q>")]
5315 )
5316
5317 (define_insn "*aarch64_mov<mode>"
5318   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5319         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5320   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5321    && (register_operand (operands[0], <MODE>mode)
5322        || register_operand (operands[1], <MODE>mode))"
5323   "@
5324    #
5325    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5326    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5327   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5328                      neon_load<nregs>_<nregs>reg_q")
5329    (set_attr "length" "<insn_count>,4,4")]
5330 )
5331
5332 (define_insn "aarch64_be_ld1<mode>"
5333   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5334         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5335                              "aarch64_simd_struct_operand" "Utv")]
5336         UNSPEC_LD1))]
5337   "TARGET_SIMD"
5338   "ld1\\t{%0<Vmtype>}, %1"
5339   [(set_attr "type" "neon_load1_1reg<q>")]
5340 )
5341
5342 (define_insn "aarch64_be_st1<mode>"
5343   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5344         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5345         UNSPEC_ST1))]
5346   "TARGET_SIMD"
5347   "st1\\t{%1<Vmtype>}, %0"
5348   [(set_attr "type" "neon_store1_1reg<q>")]
5349 )
5350
5351 (define_insn "*aarch64_be_movoi"
5352   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5353         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5354   "TARGET_SIMD && BYTES_BIG_ENDIAN
5355    && (register_operand (operands[0], OImode)
5356        || register_operand (operands[1], OImode))"
5357   "@
5358    #
5359    stp\\t%q1, %R1, %0
5360    ldp\\t%q0, %R0, %1"
5361   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5362    (set_attr "length" "8,4,4")]
5363 )
5364
5365 (define_insn "*aarch64_be_movci"
5366   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5367         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5368   "TARGET_SIMD && BYTES_BIG_ENDIAN
5369    && (register_operand (operands[0], CImode)
5370        || register_operand (operands[1], CImode))"
5371   "#"
5372   [(set_attr "type" "multiple")
5373    (set_attr "length" "12,4,4")]
5374 )
5375
5376 (define_insn "*aarch64_be_movxi"
5377   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5378         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5379   "TARGET_SIMD && BYTES_BIG_ENDIAN
5380    && (register_operand (operands[0], XImode)
5381        || register_operand (operands[1], XImode))"
5382   "#"
5383   [(set_attr "type" "multiple")
5384    (set_attr "length" "16,4,4")]
5385 )
5386
5387 (define_split
5388   [(set (match_operand:OI 0 "register_operand")
5389         (match_operand:OI 1 "register_operand"))]
5390   "TARGET_SIMD && reload_completed"
5391   [(const_int 0)]
5392 {
5393   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5394   DONE;
5395 })
5396
5397 (define_split
5398   [(set (match_operand:CI 0 "nonimmediate_operand")
5399         (match_operand:CI 1 "general_operand"))]
5400   "TARGET_SIMD && reload_completed"
5401   [(const_int 0)]
5402 {
5403   if (register_operand (operands[0], CImode)
5404       && register_operand (operands[1], CImode))
5405     {
5406       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5407       DONE;
5408     }
5409   else if (BYTES_BIG_ENDIAN)
5410     {
5411       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5412                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5413       emit_move_insn (gen_lowpart (V16QImode,
5414                                    simplify_gen_subreg (TImode, operands[0],
5415                                                         CImode, 32)),
5416                       gen_lowpart (V16QImode,
5417                                    simplify_gen_subreg (TImode, operands[1],
5418                                                         CImode, 32)));
5419       DONE;
5420     }
5421   else
5422     FAIL;
5423 })
5424
5425 (define_split
5426   [(set (match_operand:XI 0 "nonimmediate_operand")
5427         (match_operand:XI 1 "general_operand"))]
5428   "TARGET_SIMD && reload_completed"
5429   [(const_int 0)]
5430 {
5431   if (register_operand (operands[0], XImode)
5432       && register_operand (operands[1], XImode))
5433     {
5434       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5435       DONE;
5436     }
5437   else if (BYTES_BIG_ENDIAN)
5438     {
5439       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5440                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5441       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5442                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5443       DONE;
5444     }
5445   else
5446     FAIL;
5447 })
5448
5449 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5450   [(match_operand:VSTRUCT 0 "register_operand")
5451    (match_operand:DI 1 "register_operand")
5452    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5453   "TARGET_SIMD"
5454 {
5455   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5456   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5457                      * <VSTRUCT:nregs>);
5458
5459   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5460                                                                 mem));
5461   DONE;
5462 })
5463
5464 (define_insn "aarch64_ld2<mode>_dreg"
5465   [(set (match_operand:OI 0 "register_operand" "=w")
5466         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5467                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5468                    UNSPEC_LD2_DREG))]
5469   "TARGET_SIMD"
5470   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5471   [(set_attr "type" "neon_load2_2reg<q>")]
5472 )
5473
5474 (define_insn "aarch64_ld2<mode>_dreg"
5475   [(set (match_operand:OI 0 "register_operand" "=w")
5476         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5477                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5478                    UNSPEC_LD2_DREG))]
5479   "TARGET_SIMD"
5480   "ld1\\t{%S0.1d - %T0.1d}, %1"
5481   [(set_attr "type" "neon_load1_2reg<q>")]
5482 )
5483
5484 (define_insn "aarch64_ld3<mode>_dreg"
5485   [(set (match_operand:CI 0 "register_operand" "=w")
5486         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5487                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488                    UNSPEC_LD3_DREG))]
5489   "TARGET_SIMD"
5490   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5491   [(set_attr "type" "neon_load3_3reg<q>")]
5492 )
5493
5494 (define_insn "aarch64_ld3<mode>_dreg"
5495   [(set (match_operand:CI 0 "register_operand" "=w")
5496         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5497                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5498                    UNSPEC_LD3_DREG))]
5499   "TARGET_SIMD"
5500   "ld1\\t{%S0.1d - %U0.1d}, %1"
5501   [(set_attr "type" "neon_load1_3reg<q>")]
5502 )
5503
5504 (define_insn "aarch64_ld4<mode>_dreg"
5505   [(set (match_operand:XI 0 "register_operand" "=w")
5506         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5507                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5508                    UNSPEC_LD4_DREG))]
5509   "TARGET_SIMD"
5510   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5511   [(set_attr "type" "neon_load4_4reg<q>")]
5512 )
5513
5514 (define_insn "aarch64_ld4<mode>_dreg"
5515   [(set (match_operand:XI 0 "register_operand" "=w")
5516         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5517                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5518                    UNSPEC_LD4_DREG))]
5519   "TARGET_SIMD"
5520   "ld1\\t{%S0.1d - %V0.1d}, %1"
5521   [(set_attr "type" "neon_load1_4reg<q>")]
5522 )
5523
5524 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5525  [(match_operand:VSTRUCT 0 "register_operand")
5526   (match_operand:DI 1 "register_operand")
5527   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5528   "TARGET_SIMD"
5529 {
5530   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5531   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5532
5533   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5534   DONE;
5535 })
5536
5537 (define_expand "aarch64_ld1<VALL_F16:mode>"
5538  [(match_operand:VALL_F16 0 "register_operand")
5539   (match_operand:DI 1 "register_operand")]
5540   "TARGET_SIMD"
5541 {
5542   machine_mode mode = <VALL_F16:MODE>mode;
5543   rtx mem = gen_rtx_MEM (mode, operands[1]);
5544
5545   if (BYTES_BIG_ENDIAN)
5546     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5547   else
5548     emit_move_insn (operands[0], mem);
5549   DONE;
5550 })
5551
5552 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5553  [(match_operand:VSTRUCT 0 "register_operand")
5554   (match_operand:DI 1 "register_operand")
5555   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5556   "TARGET_SIMD"
5557 {
5558   machine_mode mode = <VSTRUCT:MODE>mode;
5559   rtx mem = gen_rtx_MEM (mode, operands[1]);
5560
5561   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5562   DONE;
5563 })
5564
5565 (define_expand "aarch64_ld1x2<VQ:mode>"
5566  [(match_operand:OI 0 "register_operand")
5567   (match_operand:DI 1 "register_operand")
5568   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569   "TARGET_SIMD"
5570 {
5571   machine_mode mode = OImode;
5572   rtx mem = gen_rtx_MEM (mode, operands[1]);
5573
5574   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5575   DONE;
5576 })
5577
5578 (define_expand "aarch64_ld1x2<VDC:mode>"
5579  [(match_operand:OI 0 "register_operand")
5580   (match_operand:DI 1 "register_operand")
5581   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5582   "TARGET_SIMD"
5583 {
5584   machine_mode mode = OImode;
5585   rtx mem = gen_rtx_MEM (mode, operands[1]);
5586
5587   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5588   DONE;
5589 })
5590
5591
5592 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5593   [(match_operand:VSTRUCT 0 "register_operand")
5594         (match_operand:DI 1 "register_operand")
5595         (match_operand:VSTRUCT 2 "register_operand")
5596         (match_operand:SI 3 "immediate_operand")
5597         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5598   "TARGET_SIMD"
5599 {
5600   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5601   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5602                      * <VSTRUCT:nregs>);
5603
5604   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5605   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5606         operands[0], mem, operands[2], operands[3]));
5607   DONE;
5608 })
5609
5610 ;; Expanders for builtins to extract vector registers from large
5611 ;; opaque integer modes.
5612
5613 ;; D-register list.
5614
5615 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5616  [(match_operand:VDC 0 "register_operand")
5617   (match_operand:VSTRUCT 1 "register_operand")
5618   (match_operand:SI 2 "immediate_operand")]
5619   "TARGET_SIMD"
5620 {
5621   int part = INTVAL (operands[2]);
5622   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5623   int offset = part * 16;
5624
5625   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5626   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5627   DONE;
5628 })
5629
5630 ;; Q-register list.
5631
5632 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5633  [(match_operand:VQ 0 "register_operand")
5634   (match_operand:VSTRUCT 1 "register_operand")
5635   (match_operand:SI 2 "immediate_operand")]
5636   "TARGET_SIMD"
5637 {
5638   int part = INTVAL (operands[2]);
5639   int offset = part * 16;
5640
5641   emit_move_insn (operands[0],
5642                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5643   DONE;
5644 })
5645
5646 ;; Permuted-store expanders for neon intrinsics.
5647
5648 ;; Permute instructions
5649
5650 ;; vec_perm support
5651
5652 (define_expand "vec_perm<mode>"
5653   [(match_operand:VB 0 "register_operand")
5654    (match_operand:VB 1 "register_operand")
5655    (match_operand:VB 2 "register_operand")
5656    (match_operand:VB 3 "register_operand")]
5657   "TARGET_SIMD"
5658 {
5659   aarch64_expand_vec_perm (operands[0], operands[1],
5660                            operands[2], operands[3], <nunits>);
5661   DONE;
5662 })
5663
5664 (define_insn "aarch64_tbl1<mode>"
5665   [(set (match_operand:VB 0 "register_operand" "=w")
5666         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5667                     (match_operand:VB 2 "register_operand" "w")]
5668                    UNSPEC_TBL))]
5669   "TARGET_SIMD"
5670   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5671   [(set_attr "type" "neon_tbl1<q>")]
5672 )
5673
5674 ;; Two source registers.
5675
5676 (define_insn "aarch64_tbl2v16qi"
5677   [(set (match_operand:V16QI 0 "register_operand" "=w")
5678         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5679                        (match_operand:V16QI 2 "register_operand" "w")]
5680                       UNSPEC_TBL))]
5681   "TARGET_SIMD"
5682   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5683   [(set_attr "type" "neon_tbl2_q")]
5684 )
5685
5686 (define_insn "aarch64_tbl3<mode>"
5687   [(set (match_operand:VB 0 "register_operand" "=w")
5688         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5689                       (match_operand:VB 2 "register_operand" "w")]
5690                       UNSPEC_TBL))]
5691   "TARGET_SIMD"
5692   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5693   [(set_attr "type" "neon_tbl3")]
5694 )
5695
5696 (define_insn "aarch64_tbx4<mode>"
5697   [(set (match_operand:VB 0 "register_operand" "=w")
5698         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5699                       (match_operand:OI 2 "register_operand" "w")
5700                       (match_operand:VB 3 "register_operand" "w")]
5701                       UNSPEC_TBX))]
5702   "TARGET_SIMD"
5703   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5704   [(set_attr "type" "neon_tbl4")]
5705 )
5706
5707 ;; Three source registers.
5708
5709 (define_insn "aarch64_qtbl3<mode>"
5710   [(set (match_operand:VB 0 "register_operand" "=w")
5711         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5712                       (match_operand:VB 2 "register_operand" "w")]
5713                       UNSPEC_TBL))]
5714   "TARGET_SIMD"
5715   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5716   [(set_attr "type" "neon_tbl3")]
5717 )
5718
5719 (define_insn "aarch64_qtbx3<mode>"
5720   [(set (match_operand:VB 0 "register_operand" "=w")
5721         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5722                       (match_operand:CI 2 "register_operand" "w")
5723                       (match_operand:VB 3 "register_operand" "w")]
5724                       UNSPEC_TBX))]
5725   "TARGET_SIMD"
5726   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5727   [(set_attr "type" "neon_tbl3")]
5728 )
5729
5730 ;; Four source registers.
5731
5732 (define_insn "aarch64_qtbl4<mode>"
5733   [(set (match_operand:VB 0 "register_operand" "=w")
5734         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5735                       (match_operand:VB 2 "register_operand" "w")]
5736                       UNSPEC_TBL))]
5737   "TARGET_SIMD"
5738   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5739   [(set_attr "type" "neon_tbl4")]
5740 )
5741
5742 (define_insn "aarch64_qtbx4<mode>"
5743   [(set (match_operand:VB 0 "register_operand" "=w")
5744         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5745                       (match_operand:XI 2 "register_operand" "w")
5746                       (match_operand:VB 3 "register_operand" "w")]
5747                       UNSPEC_TBX))]
5748   "TARGET_SIMD"
5749   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5750   [(set_attr "type" "neon_tbl4")]
5751 )
5752
5753 (define_insn_and_split "aarch64_combinev16qi"
5754   [(set (match_operand:OI 0 "register_operand" "=w")
5755         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5756                     (match_operand:V16QI 2 "register_operand" "w")]
5757                    UNSPEC_CONCAT))]
5758   "TARGET_SIMD"
5759   "#"
5760   "&& reload_completed"
5761   [(const_int 0)]
5762 {
5763   aarch64_split_combinev16qi (operands);
5764   DONE;
5765 }
5766 [(set_attr "type" "multiple")]
5767 )
5768
5769 ;; This instruction's pattern is generated directly by
5770 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5771 ;; need corresponding changes there.
5772 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5773   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5774         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5775                           (match_operand:VALL_F16 2 "register_operand" "w")]
5776          PERMUTE))]
5777   "TARGET_SIMD"
5778   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5779   [(set_attr "type" "neon_permute<q>")]
5780 )
5781
5782 ;; This instruction's pattern is generated directly by
5783 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5784 ;; need corresponding changes there.  Note that the immediate (third)
5785 ;; operand is a lane index not a byte index.
5786 (define_insn "aarch64_ext<mode>"
5787   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5788         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5789                           (match_operand:VALL_F16 2 "register_operand" "w")
5790                           (match_operand:SI 3 "immediate_operand" "i")]
5791          UNSPEC_EXT))]
5792   "TARGET_SIMD"
5793 {
5794   operands[3] = GEN_INT (INTVAL (operands[3])
5795       * GET_MODE_UNIT_SIZE (<MODE>mode));
5796   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5797 }
5798   [(set_attr "type" "neon_ext<q>")]
5799 )
5800
5801 ;; This instruction's pattern is generated directly by
5802 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5803 ;; need corresponding changes there.
5804 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5805   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5806         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5807                     REVERSE))]
5808   "TARGET_SIMD"
5809   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5810   [(set_attr "type" "neon_rev<q>")]
5811 )
5812
5813 (define_insn "aarch64_st2<mode>_dreg"
5814   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5815         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5816                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5817                    UNSPEC_ST2))]
5818   "TARGET_SIMD"
5819   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5820   [(set_attr "type" "neon_store2_2reg")]
5821 )
5822
5823 (define_insn "aarch64_st2<mode>_dreg"
5824   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5825         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5826                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827                    UNSPEC_ST2))]
5828   "TARGET_SIMD"
5829   "st1\\t{%S1.1d - %T1.1d}, %0"
5830   [(set_attr "type" "neon_store1_2reg")]
5831 )
5832
5833 (define_insn "aarch64_st3<mode>_dreg"
5834   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5835         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5836                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5837                    UNSPEC_ST3))]
5838   "TARGET_SIMD"
5839   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5840   [(set_attr "type" "neon_store3_3reg")]
5841 )
5842
5843 (define_insn "aarch64_st3<mode>_dreg"
5844   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5845         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5846                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5847                    UNSPEC_ST3))]
5848   "TARGET_SIMD"
5849   "st1\\t{%S1.1d - %U1.1d}, %0"
5850   [(set_attr "type" "neon_store1_3reg")]
5851 )
5852
5853 (define_insn "aarch64_st4<mode>_dreg"
5854   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5855         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5856                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5857                    UNSPEC_ST4))]
5858   "TARGET_SIMD"
5859   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5860   [(set_attr "type" "neon_store4_4reg")]
5861 )
5862
5863 (define_insn "aarch64_st4<mode>_dreg"
5864   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5865         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5866                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5867                    UNSPEC_ST4))]
5868   "TARGET_SIMD"
5869   "st1\\t{%S1.1d - %V1.1d}, %0"
5870   [(set_attr "type" "neon_store1_4reg")]
5871 )
5872
5873 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5874  [(match_operand:DI 0 "register_operand")
5875   (match_operand:VSTRUCT 1 "register_operand")
5876   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5877   "TARGET_SIMD"
5878 {
5879   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5880   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5881
5882   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5883   DONE;
5884 })
5885
5886 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5887  [(match_operand:DI 0 "register_operand")
5888   (match_operand:VSTRUCT 1 "register_operand")
5889   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5890   "TARGET_SIMD"
5891 {
5892   machine_mode mode = <VSTRUCT:MODE>mode;
5893   rtx mem = gen_rtx_MEM (mode, operands[0]);
5894
5895   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5896   DONE;
5897 })
5898
5899 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5900  [(match_operand:DI 0 "register_operand")
5901   (match_operand:VSTRUCT 1 "register_operand")
5902   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5903   (match_operand:SI 2 "immediate_operand")]
5904   "TARGET_SIMD"
5905 {
5906   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5907   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5908                      * <VSTRUCT:nregs>);
5909
5910   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5911                 mem, operands[1], operands[2]));
5912   DONE;
5913 })
5914
5915 (define_expand "aarch64_st1<VALL_F16:mode>"
5916  [(match_operand:DI 0 "register_operand")
5917   (match_operand:VALL_F16 1 "register_operand")]
5918   "TARGET_SIMD"
5919 {
5920   machine_mode mode = <VALL_F16:MODE>mode;
5921   rtx mem = gen_rtx_MEM (mode, operands[0]);
5922
5923   if (BYTES_BIG_ENDIAN)
5924     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5925   else
5926     emit_move_insn (mem, operands[1]);
5927   DONE;
5928 })
5929
5930 ;; Expander for builtins to insert vector registers into large
5931 ;; opaque integer modes.
5932
5933 ;; Q-register list.  We don't need a D-reg inserter as we zero
5934 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5935
5936 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5937  [(match_operand:VSTRUCT 0 "register_operand")
5938   (match_operand:VSTRUCT 1 "register_operand")
5939   (match_operand:VQ 2 "register_operand")
5940   (match_operand:SI 3 "immediate_operand")]
5941   "TARGET_SIMD"
5942 {
5943   int part = INTVAL (operands[3]);
5944   int offset = part * 16;
5945
5946   emit_move_insn (operands[0], operands[1]);
5947   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5948                   operands[2]);
5949   DONE;
5950 })
5951
5952 ;; Standard pattern name vec_init<mode><Vel>.
5953
5954 (define_expand "vec_init<mode><Vel>"
5955   [(match_operand:VALL_F16 0 "register_operand")
5956    (match_operand 1 "" "")]
5957   "TARGET_SIMD"
5958 {
5959   aarch64_expand_vector_init (operands[0], operands[1]);
5960   DONE;
5961 })
5962
5963 (define_expand "vec_init<mode><Vhalf>"
5964   [(match_operand:VQ_NO2E 0 "register_operand")
5965    (match_operand 1 "" "")]
5966   "TARGET_SIMD"
5967 {
5968   aarch64_expand_vector_init (operands[0], operands[1]);
5969   DONE;
5970 })
5971
5972 (define_insn "*aarch64_simd_ld1r<mode>"
5973   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5974         (vec_duplicate:VALL_F16
5975           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5976   "TARGET_SIMD"
5977   "ld1r\\t{%0.<Vtype>}, %1"
5978   [(set_attr "type" "neon_load1_all_lanes")]
5979 )
5980
5981 (define_insn "aarch64_simd_ld1<mode>_x2"
5982   [(set (match_operand:OI 0 "register_operand" "=w")
5983         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5984                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5985                    UNSPEC_LD1))]
5986   "TARGET_SIMD"
5987   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5988   [(set_attr "type" "neon_load1_2reg<q>")]
5989 )
5990
5991 (define_insn "aarch64_simd_ld1<mode>_x2"
5992   [(set (match_operand:OI 0 "register_operand" "=w")
5993         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5994                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5995                    UNSPEC_LD1))]
5996   "TARGET_SIMD"
5997   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5998   [(set_attr "type" "neon_load1_2reg<q>")]
5999 )
6000
6001
6002 (define_insn "@aarch64_frecpe<mode>"
6003   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6004         (unspec:VHSDF_HSDF
6005          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6006          UNSPEC_FRECPE))]
6007   "TARGET_SIMD"
6008   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6009   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6010 )
6011
6012 (define_insn "aarch64_frecpx<mode>"
6013   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6014         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6015          UNSPEC_FRECPX))]
6016   "TARGET_SIMD"
6017   "frecpx\t%<s>0, %<s>1"
6018   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6019 )
6020
6021 (define_insn "@aarch64_frecps<mode>"
6022   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6023         (unspec:VHSDF_HSDF
6024           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6025           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6026           UNSPEC_FRECPS))]
6027   "TARGET_SIMD"
6028   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6029   [(set_attr "type" "neon_fp_recps_<stype><q>")]
6030 )
6031
6032 (define_insn "aarch64_urecpe<mode>"
6033   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6034         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6035                 UNSPEC_URECPE))]
6036  "TARGET_SIMD"
6037  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6038   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6039
6040 ;; Standard pattern name vec_extract<mode><Vel>.
6041
6042 (define_expand "vec_extract<mode><Vel>"
6043   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6044    (match_operand:VALL_F16 1 "register_operand")
6045    (match_operand:SI 2 "immediate_operand")]
6046   "TARGET_SIMD"
6047 {
6048     emit_insn
6049       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6050     DONE;
6051 })
6052
6053 ;; aes
6054
6055 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6056   [(set (match_operand:V16QI 0 "register_operand" "=w")
6057         (unspec:V16QI
6058                 [(xor:V16QI
6059                  (match_operand:V16QI 1 "register_operand" "%0")
6060                  (match_operand:V16QI 2 "register_operand" "w"))]
6061          CRYPTO_AES))]
6062   "TARGET_SIMD && TARGET_AES"
6063   "aes<aes_op>\\t%0.16b, %2.16b"
6064   [(set_attr "type" "crypto_aese")]
6065 )
6066
6067 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6068   [(set (match_operand:V16QI 0 "register_operand" "=w")
6069         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6070          CRYPTO_AESMC))]
6071   "TARGET_SIMD && TARGET_AES"
6072   "aes<aesmc_op>\\t%0.16b, %1.16b"
6073   [(set_attr "type" "crypto_aesmc")]
6074 )
6075
6076 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6077 ;; and enforce the register dependency without scheduling or register
6078 ;; allocation messing up the order or introducing moves inbetween.
6079 ;;  Mash the two together during combine.
6080
6081 (define_insn "*aarch64_crypto_aese_fused"
6082   [(set (match_operand:V16QI 0 "register_operand" "=w")
6083         (unspec:V16QI
6084           [(unspec:V16QI
6085            [(xor:V16QI
6086                 (match_operand:V16QI 1 "register_operand" "%0")
6087                 (match_operand:V16QI 2 "register_operand" "w"))]
6088              UNSPEC_AESE)]
6089         UNSPEC_AESMC))]
6090   "TARGET_SIMD && TARGET_AES
6091    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6092   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6093   [(set_attr "type" "crypto_aese")
6094    (set_attr "length" "8")]
6095 )
6096
6097 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6098 ;; and enforce the register dependency without scheduling or register
6099 ;; allocation messing up the order or introducing moves inbetween.
6100 ;;  Mash the two together during combine.
6101
6102 (define_insn "*aarch64_crypto_aesd_fused"
6103   [(set (match_operand:V16QI 0 "register_operand" "=w")
6104         (unspec:V16QI
6105           [(unspec:V16QI
6106                     [(xor:V16QI
6107                         (match_operand:V16QI 1 "register_operand" "%0")
6108                         (match_operand:V16QI 2 "register_operand" "w"))]
6109                 UNSPEC_AESD)]
6110           UNSPEC_AESIMC))]
6111   "TARGET_SIMD && TARGET_AES
6112    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6113   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6114   [(set_attr "type" "crypto_aese")
6115    (set_attr "length" "8")]
6116 )
6117
6118 ;; sha1
6119
6120 (define_insn "aarch64_crypto_sha1hsi"
6121   [(set (match_operand:SI 0 "register_operand" "=w")
6122         (unspec:SI [(match_operand:SI 1
6123                        "register_operand" "w")]
6124          UNSPEC_SHA1H))]
6125   "TARGET_SIMD && TARGET_SHA2"
6126   "sha1h\\t%s0, %s1"
6127   [(set_attr "type" "crypto_sha1_fast")]
6128 )
6129
6130 (define_insn "aarch64_crypto_sha1hv4si"
6131   [(set (match_operand:SI 0 "register_operand" "=w")
6132         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6133                      (parallel [(const_int 0)]))]
6134          UNSPEC_SHA1H))]
6135   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6136   "sha1h\\t%s0, %s1"
6137   [(set_attr "type" "crypto_sha1_fast")]
6138 )
6139
6140 (define_insn "aarch64_be_crypto_sha1hv4si"
6141   [(set (match_operand:SI 0 "register_operand" "=w")
6142         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6143                      (parallel [(const_int 3)]))]
6144          UNSPEC_SHA1H))]
6145   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6146   "sha1h\\t%s0, %s1"
6147   [(set_attr "type" "crypto_sha1_fast")]
6148 )
6149
6150 (define_insn "aarch64_crypto_sha1su1v4si"
6151   [(set (match_operand:V4SI 0 "register_operand" "=w")
6152         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6153                       (match_operand:V4SI 2 "register_operand" "w")]
6154          UNSPEC_SHA1SU1))]
6155   "TARGET_SIMD && TARGET_SHA2"
6156   "sha1su1\\t%0.4s, %2.4s"
6157   [(set_attr "type" "crypto_sha1_fast")]
6158 )
6159
6160 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6161   [(set (match_operand:V4SI 0 "register_operand" "=w")
6162         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6163                       (match_operand:SI 2 "register_operand" "w")
6164                       (match_operand:V4SI 3 "register_operand" "w")]
6165          CRYPTO_SHA1))]
6166   "TARGET_SIMD && TARGET_SHA2"
6167   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6168   [(set_attr "type" "crypto_sha1_slow")]
6169 )
6170
6171 (define_insn "aarch64_crypto_sha1su0v4si"
6172   [(set (match_operand:V4SI 0 "register_operand" "=w")
6173         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6174                       (match_operand:V4SI 2 "register_operand" "w")
6175                       (match_operand:V4SI 3 "register_operand" "w")]
6176          UNSPEC_SHA1SU0))]
6177   "TARGET_SIMD && TARGET_SHA2"
6178   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6179   [(set_attr "type" "crypto_sha1_xor")]
6180 )
6181
6182 ;; sha256
6183
6184 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6185   [(set (match_operand:V4SI 0 "register_operand" "=w")
6186         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187                       (match_operand:V4SI 2 "register_operand" "w")
6188                       (match_operand:V4SI 3 "register_operand" "w")]
6189          CRYPTO_SHA256))]
6190   "TARGET_SIMD && TARGET_SHA2"
6191   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6192   [(set_attr "type" "crypto_sha256_slow")]
6193 )
6194
6195 (define_insn "aarch64_crypto_sha256su0v4si"
6196   [(set (match_operand:V4SI 0 "register_operand" "=w")
6197         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6198                       (match_operand:V4SI 2 "register_operand" "w")]
6199          UNSPEC_SHA256SU0))]
6200   "TARGET_SIMD && TARGET_SHA2"
6201   "sha256su0\\t%0.4s, %2.4s"
6202   [(set_attr "type" "crypto_sha256_fast")]
6203 )
6204
6205 (define_insn "aarch64_crypto_sha256su1v4si"
6206   [(set (match_operand:V4SI 0 "register_operand" "=w")
6207         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6208                       (match_operand:V4SI 2 "register_operand" "w")
6209                       (match_operand:V4SI 3 "register_operand" "w")]
6210          UNSPEC_SHA256SU1))]
6211   "TARGET_SIMD && TARGET_SHA2"
6212   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6213   [(set_attr "type" "crypto_sha256_slow")]
6214 )
6215
6216 ;; sha512
6217
6218 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6219   [(set (match_operand:V2DI 0 "register_operand" "=w")
6220         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6221                       (match_operand:V2DI 2 "register_operand" "w")
6222                       (match_operand:V2DI 3 "register_operand" "w")]
6223          CRYPTO_SHA512))]
6224   "TARGET_SIMD && TARGET_SHA3"
6225   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6226   [(set_attr "type" "crypto_sha512")]
6227 )
6228
6229 (define_insn "aarch64_crypto_sha512su0qv2di"
6230   [(set (match_operand:V2DI 0 "register_operand" "=w")
6231         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6232                       (match_operand:V2DI 2 "register_operand" "w")]
6233          UNSPEC_SHA512SU0))]
6234   "TARGET_SIMD && TARGET_SHA3"
6235   "sha512su0\\t%0.2d, %2.2d"
6236   [(set_attr "type" "crypto_sha512")]
6237 )
6238
6239 (define_insn "aarch64_crypto_sha512su1qv2di"
6240   [(set (match_operand:V2DI 0 "register_operand" "=w")
6241         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6242                       (match_operand:V2DI 2 "register_operand" "w")
6243                       (match_operand:V2DI 3 "register_operand" "w")]
6244          UNSPEC_SHA512SU1))]
6245   "TARGET_SIMD && TARGET_SHA3"
6246   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6247   [(set_attr "type" "crypto_sha512")]
6248 )
6249
6250 ;; sha3
6251
6252 (define_insn "eor3q<mode>4"
6253   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6254         (xor:VQ_I
6255          (xor:VQ_I
6256           (match_operand:VQ_I 2 "register_operand" "w")
6257           (match_operand:VQ_I 3 "register_operand" "w"))
6258          (match_operand:VQ_I 1 "register_operand" "w")))]
6259   "TARGET_SIMD && TARGET_SHA3"
6260   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6261   [(set_attr "type" "crypto_sha3")]
6262 )
6263
6264 (define_insn "aarch64_rax1qv2di"
6265   [(set (match_operand:V2DI 0 "register_operand" "=w")
6266         (xor:V2DI
6267          (rotate:V2DI
6268           (match_operand:V2DI 2 "register_operand" "w")
6269           (const_int 1))
6270          (match_operand:V2DI 1 "register_operand" "w")))]
6271   "TARGET_SIMD && TARGET_SHA3"
6272   "rax1\\t%0.2d, %1.2d, %2.2d"
6273   [(set_attr "type" "crypto_sha3")]
6274 )
6275
6276 (define_insn "aarch64_xarqv2di"
6277   [(set (match_operand:V2DI 0 "register_operand" "=w")
6278         (rotatert:V2DI
6279          (xor:V2DI
6280           (match_operand:V2DI 1 "register_operand" "%w")
6281           (match_operand:V2DI 2 "register_operand" "w"))
6282          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6283   "TARGET_SIMD && TARGET_SHA3"
6284   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6285   [(set_attr "type" "crypto_sha3")]
6286 )
6287
6288 (define_insn "bcaxq<mode>4"
6289   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6290         (xor:VQ_I
6291          (and:VQ_I
6292           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6293           (match_operand:VQ_I 2 "register_operand" "w"))
6294          (match_operand:VQ_I 1 "register_operand" "w")))]
6295   "TARGET_SIMD && TARGET_SHA3"
6296   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6297   [(set_attr "type" "crypto_sha3")]
6298 )
6299
6300 ;; SM3
6301
6302 (define_insn "aarch64_sm3ss1qv4si"
6303   [(set (match_operand:V4SI 0 "register_operand" "=w")
6304         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6305                       (match_operand:V4SI 2 "register_operand" "w")
6306                       (match_operand:V4SI 3 "register_operand" "w")]
6307          UNSPEC_SM3SS1))]
6308   "TARGET_SIMD && TARGET_SM4"
6309   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6310   [(set_attr "type" "crypto_sm3")]
6311 )
6312
6313
6314 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6315   [(set (match_operand:V4SI 0 "register_operand" "=w")
6316         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6317                       (match_operand:V4SI 2 "register_operand" "w")
6318                       (match_operand:V4SI 3 "register_operand" "w")
6319                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6320          CRYPTO_SM3TT))]
6321   "TARGET_SIMD && TARGET_SM4"
6322   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6323   [(set_attr "type" "crypto_sm3")]
6324 )
6325
6326 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6327   [(set (match_operand:V4SI 0 "register_operand" "=w")
6328         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6329                       (match_operand:V4SI 2 "register_operand" "w")
6330                       (match_operand:V4SI 3 "register_operand" "w")]
6331          CRYPTO_SM3PART))]
6332   "TARGET_SIMD && TARGET_SM4"
6333   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6334   [(set_attr "type" "crypto_sm3")]
6335 )
6336
6337 ;; SM4
6338
6339 (define_insn "aarch64_sm4eqv4si"
6340   [(set (match_operand:V4SI 0 "register_operand" "=w")
6341         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6342                       (match_operand:V4SI 2 "register_operand" "w")]
6343          UNSPEC_SM4E))]
6344   "TARGET_SIMD && TARGET_SM4"
6345   "sm4e\\t%0.4s, %2.4s"
6346   [(set_attr "type" "crypto_sm4")]
6347 )
6348
6349 (define_insn "aarch64_sm4ekeyqv4si"
6350   [(set (match_operand:V4SI 0 "register_operand" "=w")
6351         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6352                       (match_operand:V4SI 2 "register_operand" "w")]
6353          UNSPEC_SM4EKEY))]
6354   "TARGET_SIMD && TARGET_SM4"
6355   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6356   [(set_attr "type" "crypto_sm4")]
6357 )
6358
6359 ;; fp16fml
6360
6361 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6362   [(set (match_operand:VDQSF 0 "register_operand")
6363         (unspec:VDQSF
6364          [(match_operand:VDQSF 1 "register_operand")
6365           (match_operand:<VFMLA_W> 2 "register_operand")
6366           (match_operand:<VFMLA_W> 3 "register_operand")]
6367          VFMLA16_LOW))]
6368   "TARGET_F16FML"
6369 {
6370   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6371                                             <nunits> * 2, false);
6372   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6373                                             <nunits> * 2, false);
6374
6375   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6376                                                                 operands[1],
6377                                                                 operands[2],
6378                                                                 operands[3],
6379                                                                 p1, p2));
6380   DONE;
6381
6382 })
6383
6384 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6385   [(set (match_operand:VDQSF 0 "register_operand")
6386         (unspec:VDQSF
6387          [(match_operand:VDQSF 1 "register_operand")
6388           (match_operand:<VFMLA_W> 2 "register_operand")
6389           (match_operand:<VFMLA_W> 3 "register_operand")]
6390          VFMLA16_HIGH))]
6391   "TARGET_F16FML"
6392 {
6393   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6394   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6395
6396   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6397                                                                  operands[1],
6398                                                                  operands[2],
6399                                                                  operands[3],
6400                                                                  p1, p2));
6401   DONE;
6402 })
6403
6404 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6405   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6406         (fma:VDQSF
6407          (float_extend:VDQSF
6408           (vec_select:<VFMLA_SEL_W>
6409            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6410            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6411          (float_extend:VDQSF
6412           (vec_select:<VFMLA_SEL_W>
6413            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6414            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6415          (match_operand:VDQSF 1 "register_operand" "0")))]
6416   "TARGET_F16FML"
6417   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6418   [(set_attr "type" "neon_fp_mul_s")]
6419 )
6420
6421 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6422   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6423         (fma:VDQSF
6424          (float_extend:VDQSF
6425           (neg:<VFMLA_SEL_W>
6426            (vec_select:<VFMLA_SEL_W>
6427             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6428             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6429          (float_extend:VDQSF
6430           (vec_select:<VFMLA_SEL_W>
6431            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6432            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6433          (match_operand:VDQSF 1 "register_operand" "0")))]
6434   "TARGET_F16FML"
6435   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6436   [(set_attr "type" "neon_fp_mul_s")]
6437 )
6438
6439 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6440   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6441         (fma:VDQSF
6442          (float_extend:VDQSF
6443           (vec_select:<VFMLA_SEL_W>
6444            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6445            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6446          (float_extend:VDQSF
6447           (vec_select:<VFMLA_SEL_W>
6448            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6449            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6450          (match_operand:VDQSF 1 "register_operand" "0")))]
6451   "TARGET_F16FML"
6452   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6453   [(set_attr "type" "neon_fp_mul_s")]
6454 )
6455
6456 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6457   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6458         (fma:VDQSF
6459          (float_extend:VDQSF
6460           (neg:<VFMLA_SEL_W>
6461            (vec_select:<VFMLA_SEL_W>
6462             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6463             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6464          (float_extend:VDQSF
6465           (vec_select:<VFMLA_SEL_W>
6466            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6467            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6468          (match_operand:VDQSF 1 "register_operand" "0")))]
6469   "TARGET_F16FML"
6470   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6471   [(set_attr "type" "neon_fp_mul_s")]
6472 )
6473
6474 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6475   [(set (match_operand:V2SF 0 "register_operand")
6476         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6477                            (match_operand:V4HF 2 "register_operand")
6478                            (match_operand:V4HF 3 "register_operand")
6479                            (match_operand:SI 4 "aarch64_imm2")]
6480          VFMLA16_LOW))]
6481   "TARGET_F16FML"
6482 {
6483     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6484     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6485
6486     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6487                                                             operands[1],
6488                                                             operands[2],
6489                                                             operands[3],
6490                                                             p1, lane));
6491     DONE;
6492 }
6493 )
6494
6495 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6496   [(set (match_operand:V2SF 0 "register_operand")
6497         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6498                            (match_operand:V4HF 2 "register_operand")
6499                            (match_operand:V4HF 3 "register_operand")
6500                            (match_operand:SI 4 "aarch64_imm2")]
6501          VFMLA16_HIGH))]
6502   "TARGET_F16FML"
6503 {
6504     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6505     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6506
6507     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6508                                                              operands[1],
6509                                                              operands[2],
6510                                                              operands[3],
6511                                                              p1, lane));
6512     DONE;
6513 })
6514
6515 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6516   [(set (match_operand:V2SF 0 "register_operand" "=w")
6517         (fma:V2SF
6518          (float_extend:V2SF
6519            (vec_select:V2HF
6520             (match_operand:V4HF 2 "register_operand" "w")
6521             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6522          (float_extend:V2SF
6523            (vec_duplicate:V2HF
6524             (vec_select:HF
6525              (match_operand:V4HF 3 "register_operand" "x")
6526              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6527          (match_operand:V2SF 1 "register_operand" "0")))]
6528   "TARGET_F16FML"
6529   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6530   [(set_attr "type" "neon_fp_mul_s")]
6531 )
6532
6533 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6534   [(set (match_operand:V2SF 0 "register_operand" "=w")
6535         (fma:V2SF
6536          (float_extend:V2SF
6537           (neg:V2HF
6538            (vec_select:V2HF
6539             (match_operand:V4HF 2 "register_operand" "w")
6540             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6541          (float_extend:V2SF
6542           (vec_duplicate:V2HF
6543            (vec_select:HF
6544             (match_operand:V4HF 3 "register_operand" "x")
6545             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6546          (match_operand:V2SF 1 "register_operand" "0")))]
6547   "TARGET_F16FML"
6548   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6549   [(set_attr "type" "neon_fp_mul_s")]
6550 )
6551
6552 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6553   [(set (match_operand:V2SF 0 "register_operand" "=w")
6554         (fma:V2SF
6555          (float_extend:V2SF
6556            (vec_select:V2HF
6557             (match_operand:V4HF 2 "register_operand" "w")
6558             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6559          (float_extend:V2SF
6560            (vec_duplicate:V2HF
6561             (vec_select:HF
6562              (match_operand:V4HF 3 "register_operand" "x")
6563              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6564          (match_operand:V2SF 1 "register_operand" "0")))]
6565   "TARGET_F16FML"
6566   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6567   [(set_attr "type" "neon_fp_mul_s")]
6568 )
6569
6570 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6571   [(set (match_operand:V2SF 0 "register_operand" "=w")
6572         (fma:V2SF
6573          (float_extend:V2SF
6574            (neg:V2HF
6575             (vec_select:V2HF
6576              (match_operand:V4HF 2 "register_operand" "w")
6577              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6578          (float_extend:V2SF
6579            (vec_duplicate:V2HF
6580             (vec_select:HF
6581              (match_operand:V4HF 3 "register_operand" "x")
6582              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6583          (match_operand:V2SF 1 "register_operand" "0")))]
6584   "TARGET_F16FML"
6585   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6586   [(set_attr "type" "neon_fp_mul_s")]
6587 )
6588
6589 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6590   [(set (match_operand:V4SF 0 "register_operand")
6591         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6592                            (match_operand:V8HF 2 "register_operand")
6593                            (match_operand:V8HF 3 "register_operand")
6594                            (match_operand:SI 4 "aarch64_lane_imm3")]
6595          VFMLA16_LOW))]
6596   "TARGET_F16FML"
6597 {
6598     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6599     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6600
6601     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6602                                                               operands[1],
6603                                                               operands[2],
6604                                                               operands[3],
6605                                                               p1, lane));
6606     DONE;
6607 })
6608
6609 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6610   [(set (match_operand:V4SF 0 "register_operand")
6611         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6612                            (match_operand:V8HF 2 "register_operand")
6613                            (match_operand:V8HF 3 "register_operand")
6614                            (match_operand:SI 4 "aarch64_lane_imm3")]
6615          VFMLA16_HIGH))]
6616   "TARGET_F16FML"
6617 {
6618     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6619     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6620
6621     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6622                                                                operands[1],
6623                                                                operands[2],
6624                                                                operands[3],
6625                                                                p1, lane));
6626     DONE;
6627 })
6628
6629 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6630   [(set (match_operand:V4SF 0 "register_operand" "=w")
6631         (fma:V4SF
6632          (float_extend:V4SF
6633           (vec_select:V4HF
6634             (match_operand:V8HF 2 "register_operand" "w")
6635             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6636          (float_extend:V4SF
6637           (vec_duplicate:V4HF
6638            (vec_select:HF
6639             (match_operand:V8HF 3 "register_operand" "x")
6640             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6641          (match_operand:V4SF 1 "register_operand" "0")))]
6642   "TARGET_F16FML"
6643   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6644   [(set_attr "type" "neon_fp_mul_s")]
6645 )
6646
6647 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6648   [(set (match_operand:V4SF 0 "register_operand" "=w")
6649         (fma:V4SF
6650           (float_extend:V4SF
6651            (neg:V4HF
6652             (vec_select:V4HF
6653              (match_operand:V8HF 2 "register_operand" "w")
6654              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6655          (float_extend:V4SF
6656           (vec_duplicate:V4HF
6657            (vec_select:HF
6658             (match_operand:V8HF 3 "register_operand" "x")
6659             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6660          (match_operand:V4SF 1 "register_operand" "0")))]
6661   "TARGET_F16FML"
6662   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6663   [(set_attr "type" "neon_fp_mul_s")]
6664 )
6665
6666 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6667   [(set (match_operand:V4SF 0 "register_operand" "=w")
6668         (fma:V4SF
6669          (float_extend:V4SF
6670           (vec_select:V4HF
6671             (match_operand:V8HF 2 "register_operand" "w")
6672             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6673          (float_extend:V4SF
6674           (vec_duplicate:V4HF
6675            (vec_select:HF
6676             (match_operand:V8HF 3 "register_operand" "x")
6677             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678          (match_operand:V4SF 1 "register_operand" "0")))]
6679   "TARGET_F16FML"
6680   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6681   [(set_attr "type" "neon_fp_mul_s")]
6682 )
6683
6684 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6685   [(set (match_operand:V4SF 0 "register_operand" "=w")
6686         (fma:V4SF
6687          (float_extend:V4SF
6688           (neg:V4HF
6689            (vec_select:V4HF
6690             (match_operand:V8HF 2 "register_operand" "w")
6691             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6692          (float_extend:V4SF
6693           (vec_duplicate:V4HF
6694            (vec_select:HF
6695             (match_operand:V8HF 3 "register_operand" "x")
6696             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6697          (match_operand:V4SF 1 "register_operand" "0")))]
6698   "TARGET_F16FML"
6699   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6700   [(set_attr "type" "neon_fp_mul_s")]
6701 )
6702
6703 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6704   [(set (match_operand:V2SF 0 "register_operand")
6705         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6706                       (match_operand:V4HF 2 "register_operand")
6707                       (match_operand:V8HF 3 "register_operand")
6708                       (match_operand:SI 4 "aarch64_lane_imm3")]
6709          VFMLA16_LOW))]
6710   "TARGET_F16FML"
6711 {
6712     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6713     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6714
6715     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6716                                                              operands[1],
6717                                                              operands[2],
6718                                                              operands[3],
6719                                                              p1, lane));
6720     DONE;
6721
6722 })
6723
6724 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6725   [(set (match_operand:V2SF 0 "register_operand")
6726         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6727                       (match_operand:V4HF 2 "register_operand")
6728                       (match_operand:V8HF 3 "register_operand")
6729                       (match_operand:SI 4 "aarch64_lane_imm3")]
6730          VFMLA16_HIGH))]
6731   "TARGET_F16FML"
6732 {
6733     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6734     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6735
6736     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6737                                                               operands[1],
6738                                                               operands[2],
6739                                                               operands[3],
6740                                                               p1, lane));
6741     DONE;
6742
6743 })
6744
6745 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6746   [(set (match_operand:V2SF 0 "register_operand" "=w")
6747         (fma:V2SF
6748          (float_extend:V2SF
6749            (vec_select:V2HF
6750             (match_operand:V4HF 2 "register_operand" "w")
6751             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6752          (float_extend:V2SF
6753           (vec_duplicate:V2HF
6754            (vec_select:HF
6755             (match_operand:V8HF 3 "register_operand" "x")
6756             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6757          (match_operand:V2SF 1 "register_operand" "0")))]
6758   "TARGET_F16FML"
6759   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6760   [(set_attr "type" "neon_fp_mul_s")]
6761 )
6762
6763 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6764   [(set (match_operand:V2SF 0 "register_operand" "=w")
6765         (fma:V2SF
6766          (float_extend:V2SF
6767           (neg:V2HF
6768            (vec_select:V2HF
6769             (match_operand:V4HF 2 "register_operand" "w")
6770             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6771          (float_extend:V2SF
6772           (vec_duplicate:V2HF
6773            (vec_select:HF
6774             (match_operand:V8HF 3 "register_operand" "x")
6775             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6776          (match_operand:V2SF 1 "register_operand" "0")))]
6777   "TARGET_F16FML"
6778   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6779   [(set_attr "type" "neon_fp_mul_s")]
6780 )
6781
6782 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6783   [(set (match_operand:V2SF 0 "register_operand" "=w")
6784         (fma:V2SF
6785          (float_extend:V2SF
6786            (vec_select:V2HF
6787             (match_operand:V4HF 2 "register_operand" "w")
6788             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6789          (float_extend:V2SF
6790           (vec_duplicate:V2HF
6791            (vec_select:HF
6792             (match_operand:V8HF 3 "register_operand" "x")
6793             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6794          (match_operand:V2SF 1 "register_operand" "0")))]
6795   "TARGET_F16FML"
6796   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6797   [(set_attr "type" "neon_fp_mul_s")]
6798 )
6799
6800 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6801   [(set (match_operand:V2SF 0 "register_operand" "=w")
6802         (fma:V2SF
6803          (float_extend:V2SF
6804           (neg:V2HF
6805            (vec_select:V2HF
6806             (match_operand:V4HF 2 "register_operand" "w")
6807             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6808          (float_extend:V2SF
6809           (vec_duplicate:V2HF
6810            (vec_select:HF
6811             (match_operand:V8HF 3 "register_operand" "x")
6812             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6813          (match_operand:V2SF 1 "register_operand" "0")))]
6814   "TARGET_F16FML"
6815   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6816   [(set_attr "type" "neon_fp_mul_s")]
6817 )
6818
6819 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6820   [(set (match_operand:V4SF 0 "register_operand")
6821         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6822                       (match_operand:V8HF 2 "register_operand")
6823                       (match_operand:V4HF 3 "register_operand")
6824                       (match_operand:SI 4 "aarch64_imm2")]
6825          VFMLA16_LOW))]
6826   "TARGET_F16FML"
6827 {
6828     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6829     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6830
6831     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6832                                                              operands[1],
6833                                                              operands[2],
6834                                                              operands[3],
6835                                                              p1, lane));
6836     DONE;
6837 })
6838
6839 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6840   [(set (match_operand:V4SF 0 "register_operand")
6841         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6842                       (match_operand:V8HF 2 "register_operand")
6843                       (match_operand:V4HF 3 "register_operand")
6844                       (match_operand:SI 4 "aarch64_imm2")]
6845          VFMLA16_HIGH))]
6846   "TARGET_F16FML"
6847 {
6848     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6849     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6850
6851     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6852                                                               operands[1],
6853                                                               operands[2],
6854                                                               operands[3],
6855                                                               p1, lane));
6856     DONE;
6857 })
6858
6859 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6860   [(set (match_operand:V4SF 0 "register_operand" "=w")
6861         (fma:V4SF
6862          (float_extend:V4SF
6863           (vec_select:V4HF
6864            (match_operand:V8HF 2 "register_operand" "w")
6865            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6866          (float_extend:V4SF
6867           (vec_duplicate:V4HF
6868            (vec_select:HF
6869             (match_operand:V4HF 3 "register_operand" "x")
6870             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6871          (match_operand:V4SF 1 "register_operand" "0")))]
6872   "TARGET_F16FML"
6873   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6874   [(set_attr "type" "neon_fp_mul_s")]
6875 )
6876
6877 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6878   [(set (match_operand:V4SF 0 "register_operand" "=w")
6879         (fma:V4SF
6880          (float_extend:V4SF
6881           (neg:V4HF
6882            (vec_select:V4HF
6883             (match_operand:V8HF 2 "register_operand" "w")
6884             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6885          (float_extend:V4SF
6886           (vec_duplicate:V4HF
6887            (vec_select:HF
6888             (match_operand:V4HF 3 "register_operand" "x")
6889             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6890          (match_operand:V4SF 1 "register_operand" "0")))]
6891   "TARGET_F16FML"
6892   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6893   [(set_attr "type" "neon_fp_mul_s")]
6894 )
6895
6896 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6897   [(set (match_operand:V4SF 0 "register_operand" "=w")
6898         (fma:V4SF
6899          (float_extend:V4SF
6900           (vec_select:V4HF
6901            (match_operand:V8HF 2 "register_operand" "w")
6902            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6903          (float_extend:V4SF
6904           (vec_duplicate:V4HF
6905            (vec_select:HF
6906             (match_operand:V4HF 3 "register_operand" "x")
6907             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6908          (match_operand:V4SF 1 "register_operand" "0")))]
6909   "TARGET_F16FML"
6910   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6911   [(set_attr "type" "neon_fp_mul_s")]
6912 )
6913
6914 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6915   [(set (match_operand:V4SF 0 "register_operand" "=w")
6916         (fma:V4SF
6917          (float_extend:V4SF
6918           (neg:V4HF
6919            (vec_select:V4HF
6920             (match_operand:V8HF 2 "register_operand" "w")
6921             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6922          (float_extend:V4SF
6923           (vec_duplicate:V4HF
6924            (vec_select:HF
6925             (match_operand:V4HF 3 "register_operand" "x")
6926             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6927          (match_operand:V4SF 1 "register_operand" "0")))]
6928   "TARGET_F16FML"
6929   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6930   [(set_attr "type" "neon_fp_mul_s")]
6931 )
6932
6933 ;; pmull
6934
6935 (define_insn "aarch64_crypto_pmulldi"
6936   [(set (match_operand:TI 0 "register_operand" "=w")
6937         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6938                      (match_operand:DI 2 "register_operand" "w")]
6939                     UNSPEC_PMULL))]
6940  "TARGET_SIMD && TARGET_AES"
6941  "pmull\\t%0.1q, %1.1d, %2.1d"
6942   [(set_attr "type" "crypto_pmull")]
6943 )
6944
6945 (define_insn "aarch64_crypto_pmullv2di"
6946  [(set (match_operand:TI 0 "register_operand" "=w")
6947        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6948                    (match_operand:V2DI 2 "register_operand" "w")]
6949                   UNSPEC_PMULL2))]
6950   "TARGET_SIMD && TARGET_AES"
6951   "pmull2\\t%0.1q, %1.2d, %2.2d"
6952   [(set_attr "type" "crypto_pmull")]
6953 )