gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "@aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; These instructions map to the __builtins for the Dot Product operations.
 423 (define_insn "aarch64_<sur>dot<vsi2qi>"
 424   [(set (match_operand:VS 0 "register_operand" "=w")
 425         (plus:VS (match_operand:VS 1 "register_operand" "0")
 426                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 427                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 428                 DOTPROD)))]
 429   "TARGET_DOTPROD"
 430   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 431   [(set_attr "type" "neon_dot")]
 432 )
 433
 434 ;; These expands map to the Dot Product optab the vectorizer checks for.
 435 ;; The auto-vectorizer expects a dot product builtin that also does an
 436 ;; accumulation into the provided register.
 437 ;; Given the following pattern
 438 ;;
 439 ;; for (i=0; i<len; i++) {
 440 ;;     c = a[i] * b[i];
 441 ;;     r += c;
 442 ;; }
 443 ;; return result;
 444 ;;
 445 ;; This can be auto-vectorized to
 446 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 447 ;;
 448 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 451 ;; ...
 452 ;;
 453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 454 (define_expand "<sur>dot_prod<vsi2qi>"
 455   [(set (match_operand:VS 0 "register_operand")
 456         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 457                             (match_operand:<VSI2QI> 2 "register_operand")]
 458                  DOTPROD)
 459                 (match_operand:VS 3 "register_operand")))]
 460   "TARGET_DOTPROD"
 461 {
 462   emit_insn (
 463     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 464                                     operands[2]));
 465   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 466   DONE;
 467 })
 468
 469 ;; These instructions map to the __builtins for the Dot Product
 470 ;; indexed operations.
 471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 472   [(set (match_operand:VS 0 "register_operand" "=w")
 473         (plus:VS (match_operand:VS 1 "register_operand" "0")
 474                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 475                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 476                             (match_operand:SI 4 "immediate_operand" "i")]
 477                 DOTPROD)))]
 478   "TARGET_DOTPROD"
 479   {
 480     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 481     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 482   }
 483   [(set_attr "type" "neon_dot")]
 484 )
 485
 486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 487   [(set (match_operand:VS 0 "register_operand" "=w")
 488         (plus:VS (match_operand:VS 1 "register_operand" "0")
 489                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 490                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 491                             (match_operand:SI 4 "immediate_operand" "i")]
 492                 DOTPROD)))]
 493   "TARGET_DOTPROD"
 494   {
 495     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 496     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 497   }
 498   [(set_attr "type" "neon_dot")]
 499 )
 500
 501 (define_expand "copysign<mode>3"
 502   [(match_operand:VHSDF 0 "register_operand")
 503    (match_operand:VHSDF 1 "register_operand")
 504    (match_operand:VHSDF 2 "register_operand")]
 505   "TARGET_FLOAT && TARGET_SIMD"
 506 {
 507   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 508   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 509
 510   emit_move_insn (v_bitmask,
 511                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 512                                                      HOST_WIDE_INT_M1U << bits));
 513   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 514                                          operands[2], operands[1]));
 515   DONE;
 516 }
 517 )
 518
 519 (define_insn "*aarch64_mul3_elt<mode>"
 520  [(set (match_operand:VMUL 0 "register_operand" "=w")
 521     (mult:VMUL
 522       (vec_duplicate:VMUL
 523           (vec_select:<VEL>
 524             (match_operand:VMUL 1 "register_operand" "<h_con>")
 525             (parallel [(match_operand:SI 2 "immediate_operand")])))
 526       (match_operand:VMUL 3 "register_operand" "w")))]
 527   "TARGET_SIMD"
 528   {
 529     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 530     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 531   }
 532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 533 )
 534
 535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 536   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 537      (mult:VMUL_CHANGE_NLANES
 538        (vec_duplicate:VMUL_CHANGE_NLANES
 539           (vec_select:<VEL>
 540             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 541             (parallel [(match_operand:SI 2 "immediate_operand")])))
 542       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 543   "TARGET_SIMD"
 544   {
 545     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 546     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 547   }
 548   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 549 )
 550
 551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 552  [(set (match_operand:VMUL 0 "register_operand" "=w")
 553     (mult:VMUL
 554       (vec_duplicate:VMUL
 555             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 556       (match_operand:VMUL 2 "register_operand" "w")))]
 557   "TARGET_SIMD"
 558   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 559   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 560 )
 561
 562 (define_insn "@aarch64_rsqrte<mode>"
 563   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 564         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 565                      UNSPEC_RSQRTE))]
 566   "TARGET_SIMD"
 567   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 568   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 569
 570 (define_insn "@aarch64_rsqrts<mode>"
 571   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 572         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 573                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 574          UNSPEC_RSQRTS))]
 575   "TARGET_SIMD"
 576   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 577   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 578
 579 (define_expand "rsqrt<mode>2"
 580   [(set (match_operand:VALLF 0 "register_operand" "=w")
 581         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 582                      UNSPEC_RSQRT))]
 583   "TARGET_SIMD"
 584 {
 585   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 586   DONE;
 587 })
 588
 589 (define_insn "*aarch64_mul3_elt_to_64v2df"
 590   [(set (match_operand:DF 0 "register_operand" "=w")
 591      (mult:DF
 592        (vec_select:DF
 593          (match_operand:V2DF 1 "register_operand" "w")
 594          (parallel [(match_operand:SI 2 "immediate_operand")]))
 595        (match_operand:DF 3 "register_operand" "w")))]
 596   "TARGET_SIMD"
 597   {
 598     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 599     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 600   }
 601   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 602 )
 603
 604 (define_insn "neg<mode>2"
 605   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 606         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 607   "TARGET_SIMD"
 608   "neg\t%0.<Vtype>, %1.<Vtype>"
 609   [(set_attr "type" "neon_neg<q>")]
 610 )
 611
 612 (define_insn "abs<mode>2"
 613   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 614         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 615   "TARGET_SIMD"
 616   "abs\t%0.<Vtype>, %1.<Vtype>"
 617   [(set_attr "type" "neon_abs<q>")]
 618 )
 619
 620 ;; The intrinsic version of integer ABS must not be allowed to
 621 ;; combine with any operation with an integerated ABS step, such
 622 ;; as SABD.
 623 (define_insn "aarch64_abs<mode>"
 624   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 625           (unspec:VSDQ_I_DI
 626             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 627            UNSPEC_ABS))]
 628   "TARGET_SIMD"
 629   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 630   [(set_attr "type" "neon_abs<q>")]
 631 )
 632
 633 (define_insn "abd<mode>_3"
 634   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 635         (abs:VDQ_BHSI (minus:VDQ_BHSI
 636                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 637                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 638   "TARGET_SIMD"
 639   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 640   [(set_attr "type" "neon_abd<q>")]
 641 )
 642
 643 (define_insn "aarch64_<sur>abdl2<mode>_3"
 644   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 645         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 646                           (match_operand:VDQV_S 2 "register_operand" "w")]
 647         ABDL2))]
 648   "TARGET_SIMD"
 649   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 650   [(set_attr "type" "neon_abd<q>")]
 651 )
 652
 653 (define_insn "aarch64_<sur>abal<mode>_4"
 654   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 655         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 656                           (match_operand:VDQV_S 2 "register_operand" "w")
 657                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 658         ABAL))]
 659   "TARGET_SIMD"
 660   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 661   [(set_attr "type" "neon_arith_acc<q>")]
 662 )
 663
 664 (define_insn "aarch64_<sur>adalp<mode>_3"
 665   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 666         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 667                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 668         ADALP))]
 669   "TARGET_SIMD"
 670   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 671   [(set_attr "type" "neon_reduc_add<q>")]
 672 )
 673
 674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 675 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 676 ;; reduction of the difference into a V4SI vector and accumulate that into
 677 ;; operand 3 before copying that into the result operand 0.
 678 ;; Perform that with a sequence of:
 679 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 680 ;; UABAL        tmp.8h, op1.16b, op2.16b
 681 ;; UADALP       op3.4s, tmp.8h
 682 ;; MOV          op0, op3 // should be eliminated in later passes.
 683 ;; The signed version just uses the signed variants of the above instructions.
 684
 685 (define_expand "<sur>sadv16qi"
 686   [(use (match_operand:V4SI 0 "register_operand"))
 687    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 688                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 689    (use (match_operand:V4SI 3 "register_operand"))]
 690   "TARGET_SIMD"
 691   {
 692     rtx reduc = gen_reg_rtx (V8HImode);
 693     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 694                                                operands[2]));
 695     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 696                                               operands[2], reduc));
 697     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 698                                               operands[3]));
 699     emit_move_insn (operands[0], operands[3]);
 700     DONE;
 701   }
 702 )
 703
 704 (define_insn "aba<mode>_3"
 705   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 706         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 707                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 708                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 709                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 710   "TARGET_SIMD"
 711   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 712   [(set_attr "type" "neon_arith_acc<q>")]
 713 )
 714
 715 (define_insn "fabd<mode>3"
 716   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 717         (abs:VHSDF_HSDF
 718           (minus:VHSDF_HSDF
 719             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 720             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 721   "TARGET_SIMD"
 722   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 723   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 724 )
 725
 726 ;; For AND (vector, register) and BIC (vector, immediate)
 727 (define_insn "and<mode>3"
 728   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 729         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 730                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 731   "TARGET_SIMD"
 732   {
 733     switch (which_alternative)
 734       {
 735       case 0:
 736         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 737       case 1:
 738         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 739                                                   AARCH64_CHECK_BIC);
 740       default:
 741         gcc_unreachable ();
 742       }
 743   }
 744   [(set_attr "type" "neon_logic<q>")]
 745 )
 746
 747 ;; For ORR (vector, register) and ORR (vector, immediate)
 748 (define_insn "ior<mode>3"
 749   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 750         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 751                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 752   "TARGET_SIMD"
 753   {
 754     switch (which_alternative)
 755       {
 756       case 0:
 757         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 758       case 1:
 759         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 760                                                   AARCH64_CHECK_ORR);
 761       default:
 762         gcc_unreachable ();
 763       }
 764   }
 765   [(set_attr "type" "neon_logic<q>")]
 766 )
 767
 768 (define_insn "xor<mode>3"
 769   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 770         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 771                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 772   "TARGET_SIMD"
 773   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 774   [(set_attr "type" "neon_logic<q>")]
 775 )
 776
 777 (define_insn "one_cmpl<mode>2"
 778   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 779         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 780   "TARGET_SIMD"
 781   "not\t%0.<Vbtype>, %1.<Vbtype>"
 782   [(set_attr "type" "neon_logic<q>")]
 783 )
 784
 785 (define_insn "aarch64_simd_vec_set<mode>"
 786   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 787         (vec_merge:VALL_F16
 788             (vec_duplicate:VALL_F16
 789                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 790             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 791             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 792   "TARGET_SIMD"
 793   {
 794    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 795    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 796    switch (which_alternative)
 797      {
 798      case 0:
 799         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 800      case 1:
 801         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 802      case 2:
 803         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 804      default:
 805         gcc_unreachable ();
 806      }
 807   }
 808   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 809 )
 810
 811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 812   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 813         (vec_merge:VALL_F16
 814             (vec_duplicate:VALL_F16
 815               (vec_select:<VEL>
 816                 (match_operand:VALL_F16 3 "register_operand" "w")
 817                 (parallel
 818                   [(match_operand:SI 4 "immediate_operand" "i")])))
 819             (match_operand:VALL_F16 1 "register_operand" "0")
 820             (match_operand:SI 2 "immediate_operand" "i")))]
 821   "TARGET_SIMD"
 822   {
 823     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 824     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 825     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 826
 827     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 828   }
 829   [(set_attr "type" "neon_ins<q>")]
 830 )
 831
 832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 833   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 834         (vec_merge:VALL_F16_NO_V2Q
 835             (vec_duplicate:VALL_F16_NO_V2Q
 836               (vec_select:<VEL>
 837                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 838                 (parallel
 839                   [(match_operand:SI 4 "immediate_operand" "i")])))
 840             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 841             (match_operand:SI 2 "immediate_operand" "i")))]
 842   "TARGET_SIMD"
 843   {
 844     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 845     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 846     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 847                                            INTVAL (operands[4]));
 848
 849     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 850   }
 851   [(set_attr "type" "neon_ins<q>")]
 852 )
 853
 854 (define_insn "aarch64_simd_lshr<mode>"
 855  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 856        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 857                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 858  "TARGET_SIMD"
 859  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 860   [(set_attr "type" "neon_shift_imm<q>")]
 861 )
 862
 863 (define_insn "aarch64_simd_ashr<mode>"
 864  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 865        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 866                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 867  "TARGET_SIMD"
 868  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 869   [(set_attr "type" "neon_shift_imm<q>")]
 870 )
 871
 872 (define_insn "aarch64_simd_imm_shl<mode>"
 873  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 874        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 875                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 876  "TARGET_SIMD"
 877   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 878   [(set_attr "type" "neon_shift_imm<q>")]
 879 )
 880
 881 (define_insn "aarch64_simd_reg_sshl<mode>"
 882  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 883        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 884                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 885  "TARGET_SIMD"
 886  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 887   [(set_attr "type" "neon_shift_reg<q>")]
 888 )
 889
 890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 891  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 892        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 893                     (match_operand:VDQ_I 2 "register_operand" "w")]
 894                    UNSPEC_ASHIFT_UNSIGNED))]
 895  "TARGET_SIMD"
 896  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 897   [(set_attr "type" "neon_shift_reg<q>")]
 898 )
 899
 900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 901  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 902        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 903                     (match_operand:VDQ_I 2 "register_operand" "w")]
 904                    UNSPEC_ASHIFT_SIGNED))]
 905  "TARGET_SIMD"
 906  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 907   [(set_attr "type" "neon_shift_reg<q>")]
 908 )
 909
 910 (define_expand "ashl<mode>3"
 911   [(match_operand:VDQ_I 0 "register_operand" "")
 912    (match_operand:VDQ_I 1 "register_operand" "")
 913    (match_operand:SI  2 "general_operand" "")]
 914  "TARGET_SIMD"
 915 {
 916   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 917   int shift_amount;
 918
 919   if (CONST_INT_P (operands[2]))
 920     {
 921       shift_amount = INTVAL (operands[2]);
 922       if (shift_amount >= 0 && shift_amount < bit_width)
 923         {
 924           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 925                                                        shift_amount);
 926           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 927                                                      operands[1],
 928                                                      tmp));
 929           DONE;
 930         }
 931       else
 932         {
 933           operands[2] = force_reg (SImode, operands[2]);
 934         }
 935     }
 936   else if (MEM_P (operands[2]))
 937     {
 938       operands[2] = force_reg (SImode, operands[2]);
 939     }
 940
 941   if (REG_P (operands[2]))
 942     {
 943       rtx tmp = gen_reg_rtx (<MODE>mode);
 944       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 945                                              convert_to_mode (<VEL>mode,
 946                                                               operands[2],
 947                                                               0)));
 948       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 949                                                   tmp));
 950       DONE;
 951     }
 952   else
 953     FAIL;
 954 }
 955 )
 956
 957 (define_expand "lshr<mode>3"
 958   [(match_operand:VDQ_I 0 "register_operand" "")
 959    (match_operand:VDQ_I 1 "register_operand" "")
 960    (match_operand:SI  2 "general_operand" "")]
 961  "TARGET_SIMD"
 962 {
 963   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 964   int shift_amount;
 965
 966   if (CONST_INT_P (operands[2]))
 967     {
 968       shift_amount = INTVAL (operands[2]);
 969       if (shift_amount > 0 && shift_amount <= bit_width)
 970         {
 971           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 972                                                        shift_amount);
 973           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 974                                                   operands[1],
 975                                                   tmp));
 976           DONE;
 977         }
 978       else
 979         operands[2] = force_reg (SImode, operands[2]);
 980     }
 981   else if (MEM_P (operands[2]))
 982     {
 983       operands[2] = force_reg (SImode, operands[2]);
 984     }
 985
 986   if (REG_P (operands[2]))
 987     {
 988       rtx tmp = gen_reg_rtx (SImode);
 989       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 990       emit_insn (gen_negsi2 (tmp, operands[2]));
 991       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 992                                              convert_to_mode (<VEL>mode,
 993                                                               tmp, 0)));
 994       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 995                                                           operands[1],
 996                                                           tmp1));
 997       DONE;
 998     }
 999   else
1000     FAIL;
1001 }
1002 )
1003
1004 (define_expand "ashr<mode>3"
1005   [(match_operand:VDQ_I 0 "register_operand" "")
1006    (match_operand:VDQ_I 1 "register_operand" "")
1007    (match_operand:SI  2 "general_operand" "")]
1008  "TARGET_SIMD"
1009 {
1010   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011   int shift_amount;
1012
1013   if (CONST_INT_P (operands[2]))
1014     {
1015       shift_amount = INTVAL (operands[2]);
1016       if (shift_amount > 0 && shift_amount <= bit_width)
1017         {
1018           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019                                                        shift_amount);
1020           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021                                                   operands[1],
1022                                                   tmp));
1023           DONE;
1024         }
1025       else
1026         operands[2] = force_reg (SImode, operands[2]);
1027     }
1028   else if (MEM_P (operands[2]))
1029     {
1030       operands[2] = force_reg (SImode, operands[2]);
1031     }
1032
1033   if (REG_P (operands[2]))
1034     {
1035       rtx tmp = gen_reg_rtx (SImode);
1036       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037       emit_insn (gen_negsi2 (tmp, operands[2]));
1038       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039                                              convert_to_mode (<VEL>mode,
1040                                                               tmp, 0)));
1041       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042                                                         operands[1],
1043                                                         tmp1));
1044       DONE;
1045     }
1046   else
1047     FAIL;
1048 }
1049 )
1050
1051 (define_expand "vashl<mode>3"
1052  [(match_operand:VDQ_I 0 "register_operand" "")
1053   (match_operand:VDQ_I 1 "register_operand" "")
1054   (match_operand:VDQ_I 2 "register_operand" "")]
1055  "TARGET_SIMD"
1056 {
1057   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058                                               operands[2]));
1059   DONE;
1060 })
1061
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067   (match_operand:VDQ_BHSI 1 "register_operand" "")
1068   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069  "TARGET_SIMD"
1070 {
1071   rtx neg = gen_reg_rtx (<MODE>mode);
1072   emit (gen_neg<mode>2 (neg, operands[2]));
1073   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074                                                     neg));
1075   DONE;
1076 })
1077
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080   [(match_operand:DI 0 "register_operand" "=w")
1081    (match_operand:DI 1 "register_operand" "w")
1082    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083   "TARGET_SIMD"
1084   {
1085     /* An arithmetic shift right by 64 fills the result with copies of the sign
1086        bit, just like asr by 63 - however the standard pattern does not handle
1087        a shift by 64.  */
1088     if (INTVAL (operands[2]) == 64)
1089       operands[2] = GEN_INT (63);
1090     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091     DONE;
1092   }
1093 )
1094
1095 (define_expand "vlshr<mode>3"
1096  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097   (match_operand:VDQ_BHSI 1 "register_operand" "")
1098   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099  "TARGET_SIMD"
1100 {
1101   rtx neg = gen_reg_rtx (<MODE>mode);
1102   emit (gen_neg<mode>2 (neg, operands[2]));
1103   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104                                                       neg));
1105   DONE;
1106 })
1107
1108 (define_expand "aarch64_lshr_simddi"
1109   [(match_operand:DI 0 "register_operand" "=w")
1110    (match_operand:DI 1 "register_operand" "w")
1111    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112   "TARGET_SIMD"
1113   {
1114     if (INTVAL (operands[2]) == 64)
1115       emit_move_insn (operands[0], const0_rtx);
1116     else
1117       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118     DONE;
1119   }
1120 )
1121
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1123 (define_insn "vec_shr_<mode>"
1124   [(set (match_operand:VD 0 "register_operand" "=w")
1125         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126                     (match_operand:SI 2 "immediate_operand" "i")]
1127                    UNSPEC_VEC_SHR))]
1128   "TARGET_SIMD"
1129   {
1130     if (BYTES_BIG_ENDIAN)
1131       return "shl %d0, %d1, %2";
1132     else
1133       return "ushr %d0, %d1, %2";
1134   }
1135   [(set_attr "type" "neon_shift_imm")]
1136 )
1137
1138 (define_expand "vec_set<mode>"
1139   [(match_operand:VALL_F16 0 "register_operand" "+w")
1140    (match_operand:<VEL> 1 "register_operand" "w")
1141    (match_operand:SI 2 "immediate_operand" "")]
1142   "TARGET_SIMD"
1143   {
1144     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146                                           GEN_INT (elem), operands[0]));
1147     DONE;
1148   }
1149 )
1150
1151
1152 (define_insn "aarch64_mla<mode>"
1153  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154        (plus:VDQ_BHSI (mult:VDQ_BHSI
1155                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158  "TARGET_SIMD"
1159  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160   [(set_attr "type" "neon_mla_<Vetype><q>")]
1161 )
1162
1163 (define_insn "*aarch64_mla_elt<mode>"
1164  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165        (plus:VDQHS
1166          (mult:VDQHS
1167            (vec_duplicate:VDQHS
1168               (vec_select:<VEL>
1169                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1171            (match_operand:VDQHS 3 "register_operand" "w"))
1172          (match_operand:VDQHS 4 "register_operand" "0")))]
1173  "TARGET_SIMD"
1174   {
1175     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177   }
1178   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1179 )
1180
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183        (plus:VDQHS
1184          (mult:VDQHS
1185            (vec_duplicate:VDQHS
1186               (vec_select:<VEL>
1187                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1189            (match_operand:VDQHS 3 "register_operand" "w"))
1190          (match_operand:VDQHS 4 "register_operand" "0")))]
1191  "TARGET_SIMD"
1192   {
1193     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195   }
1196   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1197 )
1198
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201         (plus:VDQHS
1202           (mult:VDQHS (vec_duplicate:VDQHS
1203                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204                 (match_operand:VDQHS 2 "register_operand" "w"))
1205           (match_operand:VDQHS 3 "register_operand" "0")))]
1206  "TARGET_SIMD"
1207  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "aarch64_mls<mode>"
1212  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216  "TARGET_SIMD"
1217  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218   [(set_attr "type" "neon_mla_<Vetype><q>")]
1219 )
1220
1221 (define_insn "*aarch64_mls_elt<mode>"
1222  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223        (minus:VDQHS
1224          (match_operand:VDQHS 4 "register_operand" "0")
1225          (mult:VDQHS
1226            (vec_duplicate:VDQHS
1227               (vec_select:<VEL>
1228                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1230            (match_operand:VDQHS 3 "register_operand" "w"))))]
1231  "TARGET_SIMD"
1232   {
1233     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235   }
1236   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1237 )
1238
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241        (minus:VDQHS
1242          (match_operand:VDQHS 4 "register_operand" "0")
1243          (mult:VDQHS
1244            (vec_duplicate:VDQHS
1245               (vec_select:<VEL>
1246                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1248            (match_operand:VDQHS 3 "register_operand" "w"))))]
1249  "TARGET_SIMD"
1250   {
1251     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253   }
1254   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1255 )
1256
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259         (minus:VDQHS
1260           (match_operand:VDQHS 1 "register_operand" "0")
1261           (mult:VDQHS (vec_duplicate:VDQHS
1262                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264   "TARGET_SIMD"
1265   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267 )
1268
1269 ;; Max/Min operations.
1270 (define_insn "<su><maxmin><mode>3"
1271  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274  "TARGET_SIMD"
1275  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276   [(set_attr "type" "neon_minmax<q>")]
1277 )
1278
1279 (define_expand "<su><maxmin>v2di3"
1280  [(set (match_operand:V2DI 0 "register_operand" "")
1281        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282                     (match_operand:V2DI 2 "register_operand" "")))]
1283  "TARGET_SIMD"
1284 {
1285   enum rtx_code cmp_operator;
1286   rtx cmp_fmt;
1287
1288   switch (<CODE>)
1289     {
1290     case UMIN:
1291       cmp_operator = LTU;
1292       break;
1293     case SMIN:
1294       cmp_operator = LT;
1295       break;
1296     case UMAX:
1297       cmp_operator = GTU;
1298       break;
1299     case SMAX:
1300       cmp_operator = GT;
1301       break;
1302     default:
1303       gcc_unreachable ();
1304     }
1305
1306   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308               operands[2], cmp_fmt, operands[1], operands[2]));
1309   DONE;
1310 })
1311
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317                         MAXMINV))]
1318  "TARGET_SIMD"
1319  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320   [(set_attr "type" "neon_minmax<q>")]
1321 )
1322
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327                       (match_operand:VHSDF 2 "register_operand" "w")]
1328                       FMAXMINV))]
1329  "TARGET_SIMD"
1330  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331   [(set_attr "type" "neon_minmax<q>")]
1332 )
1333
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1339
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1343
1344 (define_insn "move_lo_quad_internal_<mode>"
1345   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346         (vec_concat:VQ_NO2E
1347           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348           (vec_duplicate:<VHALF> (const_int 0))))]
1349   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350   "@
1351    dup\\t%d0, %1.d[0]
1352    fmov\\t%d0, %1
1353    dup\\t%d0, %1"
1354   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355    (set_attr "length" "4")
1356    (set_attr "arch" "simd,fp,simd")]
1357 )
1358
1359 (define_insn "move_lo_quad_internal_<mode>"
1360   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1361         (vec_concat:VQ_2E
1362           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1363           (const_int 0)))]
1364   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1365   "@
1366    dup\\t%d0, %1.d[0]
1367    fmov\\t%d0, %1
1368    dup\\t%d0, %1"
1369   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1370    (set_attr "length" "4")
1371    (set_attr "arch" "simd,fp,simd")]
1372 )
1373
1374 (define_insn "move_lo_quad_internal_be_<mode>"
1375   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1376         (vec_concat:VQ_NO2E
1377           (vec_duplicate:<VHALF> (const_int 0))
1378           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1379   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1380   "@
1381    dup\\t%d0, %1.d[0]
1382    fmov\\t%d0, %1
1383    dup\\t%d0, %1"
1384   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1385    (set_attr "length" "4")
1386    (set_attr "arch" "simd,fp,simd")]
1387 )
1388
1389 (define_insn "move_lo_quad_internal_be_<mode>"
1390   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1391         (vec_concat:VQ_2E
1392           (const_int 0)
1393           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1394   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1395   "@
1396    dup\\t%d0, %1.d[0]
1397    fmov\\t%d0, %1
1398    dup\\t%d0, %1"
1399   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1400    (set_attr "length" "4")
1401    (set_attr "arch" "simd,fp,simd")]
1402 )
1403
1404 (define_expand "move_lo_quad_<mode>"
1405   [(match_operand:VQ 0 "register_operand")
1406    (match_operand:VQ 1 "register_operand")]
1407   "TARGET_SIMD"
1408 {
1409   if (BYTES_BIG_ENDIAN)
1410     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1411   else
1412     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1413   DONE;
1414 }
1415 )
1416
1417 ;; Move operand1 to the high architectural bits of the register, keeping
1418 ;; the low architectural bits of operand2.
1419 ;; For little-endian this is { operand2, operand1 }
1420 ;; For big-endian this is { operand1, operand2 }
1421
1422 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1423   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1424         (vec_concat:VQ
1425           (vec_select:<VHALF>
1426                 (match_dup 0)
1427                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1428           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1429   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1430   "@
1431    ins\\t%0.d[1], %1.d[0]
1432    ins\\t%0.d[1], %1"
1433   [(set_attr "type" "neon_ins")]
1434 )
1435
1436 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1437   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1438         (vec_concat:VQ
1439           (match_operand:<VHALF> 1 "register_operand" "w,r")
1440           (vec_select:<VHALF>
1441                 (match_dup 0)
1442                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1443   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1444   "@
1445    ins\\t%0.d[1], %1.d[0]
1446    ins\\t%0.d[1], %1"
1447   [(set_attr "type" "neon_ins")]
1448 )
1449
1450 (define_expand "move_hi_quad_<mode>"
1451  [(match_operand:VQ 0 "register_operand" "")
1452   (match_operand:<VHALF> 1 "register_operand" "")]
1453  "TARGET_SIMD"
1454 {
1455   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1456   if (BYTES_BIG_ENDIAN)
1457     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1458                     operands[1], p));
1459   else
1460     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1461                     operands[1], p));
1462   DONE;
1463 })
1464
1465 ;; Narrowing operations.
1466
1467 ;; For doubles.
1468 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1469  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1470        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1471  "TARGET_SIMD"
1472  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1473   [(set_attr "type" "neon_shift_imm_narrow_q")]
1474 )
1475
1476 (define_expand "vec_pack_trunc_<mode>"
1477  [(match_operand:<VNARROWD> 0 "register_operand" "")
1478   (match_operand:VDN 1 "register_operand" "")
1479   (match_operand:VDN 2 "register_operand" "")]
1480  "TARGET_SIMD"
1481 {
1482   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1483   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1484   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1485
1486   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1487   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1488   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1489   DONE;
1490 })
1491
1492 ;; For quads.
1493
1494 (define_insn "vec_pack_trunc_<mode>"
1495  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1496        (vec_concat:<VNARROWQ2>
1497          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1498          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1499  "TARGET_SIMD"
1500  {
1501    if (BYTES_BIG_ENDIAN)
1502      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1503    else
1504      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1505  }
1506   [(set_attr "type" "multiple")
1507    (set_attr "length" "8")]
1508 )
1509
1510 ;; Widening operations.
1511
1512 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1513   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515                                (match_operand:VQW 1 "register_operand" "w")
1516                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1517                             )))]
1518   "TARGET_SIMD"
1519   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1520   [(set_attr "type" "neon_shift_imm_long")]
1521 )
1522
1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1524   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1525         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1526                                (match_operand:VQW 1 "register_operand" "w")
1527                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1528                             )))]
1529   "TARGET_SIMD"
1530   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1531   [(set_attr "type" "neon_shift_imm_long")]
1532 )
1533
1534 (define_expand "vec_unpack<su>_hi_<mode>"
1535   [(match_operand:<VWIDE> 0 "register_operand" "")
1536    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1537   "TARGET_SIMD"
1538   {
1539     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1540     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1541                                                           operands[1], p));
1542     DONE;
1543   }
1544 )
1545
1546 (define_expand "vec_unpack<su>_lo_<mode>"
1547   [(match_operand:<VWIDE> 0 "register_operand" "")
1548    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1549   "TARGET_SIMD"
1550   {
1551     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1552     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1553                                                           operands[1], p));
1554     DONE;
1555   }
1556 )
1557
1558 ;; Widening arithmetic.
1559
1560 (define_insn "*aarch64_<su>mlal_lo<mode>"
1561   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1562         (plus:<VWIDE>
1563           (mult:<VWIDE>
1564               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1565                  (match_operand:VQW 2 "register_operand" "w")
1566                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1567               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1568                  (match_operand:VQW 4 "register_operand" "w")
1569                  (match_dup 3))))
1570           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1571   "TARGET_SIMD"
1572   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1573   [(set_attr "type" "neon_mla_<Vetype>_long")]
1574 )
1575
1576 (define_insn "*aarch64_<su>mlal_hi<mode>"
1577   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1578         (plus:<VWIDE>
1579           (mult:<VWIDE>
1580               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581                  (match_operand:VQW 2 "register_operand" "w")
1582                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1583               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584                  (match_operand:VQW 4 "register_operand" "w")
1585                  (match_dup 3))))
1586           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1587   "TARGET_SIMD"
1588   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1589   [(set_attr "type" "neon_mla_<Vetype>_long")]
1590 )
1591
1592 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1593   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1594         (minus:<VWIDE>
1595           (match_operand:<VWIDE> 1 "register_operand" "0")
1596           (mult:<VWIDE>
1597               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1598                  (match_operand:VQW 2 "register_operand" "w")
1599                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1600               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1601                  (match_operand:VQW 4 "register_operand" "w")
1602                  (match_dup 3))))))]
1603   "TARGET_SIMD"
1604   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1605   [(set_attr "type" "neon_mla_<Vetype>_long")]
1606 )
1607
1608 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1609   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1610         (minus:<VWIDE>
1611           (match_operand:<VWIDE> 1 "register_operand" "0")
1612           (mult:<VWIDE>
1613               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1614                  (match_operand:VQW 2 "register_operand" "w")
1615                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1616               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1617                  (match_operand:VQW 4 "register_operand" "w")
1618                  (match_dup 3))))))]
1619   "TARGET_SIMD"
1620   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1621   [(set_attr "type" "neon_mla_<Vetype>_long")]
1622 )
1623
1624 (define_insn "*aarch64_<su>mlal<mode>"
1625   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1626         (plus:<VWIDE>
1627           (mult:<VWIDE>
1628             (ANY_EXTEND:<VWIDE>
1629               (match_operand:VD_BHSI 1 "register_operand" "w"))
1630             (ANY_EXTEND:<VWIDE>
1631               (match_operand:VD_BHSI 2 "register_operand" "w")))
1632           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1633   "TARGET_SIMD"
1634   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1635   [(set_attr "type" "neon_mla_<Vetype>_long")]
1636 )
1637
1638 (define_insn "*aarch64_<su>mlsl<mode>"
1639   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1640         (minus:<VWIDE>
1641           (match_operand:<VWIDE> 1 "register_operand" "0")
1642           (mult:<VWIDE>
1643             (ANY_EXTEND:<VWIDE>
1644               (match_operand:VD_BHSI 2 "register_operand" "w"))
1645             (ANY_EXTEND:<VWIDE>
1646               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1647   "TARGET_SIMD"
1648   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1649   [(set_attr "type" "neon_mla_<Vetype>_long")]
1650 )
1651
1652 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1653  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655                            (match_operand:VQW 1 "register_operand" "w")
1656                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1657                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658                            (match_operand:VQW 2 "register_operand" "w")
1659                            (match_dup 3)))))]
1660   "TARGET_SIMD"
1661   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1662   [(set_attr "type" "neon_mul_<Vetype>_long")]
1663 )
1664
1665 (define_expand "vec_widen_<su>mult_lo_<mode>"
1666   [(match_operand:<VWIDE> 0 "register_operand" "")
1667    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1668    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1669  "TARGET_SIMD"
1670  {
1671    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1672    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1673                                                        operands[1],
1674                                                        operands[2], p));
1675    DONE;
1676  }
1677 )
1678
1679 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1680  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682                             (match_operand:VQW 1 "register_operand" "w")
1683                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1684                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685                             (match_operand:VQW 2 "register_operand" "w")
1686                             (match_dup 3)))))]
1687   "TARGET_SIMD"
1688   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1689   [(set_attr "type" "neon_mul_<Vetype>_long")]
1690 )
1691
1692 (define_expand "vec_widen_<su>mult_hi_<mode>"
1693   [(match_operand:<VWIDE> 0 "register_operand" "")
1694    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1695    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1696  "TARGET_SIMD"
1697  {
1698    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1699    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1700                                                        operands[1],
1701                                                        operands[2], p));
1702    DONE;
1703
1704  }
1705 )
1706
1707 ;; FP vector operations.
1708 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1709 ;; double-precision (64-bit) floating-point data types and arithmetic as
1710 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1711 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1712 ;;
1713 ;; Floating-point operations can raise an exception.  Vectorizing such
1714 ;; operations are safe because of reasons explained below.
1715 ;;
1716 ;; ARMv8 permits an extension to enable trapped floating-point
1717 ;; exception handling, however this is an optional feature.  In the
1718 ;; event of a floating-point exception being raised by vectorised
1719 ;; code then:
1720 ;; 1.  If trapped floating-point exceptions are available, then a trap
1721 ;;     will be taken when any lane raises an enabled exception.  A trap
1722 ;;     handler may determine which lane raised the exception.
1723 ;; 2.  Alternatively a sticky exception flag is set in the
1724 ;;     floating-point status register (FPSR).  Software may explicitly
1725 ;;     test the exception flags, in which case the tests will either
1726 ;;     prevent vectorisation, allowing precise identification of the
1727 ;;     failing operation, or if tested outside of vectorisable regions
1728 ;;     then the specific operation and lane are not of interest.
1729
1730 ;; FP arithmetic operations.
1731
1732 (define_insn "add<mode>3"
1733  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735                    (match_operand:VHSDF 2 "register_operand" "w")))]
1736  "TARGET_SIMD"
1737  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1739 )
1740
1741 (define_insn "sub<mode>3"
1742  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744                     (match_operand:VHSDF 2 "register_operand" "w")))]
1745  "TARGET_SIMD"
1746  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1747   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1748 )
1749
1750 (define_insn "mul<mode>3"
1751  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1752        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1753                    (match_operand:VHSDF 2 "register_operand" "w")))]
1754  "TARGET_SIMD"
1755  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1757 )
1758
1759 (define_expand "div<mode>3"
1760  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1761        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1762                   (match_operand:VHSDF 2 "register_operand" "w")))]
1763  "TARGET_SIMD"
1764 {
1765   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1766     DONE;
1767
1768   operands[1] = force_reg (<MODE>mode, operands[1]);
1769 })
1770
1771 (define_insn "*div<mode>3"
1772  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1774                  (match_operand:VHSDF 2 "register_operand" "w")))]
1775  "TARGET_SIMD"
1776  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1777   [(set_attr "type" "neon_fp_div_<stype><q>")]
1778 )
1779
1780 (define_insn "neg<mode>2"
1781  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1782        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1783  "TARGET_SIMD"
1784  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1785   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1786 )
1787
1788 (define_insn "abs<mode>2"
1789  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1790        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1791  "TARGET_SIMD"
1792  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1793   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1794 )
1795
1796 (define_insn "fma<mode>4"
1797   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1798        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1799                   (match_operand:VHSDF 2 "register_operand" "w")
1800                   (match_operand:VHSDF 3 "register_operand" "0")))]
1801   "TARGET_SIMD"
1802  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1803   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1804 )
1805
1806 (define_insn "*aarch64_fma4_elt<mode>"
1807   [(set (match_operand:VDQF 0 "register_operand" "=w")
1808     (fma:VDQF
1809       (vec_duplicate:VDQF
1810         (vec_select:<VEL>
1811           (match_operand:VDQF 1 "register_operand" "<h_con>")
1812           (parallel [(match_operand:SI 2 "immediate_operand")])))
1813       (match_operand:VDQF 3 "register_operand" "w")
1814       (match_operand:VDQF 4 "register_operand" "0")))]
1815   "TARGET_SIMD"
1816   {
1817     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1818     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819   }
1820   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
1823 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1824   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1825     (fma:VDQSF
1826       (vec_duplicate:VDQSF
1827         (vec_select:<VEL>
1828           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1829           (parallel [(match_operand:SI 2 "immediate_operand")])))
1830       (match_operand:VDQSF 3 "register_operand" "w")
1831       (match_operand:VDQSF 4 "register_operand" "0")))]
1832   "TARGET_SIMD"
1833   {
1834     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1835     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1836   }
1837   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1838 )
1839
1840 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1841   [(set (match_operand:VMUL 0 "register_operand" "=w")
1842     (fma:VMUL
1843       (vec_duplicate:VMUL
1844           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1845       (match_operand:VMUL 2 "register_operand" "w")
1846       (match_operand:VMUL 3 "register_operand" "0")))]
1847   "TARGET_SIMD"
1848   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1849   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1850 )
1851
1852 (define_insn "*aarch64_fma4_elt_to_64v2df"
1853   [(set (match_operand:DF 0 "register_operand" "=w")
1854     (fma:DF
1855         (vec_select:DF
1856           (match_operand:V2DF 1 "register_operand" "w")
1857           (parallel [(match_operand:SI 2 "immediate_operand")]))
1858       (match_operand:DF 3 "register_operand" "w")
1859       (match_operand:DF 4 "register_operand" "0")))]
1860   "TARGET_SIMD"
1861   {
1862     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1863     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1864   }
1865   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1866 )
1867
1868 (define_insn "fnma<mode>4"
1869   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1870         (fma:VHSDF
1871           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1872           (match_operand:VHSDF 2 "register_operand" "w")
1873           (match_operand:VHSDF 3 "register_operand" "0")))]
1874   "TARGET_SIMD"
1875   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1877 )
1878
1879 (define_insn "*aarch64_fnma4_elt<mode>"
1880   [(set (match_operand:VDQF 0 "register_operand" "=w")
1881     (fma:VDQF
1882       (neg:VDQF
1883         (match_operand:VDQF 3 "register_operand" "w"))
1884       (vec_duplicate:VDQF
1885         (vec_select:<VEL>
1886           (match_operand:VDQF 1 "register_operand" "<h_con>")
1887           (parallel [(match_operand:SI 2 "immediate_operand")])))
1888       (match_operand:VDQF 4 "register_operand" "0")))]
1889   "TARGET_SIMD"
1890   {
1891     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1892     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1893   }
1894   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1895 )
1896
1897 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1898   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1899     (fma:VDQSF
1900       (neg:VDQSF
1901         (match_operand:VDQSF 3 "register_operand" "w"))
1902       (vec_duplicate:VDQSF
1903         (vec_select:<VEL>
1904           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1905           (parallel [(match_operand:SI 2 "immediate_operand")])))
1906       (match_operand:VDQSF 4 "register_operand" "0")))]
1907   "TARGET_SIMD"
1908   {
1909     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1910     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1911   }
1912   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1913 )
1914
1915 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1916   [(set (match_operand:VMUL 0 "register_operand" "=w")
1917     (fma:VMUL
1918       (neg:VMUL
1919         (match_operand:VMUL 2 "register_operand" "w"))
1920       (vec_duplicate:VMUL
1921         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1922       (match_operand:VMUL 3 "register_operand" "0")))]
1923   "TARGET_SIMD"
1924   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1925   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1926 )
1927
1928 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1929   [(set (match_operand:DF 0 "register_operand" "=w")
1930     (fma:DF
1931       (vec_select:DF
1932         (match_operand:V2DF 1 "register_operand" "w")
1933         (parallel [(match_operand:SI 2 "immediate_operand")]))
1934       (neg:DF
1935         (match_operand:DF 3 "register_operand" "w"))
1936       (match_operand:DF 4 "register_operand" "0")))]
1937   "TARGET_SIMD"
1938   {
1939     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1940     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1941   }
1942   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1943 )
1944
1945 ;; Vector versions of the floating-point frint patterns.
1946 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1947 (define_insn "<frint_pattern><mode>2"
1948   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1949         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1950                        FRINT))]
1951   "TARGET_SIMD"
1952   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1953   [(set_attr "type" "neon_fp_round_<stype><q>")]
1954 )
1955
1956 ;; Vector versions of the fcvt standard patterns.
1957 ;; Expands to lbtrunc, lround, lceil, lfloor
1958 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1959   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1960         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1961                                [(match_operand:VHSDF 1 "register_operand" "w")]
1962                                FCVT)))]
1963   "TARGET_SIMD"
1964   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1965   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1966 )
1967
1968 ;; HF Scalar variants of related SIMD instructions.
1969 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1970   [(set (match_operand:HI 0 "register_operand" "=w")
1971         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1972                       FCVT)))]
1973   "TARGET_SIMD_F16INST"
1974   "fcvt<frint_suffix><su>\t%h0, %h1"
1975   [(set_attr "type" "neon_fp_to_int_s")]
1976 )
1977
1978 (define_insn "<optab>_trunchfhi2"
1979   [(set (match_operand:HI 0 "register_operand" "=w")
1980         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1981   "TARGET_SIMD_F16INST"
1982   "fcvtz<su>\t%h0, %h1"
1983   [(set_attr "type" "neon_fp_to_int_s")]
1984 )
1985
1986 (define_insn "<optab>hihf2"
1987   [(set (match_operand:HF 0 "register_operand" "=w")
1988         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1989   "TARGET_SIMD_F16INST"
1990   "<su_optab>cvtf\t%h0, %h1"
1991   [(set_attr "type" "neon_int_to_fp_s")]
1992 )
1993
1994 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1995   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1996         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1997                                [(mult:VDQF
1998          (match_operand:VDQF 1 "register_operand" "w")
1999          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2000                                UNSPEC_FRINTZ)))]
2001   "TARGET_SIMD
2002    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2003                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2004   {
2005     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2006     char buf[64];
2007     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2008     output_asm_insn (buf, operands);
2009     return "";
2010   }
2011   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2012 )
2013
2014 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2015   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2016         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2017                                [(match_operand:VHSDF 1 "register_operand")]
2018                                 UNSPEC_FRINTZ)))]
2019   "TARGET_SIMD"
2020   {})
2021
2022 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2023   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2024         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2025                                [(match_operand:VHSDF 1 "register_operand")]
2026                                 UNSPEC_FRINTZ)))]
2027   "TARGET_SIMD"
2028   {})
2029
2030 (define_expand "ftrunc<VHSDF:mode>2"
2031   [(set (match_operand:VHSDF 0 "register_operand")
2032         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2033                        UNSPEC_FRINTZ))]
2034   "TARGET_SIMD"
2035   {})
2036
2037 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2038   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2039         (FLOATUORS:VHSDF
2040           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2041   "TARGET_SIMD"
2042   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2043   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2044 )
2045
2046 ;; Conversions between vectors of floats and doubles.
2047 ;; Contains a mix of patterns to match standard pattern names
2048 ;; and those for intrinsics.
2049
2050 ;; Float widening operations.
2051
2052 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2053   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2054         (float_extend:<VWIDE> (vec_select:<VHALF>
2055                                (match_operand:VQ_HSF 1 "register_operand" "w")
2056                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2057                             )))]
2058   "TARGET_SIMD"
2059   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2060   [(set_attr "type" "neon_fp_cvt_widen_s")]
2061 )
2062
2063 ;; Convert between fixed-point and floating-point (vector modes)
2064
2065 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2066   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2067         (unspec:<VHSDF:FCVT_TARGET>
2068           [(match_operand:VHSDF 1 "register_operand" "w")
2069            (match_operand:SI 2 "immediate_operand" "i")]
2070          FCVT_F2FIXED))]
2071   "TARGET_SIMD"
2072   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2073   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2074 )
2075
2076 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2077   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2078         (unspec:<VDQ_HSDI:FCVT_TARGET>
2079           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2080            (match_operand:SI 2 "immediate_operand" "i")]
2081          FCVT_FIXED2F))]
2082   "TARGET_SIMD"
2083   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2084   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2085 )
2086
2087 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2088 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2089 ;; the meaning of HI and LO changes depending on the target endianness.
2090 ;; While elsewhere we map the higher numbered elements of a vector to
2091 ;; the lower architectural lanes of the vector, for these patterns we want
2092 ;; to always treat "hi" as referring to the higher architectural lanes.
2093 ;; Consequently, while the patterns below look inconsistent with our
2094 ;; other big-endian patterns their behavior is as required.
2095
2096 (define_expand "vec_unpacks_lo_<mode>"
2097   [(match_operand:<VWIDE> 0 "register_operand" "")
2098    (match_operand:VQ_HSF 1 "register_operand" "")]
2099   "TARGET_SIMD"
2100   {
2101     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2102     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2103                                                        operands[1], p));
2104     DONE;
2105   }
2106 )
2107
2108 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2109   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2110         (float_extend:<VWIDE> (vec_select:<VHALF>
2111                                (match_operand:VQ_HSF 1 "register_operand" "w")
2112                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2113                             )))]
2114   "TARGET_SIMD"
2115   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2116   [(set_attr "type" "neon_fp_cvt_widen_s")]
2117 )
2118
2119 (define_expand "vec_unpacks_hi_<mode>"
2120   [(match_operand:<VWIDE> 0 "register_operand" "")
2121    (match_operand:VQ_HSF 1 "register_operand" "")]
2122   "TARGET_SIMD"
2123   {
2124     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2125     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2126                                                        operands[1], p));
2127     DONE;
2128   }
2129 )
2130 (define_insn "aarch64_float_extend_lo_<Vwide>"
2131   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132         (float_extend:<VWIDE>
2133           (match_operand:VDF 1 "register_operand" "w")))]
2134   "TARGET_SIMD"
2135   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2136   [(set_attr "type" "neon_fp_cvt_widen_s")]
2137 )
2138
2139 ;; Float narrowing operations.
2140
2141 (define_insn "aarch64_float_truncate_lo_<mode>"
2142   [(set (match_operand:VDF 0 "register_operand" "=w")
2143       (float_truncate:VDF
2144         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2145   "TARGET_SIMD"
2146   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2147   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2148 )
2149
2150 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2151   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2152     (vec_concat:<VDBL>
2153       (match_operand:VDF 1 "register_operand" "0")
2154       (float_truncate:VDF
2155         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2156   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2157   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2158   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2159 )
2160
2161 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2162   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2163     (vec_concat:<VDBL>
2164       (float_truncate:VDF
2165         (match_operand:<VWIDE> 2 "register_operand" "w"))
2166       (match_operand:VDF 1 "register_operand" "0")))]
2167   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2168   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2169   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2170 )
2171
2172 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2173   [(match_operand:<VDBL> 0 "register_operand" "=w")
2174    (match_operand:VDF 1 "register_operand" "0")
2175    (match_operand:<VWIDE> 2 "register_operand" "w")]
2176   "TARGET_SIMD"
2177 {
2178   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2179                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2180                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2181   emit_insn (gen (operands[0], operands[1], operands[2]));
2182   DONE;
2183 }
2184 )
2185
2186 (define_expand "vec_pack_trunc_v2df"
2187   [(set (match_operand:V4SF 0 "register_operand")
2188       (vec_concat:V4SF
2189         (float_truncate:V2SF
2190             (match_operand:V2DF 1 "register_operand"))
2191         (float_truncate:V2SF
2192             (match_operand:V2DF 2 "register_operand"))
2193           ))]
2194   "TARGET_SIMD"
2195   {
2196     rtx tmp = gen_reg_rtx (V2SFmode);
2197     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2198     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2199
2200     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2201     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2202                                                    tmp, operands[hi]));
2203     DONE;
2204   }
2205 )
2206
2207 (define_expand "vec_pack_trunc_df"
2208   [(set (match_operand:V2SF 0 "register_operand")
2209       (vec_concat:V2SF
2210         (float_truncate:SF
2211             (match_operand:DF 1 "register_operand"))
2212         (float_truncate:SF
2213             (match_operand:DF 2 "register_operand"))
2214           ))]
2215   "TARGET_SIMD"
2216   {
2217     rtx tmp = gen_reg_rtx (V2SFmode);
2218     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2219     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2220
2221     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2222     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2223     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2224     DONE;
2225   }
2226 )
2227
2228 ;; FP Max/Min
2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2230 ;; expression like:
2231 ;;      a = (b < c) ? b : c;
2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2234 ;; -ffast-math.
2235 ;;
2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2238 ;; operand will be returned when both operands are zero (i.e. they may not
2239 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2240 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2241 ;; NaNs.
2242
2243 (define_insn "<su><maxmin><mode>3"
2244   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2245         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2246                        (match_operand:VHSDF 2 "register_operand" "w")))]
2247   "TARGET_SIMD"
2248   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2249   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2250 )
2251
2252 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2253 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2254 ;; which implement the IEEE fmax ()/fmin () functions.
2255 (define_insn "<maxmin_uns><mode>3"
2256   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2257        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2258                       (match_operand:VHSDF 2 "register_operand" "w")]
2259                       FMAXMIN_UNS))]
2260   "TARGET_SIMD"
2261   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2262   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2263 )
2264
2265 ;; 'across lanes' add.
2266
2267 (define_expand "reduc_plus_scal_<mode>"
2268   [(match_operand:<VEL> 0 "register_operand" "=w")
2269    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2270                UNSPEC_ADDV)]
2271   "TARGET_SIMD"
2272   {
2273     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2274     rtx scratch = gen_reg_rtx (<MODE>mode);
2275     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2276     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2277     DONE;
2278   }
2279 )
2280
2281 (define_insn "aarch64_faddp<mode>"
2282  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2283        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2284                       (match_operand:VHSDF 2 "register_operand" "w")]
2285         UNSPEC_FADDV))]
2286  "TARGET_SIMD"
2287  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2288   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2289 )
2290
2291 (define_insn "aarch64_reduc_plus_internal<mode>"
2292  [(set (match_operand:VDQV 0 "register_operand" "=w")
2293        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2294                     UNSPEC_ADDV))]
2295  "TARGET_SIMD"
2296  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2297   [(set_attr "type" "neon_reduc_add<q>")]
2298 )
2299
2300 (define_insn "aarch64_reduc_plus_internalv2si"
2301  [(set (match_operand:V2SI 0 "register_operand" "=w")
2302        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2303                     UNSPEC_ADDV))]
2304  "TARGET_SIMD"
2305  "addp\\t%0.2s, %1.2s, %1.2s"
2306   [(set_attr "type" "neon_reduc_add")]
2307 )
2308
2309 (define_insn "reduc_plus_scal_<mode>"
2310  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2311        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2312                    UNSPEC_FADDV))]
2313  "TARGET_SIMD"
2314  "faddp\\t%<Vetype>0, %1.<Vtype>"
2315   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2316 )
2317
2318 (define_expand "reduc_plus_scal_v4sf"
2319  [(set (match_operand:SF 0 "register_operand")
2320        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2321                     UNSPEC_FADDV))]
2322  "TARGET_SIMD"
2323 {
2324   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2325   rtx scratch = gen_reg_rtx (V4SFmode);
2326   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2327   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2328   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2329   DONE;
2330 })
2331
2332 (define_insn "clrsb<mode>2"
2333   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2334         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2335   "TARGET_SIMD"
2336   "cls\\t%0.<Vtype>, %1.<Vtype>"
2337   [(set_attr "type" "neon_cls<q>")]
2338 )
2339
2340 (define_insn "clz<mode>2"
2341  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2342        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2343  "TARGET_SIMD"
2344  "clz\\t%0.<Vtype>, %1.<Vtype>"
2345   [(set_attr "type" "neon_cls<q>")]
2346 )
2347
2348 (define_insn "popcount<mode>2"
2349   [(set (match_operand:VB 0 "register_operand" "=w")
2350         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2351   "TARGET_SIMD"
2352   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2353   [(set_attr "type" "neon_cnt<q>")]
2354 )
2355
2356 ;; 'across lanes' max and min ops.
2357
2358 ;; Template for outputting a scalar, so we can create __builtins which can be
2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2361   [(match_operand:<VEL> 0 "register_operand")
2362    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2363                   FMAXMINV)]
2364   "TARGET_SIMD"
2365   {
2366     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2367     rtx scratch = gen_reg_rtx (<MODE>mode);
2368     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2369                                                               operands[1]));
2370     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2371     DONE;
2372   }
2373 )
2374
2375 ;; Likewise for integer cases, signed and unsigned.
2376 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2377   [(match_operand:<VEL> 0 "register_operand")
2378    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2379                     MAXMINV)]
2380   "TARGET_SIMD"
2381   {
2382     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2383     rtx scratch = gen_reg_rtx (<MODE>mode);
2384     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2385                                                               operands[1]));
2386     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2387     DONE;
2388   }
2389 )
2390
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2393        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2394                     MAXMINV))]
2395  "TARGET_SIMD"
2396  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2397   [(set_attr "type" "neon_reduc_minmax<q>")]
2398 )
2399
2400 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2401  [(set (match_operand:V2SI 0 "register_operand" "=w")
2402        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2403                     MAXMINV))]
2404  "TARGET_SIMD"
2405  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2406   [(set_attr "type" "neon_reduc_minmax")]
2407 )
2408
2409 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2410  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2411        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2412                       FMAXMINV))]
2413  "TARGET_SIMD"
2414  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2415   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2416 )
2417
2418 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2419 ;; allocation.
2420 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2421 ;; to select.
2422 ;;
2423 ;; Thus our BSL is of the form:
2424 ;;   op0 = bsl (mask, op2, op3)
2425 ;; We can use any of:
2426 ;;
2427 ;;   if (op0 = mask)
2428 ;;     bsl mask, op1, op2
2429 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2430 ;;     bit op0, op2, mask
2431 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2432 ;;     bif op0, op1, mask
2433 ;;
2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2435 ;; Some forms of straight-line code may generate the equivalent form
2436 ;; in *aarch64_simd_bsl<mode>_alt.
2437
2438 (define_insn "aarch64_simd_bsl<mode>_internal"
2439   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2440         (xor:VDQ_I
2441            (and:VDQ_I
2442              (xor:VDQ_I
2443                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2444                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2445              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2446           (match_dup:<V_INT_EQUIV> 3)
2447         ))]
2448   "TARGET_SIMD"
2449   "@
2450   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2451   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2452   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2453   [(set_attr "type" "neon_bsl<q>")]
2454 )
2455
2456 ;; We need this form in addition to the above pattern to match the case
2457 ;; when combine tries merging three insns such that the second operand of
2458 ;; the outer XOR matches the second operand of the inner XOR rather than
2459 ;; the first.  The two are equivalent but since recog doesn't try all
2460 ;; permutations of commutative operations, we have to have a separate pattern.
2461
2462 (define_insn "*aarch64_simd_bsl<mode>_alt"
2463   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2464         (xor:VDQ_I
2465            (and:VDQ_I
2466              (xor:VDQ_I
2467                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2468                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2469               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2470           (match_dup:<V_INT_EQUIV> 2)))]
2471   "TARGET_SIMD"
2472   "@
2473   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2474   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2475   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2476   [(set_attr "type" "neon_bsl<q>")]
2477 )
2478
2479 ;; DImode is special, we want to avoid computing operations which are
2480 ;; more naturally computed in general purpose registers in the vector
2481 ;; registers.  If we do that, we need to move all three operands from general
2482 ;; purpose registers to vector registers, then back again.  However, we
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484 ;; optimizations based on the component operations of a BSL.
2485 ;;
2486 ;; That means we need a splitter back to the individual operations, if they
2487 ;; would be better calculated on the integer side.
2488
2489 (define_insn_and_split "aarch64_simd_bsldi_internal"
2490   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2491         (xor:DI
2492            (and:DI
2493              (xor:DI
2494                (match_operand:DI 3 "register_operand" "w,0,w,r")
2495                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2497           (match_dup:DI 3)
2498         ))]
2499   "TARGET_SIMD"
2500   "@
2501   bsl\\t%0.8b, %2.8b, %3.8b
2502   bit\\t%0.8b, %2.8b, %1.8b
2503   bif\\t%0.8b, %3.8b, %1.8b
2504   #"
2505   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2506   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2507 {
2508   /* Split back to individual operations.  If we're before reload, and
2509      able to create a temporary register, do so.  If we're after reload,
2510      we've got an early-clobber destination register, so use that.
2511      Otherwise, we can't create pseudos and we can't yet guarantee that
2512      operands[0] is safe to write, so FAIL to split.  */
2513
2514   rtx scratch;
2515   if (reload_completed)
2516     scratch = operands[0];
2517   else if (can_create_pseudo_p ())
2518     scratch = gen_reg_rtx (DImode);
2519   else
2520     FAIL;
2521
2522   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2525   DONE;
2526 }
2527   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528    (set_attr "length" "4,4,4,12")]
2529 )
2530
2531 (define_insn_and_split "aarch64_simd_bsldi_alt"
2532   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2533         (xor:DI
2534            (and:DI
2535              (xor:DI
2536                (match_operand:DI 3 "register_operand" "w,w,0,r")
2537                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2539           (match_dup:DI 2)
2540         ))]
2541   "TARGET_SIMD"
2542   "@
2543   bsl\\t%0.8b, %3.8b, %2.8b
2544   bit\\t%0.8b, %3.8b, %1.8b
2545   bif\\t%0.8b, %2.8b, %1.8b
2546   #"
2547   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2548   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2549 {
2550   /* Split back to individual operations.  If we're before reload, and
2551      able to create a temporary register, do so.  If we're after reload,
2552      we've got an early-clobber destination register, so use that.
2553      Otherwise, we can't create pseudos and we can't yet guarantee that
2554      operands[0] is safe to write, so FAIL to split.  */
2555
2556   rtx scratch;
2557   if (reload_completed)
2558     scratch = operands[0];
2559   else if (can_create_pseudo_p ())
2560     scratch = gen_reg_rtx (DImode);
2561   else
2562     FAIL;
2563
2564   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2567   DONE;
2568 }
2569   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570    (set_attr "length" "4,4,4,12")]
2571 )
2572
2573 (define_expand "aarch64_simd_bsl<mode>"
2574   [(match_operand:VALLDIF 0 "register_operand")
2575    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2576    (match_operand:VALLDIF 2 "register_operand")
2577    (match_operand:VALLDIF 3 "register_operand")]
2578  "TARGET_SIMD"
2579 {
2580   /* We can't alias operands together if they have different modes.  */
2581   rtx tmp = operands[0];
2582   if (FLOAT_MODE_P (<MODE>mode))
2583     {
2584       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2585       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2586       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2587     }
2588   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2589   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2590                                                          operands[1],
2591                                                          operands[2],
2592                                                          operands[3]));
2593   if (tmp != operands[0])
2594     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2595
2596   DONE;
2597 })
2598
2599 (define_expand "vcond_mask_<mode><v_int_equiv>"
2600   [(match_operand:VALLDI 0 "register_operand")
2601    (match_operand:VALLDI 1 "nonmemory_operand")
2602    (match_operand:VALLDI 2 "nonmemory_operand")
2603    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2604   "TARGET_SIMD"
2605 {
2606   /* If we have (a = (P) ? -1 : 0);
2607      Then we can simply move the generated mask (result must be int).  */
2608   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2609       && operands[2] == CONST0_RTX (<MODE>mode))
2610     emit_move_insn (operands[0], operands[3]);
2611   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2612   else if (operands[1] == CONST0_RTX (<MODE>mode)
2613            && operands[2] == CONSTM1_RTX (<MODE>mode))
2614     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2615   else
2616     {
2617       if (!REG_P (operands[1]))
2618         operands[1] = force_reg (<MODE>mode, operands[1]);
2619       if (!REG_P (operands[2]))
2620         operands[2] = force_reg (<MODE>mode, operands[2]);
2621       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2622                                              operands[1], operands[2]));
2623     }
2624
2625   DONE;
2626 })
2627
2628 ;; Patterns comparing two vectors to produce a mask.
2629
2630 (define_expand "vec_cmp<mode><mode>"
2631   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2632           (match_operator 1 "comparison_operator"
2633             [(match_operand:VSDQ_I_DI 2 "register_operand")
2634              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2635   "TARGET_SIMD"
2636 {
2637   rtx mask = operands[0];
2638   enum rtx_code code = GET_CODE (operands[1]);
2639
2640   switch (code)
2641     {
2642     case NE:
2643     case LE:
2644     case LT:
2645     case GE:
2646     case GT:
2647     case EQ:
2648       if (operands[3] == CONST0_RTX (<MODE>mode))
2649         break;
2650
2651       /* Fall through.  */
2652     default:
2653       if (!REG_P (operands[3]))
2654         operands[3] = force_reg (<MODE>mode, operands[3]);
2655
2656       break;
2657     }
2658
2659   switch (code)
2660     {
2661     case LT:
2662       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2663       break;
2664
2665     case GE:
2666       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2667       break;
2668
2669     case LE:
2670       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2671       break;
2672
2673     case GT:
2674       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2675       break;
2676
2677     case LTU:
2678       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2679       break;
2680
2681     case GEU:
2682       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2683       break;
2684
2685     case LEU:
2686       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2687       break;
2688
2689     case GTU:
2690       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2691       break;
2692
2693     case NE:
2694       /* Handle NE as !EQ.  */
2695       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2696       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2697       break;
2698
2699     case EQ:
2700       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2701       break;
2702
2703     default:
2704       gcc_unreachable ();
2705     }
2706
2707   DONE;
2708 })
2709
2710 (define_expand "vec_cmp<mode><v_int_equiv>"
2711   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2712         (match_operator 1 "comparison_operator"
2713             [(match_operand:VDQF 2 "register_operand")
2714              (match_operand:VDQF 3 "nonmemory_operand")]))]
2715   "TARGET_SIMD"
2716 {
2717   int use_zero_form = 0;
2718   enum rtx_code code = GET_CODE (operands[1]);
2719   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2720
2721   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2722
2723   switch (code)
2724     {
2725     case LE:
2726     case LT:
2727     case GE:
2728     case GT:
2729     case EQ:
2730       if (operands[3] == CONST0_RTX (<MODE>mode))
2731         {
2732           use_zero_form = 1;
2733           break;
2734         }
2735       /* Fall through.  */
2736     default:
2737       if (!REG_P (operands[3]))
2738         operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740       break;
2741     }
2742
2743   switch (code)
2744     {
2745     case LT:
2746       if (use_zero_form)
2747         {
2748           comparison = gen_aarch64_cmlt<mode>;
2749           break;
2750         }
2751       /* Fall through.  */
2752     case UNLT:
2753       std::swap (operands[2], operands[3]);
2754       /* Fall through.  */
2755     case UNGT:
2756     case GT:
2757       comparison = gen_aarch64_cmgt<mode>;
2758       break;
2759     case LE:
2760       if (use_zero_form)
2761         {
2762           comparison = gen_aarch64_cmle<mode>;
2763           break;
2764         }
2765       /* Fall through.  */
2766     case UNLE:
2767       std::swap (operands[2], operands[3]);
2768       /* Fall through.  */
2769     case UNGE:
2770     case GE:
2771       comparison = gen_aarch64_cmge<mode>;
2772       break;
2773     case NE:
2774     case EQ:
2775       comparison = gen_aarch64_cmeq<mode>;
2776       break;
2777     case UNEQ:
2778     case ORDERED:
2779     case UNORDERED:
2780     case LTGT:
2781       break;
2782     default:
2783       gcc_unreachable ();
2784     }
2785
2786   switch (code)
2787     {
2788     case UNGE:
2789     case UNGT:
2790     case UNLE:
2791     case UNLT:
2792       {
2793         /* All of the above must not raise any FP exceptions.  Thus we first
2794            check each operand for NaNs and force any elements containing NaN to
2795            zero before using them in the compare.
2796            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2797                                      (cm<cc> (isnan (a) ? 0.0 : a,
2798                                               isnan (b) ? 0.0 : b))
2799            We use the following transformations for doing the comparisions:
2800            a UNGE b -> a GE b
2801            a UNGT b -> a GT b
2802            a UNLE b -> b GE a
2803            a UNLT b -> b GT a.  */
2804
2805         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2806         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812                                           lowpart_subreg (<V_INT_EQUIV>mode,
2813                                                           operands[2],
2814                                                           <MODE>mode)));
2815         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816                                           lowpart_subreg (<V_INT_EQUIV>mode,
2817                                                           operands[3],
2818                                                           <MODE>mode)));
2819         gcc_assert (comparison != NULL);
2820         emit_insn (comparison (operands[0],
2821                                lowpart_subreg (<MODE>mode,
2822                                                tmp0, <V_INT_EQUIV>mode),
2823                                lowpart_subreg (<MODE>mode,
2824                                                tmp1, <V_INT_EQUIV>mode)));
2825         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2826       }
2827       break;
2828
2829     case LT:
2830     case LE:
2831     case GT:
2832     case GE:
2833     case EQ:
2834     case NE:
2835       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2836          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2837          a GE b -> a GE b
2838          a GT b -> a GT b
2839          a LE b -> b GE a
2840          a LT b -> b GT a
2841          a EQ b -> a EQ b
2842          a NE b -> ~(a EQ b)  */
2843       gcc_assert (comparison != NULL);
2844       emit_insn (comparison (operands[0], operands[2], operands[3]));
2845       if (code == NE)
2846         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2847       break;
2848
2849     case LTGT:
2850       /* LTGT is not guranteed to not generate a FP exception.  So let's
2851          go the faster way : ((a > b) || (b > a)).  */
2852       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2853                                          operands[2], operands[3]));
2854       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2855       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2856       break;
2857
2858     case ORDERED:
2859     case UNORDERED:
2860     case UNEQ:
2861       /* cmeq (a, a) & cmeq (b, b).  */
2862       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2863                                          operands[2], operands[2]));
2864       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2865       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2866
2867       if (code == UNORDERED)
2868         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2869       else if (code == UNEQ)
2870         {
2871           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2872           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2873         }
2874       break;
2875
2876     default:
2877       gcc_unreachable ();
2878     }
2879
2880   DONE;
2881 })
2882
2883 (define_expand "vec_cmpu<mode><mode>"
2884   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2885           (match_operator 1 "comparison_operator"
2886             [(match_operand:VSDQ_I_DI 2 "register_operand")
2887              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2888   "TARGET_SIMD"
2889 {
2890   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2891                                       operands[2], operands[3]));
2892   DONE;
2893 })
2894
2895 (define_expand "vcond<mode><mode>"
2896   [(set (match_operand:VALLDI 0 "register_operand")
2897         (if_then_else:VALLDI
2898           (match_operator 3 "comparison_operator"
2899             [(match_operand:VALLDI 4 "register_operand")
2900              (match_operand:VALLDI 5 "nonmemory_operand")])
2901           (match_operand:VALLDI 1 "nonmemory_operand")
2902           (match_operand:VALLDI 2 "nonmemory_operand")))]
2903   "TARGET_SIMD"
2904 {
2905   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2906   enum rtx_code code = GET_CODE (operands[3]);
2907
2908   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2909      it as well as switch operands 1/2 in order to avoid the additional
2910      NOT instruction.  */
2911   if (code == NE)
2912     {
2913       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2914                                     operands[4], operands[5]);
2915       std::swap (operands[1], operands[2]);
2916     }
2917   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2918                                              operands[4], operands[5]));
2919   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2920                                                  operands[2], mask));
2921
2922   DONE;
2923 })
2924
2925 (define_expand "vcond<v_cmp_mixed><mode>"
2926   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2927         (if_then_else:<V_cmp_mixed>
2928           (match_operator 3 "comparison_operator"
2929             [(match_operand:VDQF_COND 4 "register_operand")
2930              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2931           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2932           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2933   "TARGET_SIMD"
2934 {
2935   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2936   enum rtx_code code = GET_CODE (operands[3]);
2937
2938   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2939      it as well as switch operands 1/2 in order to avoid the additional
2940      NOT instruction.  */
2941   if (code == NE)
2942     {
2943       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2944                                     operands[4], operands[5]);
2945       std::swap (operands[1], operands[2]);
2946     }
2947   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2948                                              operands[4], operands[5]));
2949   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2950                                                 operands[0], operands[1],
2951                                                 operands[2], mask));
2952
2953   DONE;
2954 })
2955
2956 (define_expand "vcondu<mode><mode>"
2957   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2958         (if_then_else:VSDQ_I_DI
2959           (match_operator 3 "comparison_operator"
2960             [(match_operand:VSDQ_I_DI 4 "register_operand")
2961              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2962           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2963           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2964   "TARGET_SIMD"
2965 {
2966   rtx mask = gen_reg_rtx (<MODE>mode);
2967   enum rtx_code code = GET_CODE (operands[3]);
2968
2969   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2970      it as well as switch operands 1/2 in order to avoid the additional
2971      NOT instruction.  */
2972   if (code == NE)
2973     {
2974       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2975                                     operands[4], operands[5]);
2976       std::swap (operands[1], operands[2]);
2977     }
2978   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2979                                       operands[4], operands[5]));
2980   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2981                                                  operands[2], mask));
2982   DONE;
2983 })
2984
2985 (define_expand "vcondu<mode><v_cmp_mixed>"
2986   [(set (match_operand:VDQF 0 "register_operand")
2987         (if_then_else:VDQF
2988           (match_operator 3 "comparison_operator"
2989             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2990              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2991           (match_operand:VDQF 1 "nonmemory_operand")
2992           (match_operand:VDQF 2 "nonmemory_operand")))]
2993   "TARGET_SIMD"
2994 {
2995   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2996   enum rtx_code code = GET_CODE (operands[3]);
2997
2998   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2999      it as well as switch operands 1/2 in order to avoid the additional
3000      NOT instruction.  */
3001   if (code == NE)
3002     {
3003       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3004                                     operands[4], operands[5]);
3005       std::swap (operands[1], operands[2]);
3006     }
3007   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3008                                                   mask, operands[3],
3009                                                   operands[4], operands[5]));
3010   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3011                                                  operands[2], mask));
3012   DONE;
3013 })
3014
3015 ;; Patterns for AArch64 SIMD Intrinsics.
3016
3017 ;; Lane extraction with sign extension to general purpose register.
3018 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3019   [(set (match_operand:GPI 0 "register_operand" "=r")
3020         (sign_extend:GPI
3021           (vec_select:<VEL>
3022             (match_operand:VDQQH 1 "register_operand" "w")
3023             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3024   "TARGET_SIMD"
3025   {
3026     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3027     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3028   }
3029   [(set_attr "type" "neon_to_gp<q>")]
3030 )
3031
3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3033   [(set (match_operand:GPI 0 "register_operand" "=r")
3034         (zero_extend:GPI
3035           (vec_select:<VEL>
3036             (match_operand:VDQQH 1 "register_operand" "w")
3037             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3038   "TARGET_SIMD"
3039   {
3040     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041                                            INTVAL (operands[2]));
3042     return "umov\\t%w0, %1.<Vetype>[%2]";
3043   }
3044   [(set_attr "type" "neon_to_gp<q>")]
3045 )
3046
3047 ;; Lane extraction of a value, neither sign nor zero extension
3048 ;; is guaranteed so upper bits should be considered undefined.
3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3050 (define_insn "aarch64_get_lane<mode>"
3051   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3052         (vec_select:<VEL>
3053           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3054           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3055   "TARGET_SIMD"
3056   {
3057     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3058     switch (which_alternative)
3059       {
3060         case 0:
3061           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3062         case 1:
3063           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3064         case 2:
3065           return "st1\\t{%1.<Vetype>}[%2], %0";
3066         default:
3067           gcc_unreachable ();
3068       }
3069   }
3070   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3071 )
3072
3073 (define_insn "load_pair_lanes<mode>"
3074   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3075         (vec_concat:<VDBL>
3076            (match_operand:VDC 1 "memory_operand" "Utq")
3077            (match_operand:VDC 2 "memory_operand" "m")))]
3078   "TARGET_SIMD && !STRICT_ALIGNMENT
3079    && rtx_equal_p (XEXP (operands[2], 0),
3080                    plus_constant (Pmode,
3081                                   XEXP (operands[1], 0),
3082                                   GET_MODE_SIZE (<MODE>mode)))"
3083   "ldr\\t%q0, %1"
3084   [(set_attr "type" "neon_load1_1reg_q")]
3085 )
3086
3087 (define_insn "store_pair_lanes<mode>"
3088   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3089         (vec_concat:<VDBL>
3090            (match_operand:VDC 1 "register_operand" "w, r")
3091            (match_operand:VDC 2 "register_operand" "w, r")))]
3092   "TARGET_SIMD"
3093   "@
3094    stp\\t%d1, %d2, %y0
3095    stp\\t%x1, %x2, %y0"
3096   [(set_attr "type" "neon_stp, store_16")]
3097 )
3098
3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3100 ;; dest vector.
3101
3102 (define_insn "*aarch64_combinez<mode>"
3103   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3104         (vec_concat:<VDBL>
3105           (match_operand:VDC 1 "general_operand" "w,?r,m")
3106           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3107   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3108   "@
3109    mov\\t%0.8b, %1.8b
3110    fmov\t%d0, %1
3111    ldr\\t%d0, %1"
3112   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3113    (set_attr "arch" "simd,fp,simd")]
3114 )
3115
3116 (define_insn "*aarch64_combinez_be<mode>"
3117   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3118         (vec_concat:<VDBL>
3119           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3120           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3121   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3122   "@
3123    mov\\t%0.8b, %1.8b
3124    fmov\t%d0, %1
3125    ldr\\t%d0, %1"
3126   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3127    (set_attr "arch" "simd,fp,simd")]
3128 )
3129
3130 (define_expand "aarch64_combine<mode>"
3131   [(match_operand:<VDBL> 0 "register_operand")
3132    (match_operand:VDC 1 "register_operand")
3133    (match_operand:VDC 2 "register_operand")]
3134   "TARGET_SIMD"
3135 {
3136   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3137
3138   DONE;
3139 }
3140 )
3141
3142 (define_expand "@aarch64_simd_combine<mode>"
3143   [(match_operand:<VDBL> 0 "register_operand")
3144    (match_operand:VDC 1 "register_operand")
3145    (match_operand:VDC 2 "register_operand")]
3146   "TARGET_SIMD"
3147   {
3148     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3149     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3150     DONE;
3151   }
3152 [(set_attr "type" "multiple")]
3153 )
3154
3155 ;; <su><addsub>l<q>.
3156
3157 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3158  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3160                            (match_operand:VQW 1 "register_operand" "w")
3161                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3162                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3163                            (match_operand:VQW 2 "register_operand" "w")
3164                            (match_dup 3)))))]
3165   "TARGET_SIMD"
3166   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3167   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3168 )
3169
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3171  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3173                            (match_operand:VQW 1 "register_operand" "w")
3174                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3175                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3176                            (match_operand:VQW 2 "register_operand" "w")
3177                            (match_dup 3)))))]
3178   "TARGET_SIMD"
3179   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3180   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3181 )
3182
3183
3184 (define_expand "aarch64_saddl2<mode>"
3185   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186    (match_operand:VQW 1 "register_operand" "w")
3187    (match_operand:VQW 2 "register_operand" "w")]
3188   "TARGET_SIMD"
3189 {
3190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3192                                                   operands[2], p));
3193   DONE;
3194 })
3195
3196 (define_expand "aarch64_uaddl2<mode>"
3197   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198    (match_operand:VQW 1 "register_operand" "w")
3199    (match_operand:VQW 2 "register_operand" "w")]
3200   "TARGET_SIMD"
3201 {
3202   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3204                                                   operands[2], p));
3205   DONE;
3206 })
3207
3208 (define_expand "aarch64_ssubl2<mode>"
3209   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3210    (match_operand:VQW 1 "register_operand" "w")
3211    (match_operand:VQW 2 "register_operand" "w")]
3212   "TARGET_SIMD"
3213 {
3214   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3215   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3216                                                 operands[2], p));
3217   DONE;
3218 })
3219
3220 (define_expand "aarch64_usubl2<mode>"
3221   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3222    (match_operand:VQW 1 "register_operand" "w")
3223    (match_operand:VQW 2 "register_operand" "w")]
3224   "TARGET_SIMD"
3225 {
3226   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3227   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3228                                                 operands[2], p));
3229   DONE;
3230 })
3231
3232 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3233  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3234        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3235                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3236                        (ANY_EXTEND:<VWIDE>
3237                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3238   "TARGET_SIMD"
3239   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3240   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3241 )
3242
3243 ;; <su><addsub>w<q>.
3244
3245 (define_expand "widen_ssum<mode>3"
3246   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3247         (plus:<VDBLW> (sign_extend:<VDBLW>
3248                         (match_operand:VQW 1 "register_operand" ""))
3249                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3250   "TARGET_SIMD"
3251   {
3252     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3253     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3254
3255     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3256                                                 operands[1], p));
3257     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3258     DONE;
3259   }
3260 )
3261
3262 (define_expand "widen_ssum<mode>3"
3263   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3264         (plus:<VWIDE> (sign_extend:<VWIDE>
3265                         (match_operand:VD_BHSI 1 "register_operand" ""))
3266                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3267   "TARGET_SIMD"
3268 {
3269   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3270   DONE;
3271 })
3272
3273 (define_expand "widen_usum<mode>3"
3274   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3275         (plus:<VDBLW> (zero_extend:<VDBLW>
3276                         (match_operand:VQW 1 "register_operand" ""))
3277                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3278   "TARGET_SIMD"
3279   {
3280     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3281     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3282
3283     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3284                                                  operands[1], p));
3285     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3286     DONE;
3287   }
3288 )
3289
3290 (define_expand "widen_usum<mode>3"
3291   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3292         (plus:<VWIDE> (zero_extend:<VWIDE>
3293                         (match_operand:VD_BHSI 1 "register_operand" ""))
3294                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3295   "TARGET_SIMD"
3296 {
3297   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3298   DONE;
3299 })
3300
3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3302   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3303         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3304           (ANY_EXTEND:<VWIDE>
3305             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3306   "TARGET_SIMD"
3307   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308   [(set_attr "type" "neon_sub_widen")]
3309 )
3310
3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3312   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3313         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3314           (ANY_EXTEND:<VWIDE>
3315             (vec_select:<VHALF>
3316               (match_operand:VQW 2 "register_operand" "w")
3317               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3318   "TARGET_SIMD"
3319   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3320   [(set_attr "type" "neon_sub_widen")]
3321 )
3322
3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3324   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3325         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3326           (ANY_EXTEND:<VWIDE>
3327             (vec_select:<VHALF>
3328               (match_operand:VQW 2 "register_operand" "w")
3329               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3330   "TARGET_SIMD"
3331   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3332   [(set_attr "type" "neon_sub_widen")]
3333 )
3334
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3337         (plus:<VWIDE>
3338           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3340   "TARGET_SIMD"
3341   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342   [(set_attr "type" "neon_add_widen")]
3343 )
3344
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3347         (plus:<VWIDE>
3348           (ANY_EXTEND:<VWIDE>
3349             (vec_select:<VHALF>
3350               (match_operand:VQW 2 "register_operand" "w")
3351               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3353   "TARGET_SIMD"
3354   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355   [(set_attr "type" "neon_add_widen")]
3356 )
3357
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3360         (plus:<VWIDE>
3361           (ANY_EXTEND:<VWIDE>
3362             (vec_select:<VHALF>
3363               (match_operand:VQW 2 "register_operand" "w")
3364               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3366   "TARGET_SIMD"
3367   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368   [(set_attr "type" "neon_add_widen")]
3369 )
3370
3371 (define_expand "aarch64_saddw2<mode>"
3372   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373    (match_operand:<VWIDE> 1 "register_operand" "w")
3374    (match_operand:VQW 2 "register_operand" "w")]
3375   "TARGET_SIMD"
3376 {
3377   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3378   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3379                                                 operands[2], p));
3380   DONE;
3381 })
3382
3383 (define_expand "aarch64_uaddw2<mode>"
3384   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3385    (match_operand:<VWIDE> 1 "register_operand" "w")
3386    (match_operand:VQW 2 "register_operand" "w")]
3387   "TARGET_SIMD"
3388 {
3389   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3390   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3391                                                 operands[2], p));
3392   DONE;
3393 })
3394
3395
3396 (define_expand "aarch64_ssubw2<mode>"
3397   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3398    (match_operand:<VWIDE> 1 "register_operand" "w")
3399    (match_operand:VQW 2 "register_operand" "w")]
3400   "TARGET_SIMD"
3401 {
3402   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3403   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3404                                                 operands[2], p));
3405   DONE;
3406 })
3407
3408 (define_expand "aarch64_usubw2<mode>"
3409   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3410    (match_operand:<VWIDE> 1 "register_operand" "w")
3411    (match_operand:VQW 2 "register_operand" "w")]
3412   "TARGET_SIMD"
3413 {
3414   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3415   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3416                                                 operands[2], p));
3417   DONE;
3418 })
3419
3420 ;; <su><r>h<addsub>.
3421
3422 (define_expand "<u>avg<mode>3_floor"
3423   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425                           (match_operand:VDQ_BHSI 2 "register_operand")]
3426                          HADD))]
3427   "TARGET_SIMD"
3428 )
3429
3430 (define_expand "<u>avg<mode>3_ceil"
3431   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433                           (match_operand:VDQ_BHSI 2 "register_operand")]
3434                          RHADD))]
3435   "TARGET_SIMD"
3436 )
3437
3438 (define_insn "aarch64_<sur>h<addsub><mode>"
3439   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3440         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3441                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3442                      HADDSUB))]
3443   "TARGET_SIMD"
3444   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3445   [(set_attr "type" "neon_<addsub>_halve<q>")]
3446 )
3447
3448 ;; <r><addsub>hn<q>.
3449
3450 (define_insn "aarch64_<sur><addsub>hn<mode>"
3451   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3452         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3453                             (match_operand:VQN 2 "register_operand" "w")]
3454                            ADDSUBHN))]
3455   "TARGET_SIMD"
3456   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3457   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3458 )
3459
3460 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3461   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3462         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3463                              (match_operand:VQN 2 "register_operand" "w")
3464                              (match_operand:VQN 3 "register_operand" "w")]
3465                             ADDSUBHN2))]
3466   "TARGET_SIMD"
3467   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3468   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3469 )
3470
3471 ;; pmul.
3472
3473 (define_insn "aarch64_pmul<mode>"
3474   [(set (match_operand:VB 0 "register_operand" "=w")
3475         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3476                     (match_operand:VB 2 "register_operand" "w")]
3477                    UNSPEC_PMUL))]
3478  "TARGET_SIMD"
3479  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3480   [(set_attr "type" "neon_mul_<Vetype><q>")]
3481 )
3482
3483 ;; fmulx.
3484
3485 (define_insn "aarch64_fmulx<mode>"
3486   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3487         (unspec:VHSDF_HSDF
3488           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3489            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3490            UNSPEC_FMULX))]
3491  "TARGET_SIMD"
3492  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3493  [(set_attr "type" "neon_fp_mul_<stype>")]
3494 )
3495
3496 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3497
3498 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3499   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3500         (unspec:VDQSF
3501          [(match_operand:VDQSF 1 "register_operand" "w")
3502           (vec_duplicate:VDQSF
3503            (vec_select:<VEL>
3504             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3505             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3506          UNSPEC_FMULX))]
3507   "TARGET_SIMD"
3508   {
3509     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3510     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3511   }
3512   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3513 )
3514
3515 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3516
3517 (define_insn "*aarch64_mulx_elt<mode>"
3518   [(set (match_operand:VDQF 0 "register_operand" "=w")
3519         (unspec:VDQF
3520          [(match_operand:VDQF 1 "register_operand" "w")
3521           (vec_duplicate:VDQF
3522            (vec_select:<VEL>
3523             (match_operand:VDQF 2 "register_operand" "w")
3524             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3525          UNSPEC_FMULX))]
3526   "TARGET_SIMD"
3527   {
3528     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3529     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3530   }
3531   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3532 )
3533
3534 ;; vmulxq_lane
3535
3536 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3537   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3538         (unspec:VHSDF
3539          [(match_operand:VHSDF 1 "register_operand" "w")
3540           (vec_duplicate:VHSDF
3541             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3542          UNSPEC_FMULX))]
3543   "TARGET_SIMD"
3544   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3545   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3546 )
3547
3548 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3549 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3550 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3551
3552 (define_insn "*aarch64_vgetfmulx<mode>"
3553   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3554         (unspec:<VEL>
3555          [(match_operand:<VEL> 1 "register_operand" "w")
3556           (vec_select:<VEL>
3557            (match_operand:VDQF 2 "register_operand" "w")
3558             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3559          UNSPEC_FMULX))]
3560   "TARGET_SIMD"
3561   {
3562     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3563     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3564   }
3565   [(set_attr "type" "fmul<Vetype>")]
3566 )
3567 ;; <su>q<addsub>
3568
3569 (define_insn "aarch64_<su_optab><optab><mode>"
3570   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3571         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3572                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3573   "TARGET_SIMD"
3574   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3575   [(set_attr "type" "neon_<optab><q>")]
3576 )
3577
3578 ;; suqadd and usqadd
3579
3580 (define_insn "aarch64_<sur>qadd<mode>"
3581   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3582         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3583                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3584                        USSUQADD))]
3585   "TARGET_SIMD"
3586   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3587   [(set_attr "type" "neon_qadd<q>")]
3588 )
3589
3590 ;; sqmovun
3591
3592 (define_insn "aarch64_sqmovun<mode>"
3593   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3594         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3595                             UNSPEC_SQXTUN))]
3596    "TARGET_SIMD"
3597    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3598    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3599 )
3600
3601 ;; sqmovn and uqmovn
3602
3603 (define_insn "aarch64_<sur>qmovn<mode>"
3604   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3605         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3606                             SUQMOVN))]
3607   "TARGET_SIMD"
3608   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3609    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3610 )
3611
3612 ;; <su>q<absneg>
3613
3614 (define_insn "aarch64_s<optab><mode>"
3615   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3616         (UNQOPS:VSDQ_I
3617           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3618   "TARGET_SIMD"
3619   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3620   [(set_attr "type" "neon_<optab><q>")]
3621 )
3622
3623 ;; sq<r>dmulh.
3624
3625 (define_insn "aarch64_sq<r>dmulh<mode>"
3626   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3627         (unspec:VSDQ_HSI
3628           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3629            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3630          VQDMULH))]
3631   "TARGET_SIMD"
3632   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3633   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3634 )
3635
3636 ;; sq<r>dmulh_lane
3637
3638 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3639   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640         (unspec:VDQHS
3641           [(match_operand:VDQHS 1 "register_operand" "w")
3642            (vec_select:<VEL>
3643              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3644              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3645          VQDMULH))]
3646   "TARGET_SIMD"
3647   "*
3648    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3649    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3650   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3651 )
3652
3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3654   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3655         (unspec:VDQHS
3656           [(match_operand:VDQHS 1 "register_operand" "w")
3657            (vec_select:<VEL>
3658              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3659              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3660          VQDMULH))]
3661   "TARGET_SIMD"
3662   "*
3663    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3664    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3665   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3666 )
3667
3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3669   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3670         (unspec:SD_HSI
3671           [(match_operand:SD_HSI 1 "register_operand" "w")
3672            (vec_select:<VEL>
3673              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3674              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3675          VQDMULH))]
3676   "TARGET_SIMD"
3677   "*
3678    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3679    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3680   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3681 )
3682
3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3684   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3685         (unspec:SD_HSI
3686           [(match_operand:SD_HSI 1 "register_operand" "w")
3687            (vec_select:<VEL>
3688              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3689              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3690          VQDMULH))]
3691   "TARGET_SIMD"
3692   "*
3693    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3694    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3695   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3696 )
3697
3698 ;; sqrdml[as]h.
3699
3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3701   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3702         (unspec:VSDQ_HSI
3703           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3704            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3705            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3706           SQRDMLH_AS))]
3707    "TARGET_SIMD_RDMA"
3708    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3710 )
3711
3712 ;; sqrdml[as]h_lane.
3713
3714 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3715   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3716         (unspec:VDQHS
3717           [(match_operand:VDQHS 1 "register_operand" "0")
3718            (match_operand:VDQHS 2 "register_operand" "w")
3719            (vec_select:<VEL>
3720              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3721              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3722           SQRDMLH_AS))]
3723    "TARGET_SIMD_RDMA"
3724    {
3725      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3726      return
3727       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3728    }
3729    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3730 )
3731
3732 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3733   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3734         (unspec:SD_HSI
3735           [(match_operand:SD_HSI 1 "register_operand" "0")
3736            (match_operand:SD_HSI 2 "register_operand" "w")
3737            (vec_select:<VEL>
3738              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3739              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3740           SQRDMLH_AS))]
3741    "TARGET_SIMD_RDMA"
3742    {
3743      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3744      return
3745       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3746    }
3747    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3748 )
3749
3750 ;; sqrdml[as]h_laneq.
3751
3752 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3753   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3754         (unspec:VDQHS
3755           [(match_operand:VDQHS 1 "register_operand" "0")
3756            (match_operand:VDQHS 2 "register_operand" "w")
3757            (vec_select:<VEL>
3758              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3759              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3760           SQRDMLH_AS))]
3761    "TARGET_SIMD_RDMA"
3762    {
3763      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3764      return
3765       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3766    }
3767    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3768 )
3769
3770 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3771   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3772         (unspec:SD_HSI
3773           [(match_operand:SD_HSI 1 "register_operand" "0")
3774            (match_operand:SD_HSI 2 "register_operand" "w")
3775            (vec_select:<VEL>
3776              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3777              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3778           SQRDMLH_AS))]
3779    "TARGET_SIMD_RDMA"
3780    {
3781      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3782      return
3783       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3784    }
3785    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3786 )
3787
3788 ;; vqdml[sa]l
3789
3790 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3791   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792         (SBINQOPS:<VWIDE>
3793           (match_operand:<VWIDE> 1 "register_operand" "0")
3794           (ss_ashift:<VWIDE>
3795               (mult:<VWIDE>
3796                 (sign_extend:<VWIDE>
3797                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3798                 (sign_extend:<VWIDE>
3799                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3800               (const_int 1))))]
3801   "TARGET_SIMD"
3802   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3803   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3804 )
3805
3806 ;; vqdml[sa]l_lane
3807
3808 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3809   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3810         (SBINQOPS:<VWIDE>
3811           (match_operand:<VWIDE> 1 "register_operand" "0")
3812           (ss_ashift:<VWIDE>
3813             (mult:<VWIDE>
3814               (sign_extend:<VWIDE>
3815                 (match_operand:VD_HSI 2 "register_operand" "w"))
3816               (sign_extend:<VWIDE>
3817                 (vec_duplicate:VD_HSI
3818                   (vec_select:<VEL>
3819                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3820                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3821               ))
3822             (const_int 1))))]
3823   "TARGET_SIMD"
3824   {
3825     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3826     return
3827       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3828   }
3829   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3830 )
3831
3832 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3833   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834         (SBINQOPS:<VWIDE>
3835           (match_operand:<VWIDE> 1 "register_operand" "0")
3836           (ss_ashift:<VWIDE>
3837             (mult:<VWIDE>
3838               (sign_extend:<VWIDE>
3839                 (match_operand:VD_HSI 2 "register_operand" "w"))
3840               (sign_extend:<VWIDE>
3841                 (vec_duplicate:VD_HSI
3842                   (vec_select:<VEL>
3843                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3844                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3845               ))
3846             (const_int 1))))]
3847   "TARGET_SIMD"
3848   {
3849     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3850     return
3851       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3852   }
3853   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3854 )
3855
3856 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3857   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3858         (SBINQOPS:<VWIDE>
3859           (match_operand:<VWIDE> 1 "register_operand" "0")
3860           (ss_ashift:<VWIDE>
3861             (mult:<VWIDE>
3862               (sign_extend:<VWIDE>
3863                 (match_operand:SD_HSI 2 "register_operand" "w"))
3864               (sign_extend:<VWIDE>
3865                 (vec_select:<VEL>
3866                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3867                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3868               )
3869             (const_int 1))))]
3870   "TARGET_SIMD"
3871   {
3872     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3873     return
3874       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3875   }
3876   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3877 )
3878
3879 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3880   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3881         (SBINQOPS:<VWIDE>
3882           (match_operand:<VWIDE> 1 "register_operand" "0")
3883           (ss_ashift:<VWIDE>
3884             (mult:<VWIDE>
3885               (sign_extend:<VWIDE>
3886                 (match_operand:SD_HSI 2 "register_operand" "w"))
3887               (sign_extend:<VWIDE>
3888                 (vec_select:<VEL>
3889                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3890                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3891               )
3892             (const_int 1))))]
3893   "TARGET_SIMD"
3894   {
3895     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3896     return
3897       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3898   }
3899   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3900 )
3901
3902 ;; vqdml[sa]l_n
3903
3904 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3905   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3906         (SBINQOPS:<VWIDE>
3907           (match_operand:<VWIDE> 1 "register_operand" "0")
3908           (ss_ashift:<VWIDE>
3909               (mult:<VWIDE>
3910                 (sign_extend:<VWIDE>
3911                       (match_operand:VD_HSI 2 "register_operand" "w"))
3912                 (sign_extend:<VWIDE>
3913                   (vec_duplicate:VD_HSI
3914                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3915               (const_int 1))))]
3916   "TARGET_SIMD"
3917   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3918   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3919 )
3920
3921 ;; sqdml[as]l2
3922
3923 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3924   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3925         (SBINQOPS:<VWIDE>
3926          (match_operand:<VWIDE> 1 "register_operand" "0")
3927          (ss_ashift:<VWIDE>
3928              (mult:<VWIDE>
3929                (sign_extend:<VWIDE>
3930                  (vec_select:<VHALF>
3931                      (match_operand:VQ_HSI 2 "register_operand" "w")
3932                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933                (sign_extend:<VWIDE>
3934                  (vec_select:<VHALF>
3935                      (match_operand:VQ_HSI 3 "register_operand" "w")
3936                      (match_dup 4))))
3937              (const_int 1))))]
3938   "TARGET_SIMD"
3939   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3940   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3941 )
3942
3943 (define_expand "aarch64_sqdmlal2<mode>"
3944   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3945    (match_operand:<VWIDE> 1 "register_operand" "w")
3946    (match_operand:VQ_HSI 2 "register_operand" "w")
3947    (match_operand:VQ_HSI 3 "register_operand" "w")]
3948   "TARGET_SIMD"
3949 {
3950   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3951   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3952                                                   operands[2], operands[3], p));
3953   DONE;
3954 })
3955
3956 (define_expand "aarch64_sqdmlsl2<mode>"
3957   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3958    (match_operand:<VWIDE> 1 "register_operand" "w")
3959    (match_operand:VQ_HSI 2 "register_operand" "w")
3960    (match_operand:VQ_HSI 3 "register_operand" "w")]
3961   "TARGET_SIMD"
3962 {
3963   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3964   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3965                                                   operands[2], operands[3], p));
3966   DONE;
3967 })
3968
3969 ;; vqdml[sa]l2_lane
3970
3971 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3972   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3973         (SBINQOPS:<VWIDE>
3974           (match_operand:<VWIDE> 1 "register_operand" "0")
3975           (ss_ashift:<VWIDE>
3976               (mult:<VWIDE>
3977                 (sign_extend:<VWIDE>
3978                   (vec_select:<VHALF>
3979                     (match_operand:VQ_HSI 2 "register_operand" "w")
3980                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3981                 (sign_extend:<VWIDE>
3982                   (vec_duplicate:<VHALF>
3983                     (vec_select:<VEL>
3984                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3986                     ))))
3987               (const_int 1))))]
3988   "TARGET_SIMD"
3989   {
3990     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991     return
3992      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993   }
3994   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3995 )
3996
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3998   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999         (SBINQOPS:<VWIDE>
4000           (match_operand:<VWIDE> 1 "register_operand" "0")
4001           (ss_ashift:<VWIDE>
4002               (mult:<VWIDE>
4003                 (sign_extend:<VWIDE>
4004                   (vec_select:<VHALF>
4005                     (match_operand:VQ_HSI 2 "register_operand" "w")
4006                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4007                 (sign_extend:<VWIDE>
4008                   (vec_duplicate:<VHALF>
4009                     (vec_select:<VEL>
4010                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4011                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4012                     ))))
4013               (const_int 1))))]
4014   "TARGET_SIMD"
4015   {
4016     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4017     return
4018      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4019   }
4020   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4021 )
4022
4023 (define_expand "aarch64_sqdmlal2_lane<mode>"
4024   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4025    (match_operand:<VWIDE> 1 "register_operand" "w")
4026    (match_operand:VQ_HSI 2 "register_operand" "w")
4027    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4028    (match_operand:SI 4 "immediate_operand" "i")]
4029   "TARGET_SIMD"
4030 {
4031   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4032   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4033                                                        operands[2], operands[3],
4034                                                        operands[4], p));
4035   DONE;
4036 })
4037
4038 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4039   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4040    (match_operand:<VWIDE> 1 "register_operand" "w")
4041    (match_operand:VQ_HSI 2 "register_operand" "w")
4042    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4043    (match_operand:SI 4 "immediate_operand" "i")]
4044   "TARGET_SIMD"
4045 {
4046   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4047   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4048                                                        operands[2], operands[3],
4049                                                        operands[4], p));
4050   DONE;
4051 })
4052
4053 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4054   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4055    (match_operand:<VWIDE> 1 "register_operand" "w")
4056    (match_operand:VQ_HSI 2 "register_operand" "w")
4057    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4058    (match_operand:SI 4 "immediate_operand" "i")]
4059   "TARGET_SIMD"
4060 {
4061   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4062   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4063                                                        operands[2], operands[3],
4064                                                        operands[4], p));
4065   DONE;
4066 })
4067
4068 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4069   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4070    (match_operand:<VWIDE> 1 "register_operand" "w")
4071    (match_operand:VQ_HSI 2 "register_operand" "w")
4072    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4073    (match_operand:SI 4 "immediate_operand" "i")]
4074   "TARGET_SIMD"
4075 {
4076   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4077   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4078                                                        operands[2], operands[3],
4079                                                        operands[4], p));
4080   DONE;
4081 })
4082
4083 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4084   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4085         (SBINQOPS:<VWIDE>
4086           (match_operand:<VWIDE> 1 "register_operand" "0")
4087           (ss_ashift:<VWIDE>
4088             (mult:<VWIDE>
4089               (sign_extend:<VWIDE>
4090                 (vec_select:<VHALF>
4091                   (match_operand:VQ_HSI 2 "register_operand" "w")
4092                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4093               (sign_extend:<VWIDE>
4094                 (vec_duplicate:<VHALF>
4095                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4096             (const_int 1))))]
4097   "TARGET_SIMD"
4098   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4099   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4100 )
4101
4102 (define_expand "aarch64_sqdmlal2_n<mode>"
4103   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4104    (match_operand:<VWIDE> 1 "register_operand" "w")
4105    (match_operand:VQ_HSI 2 "register_operand" "w")
4106    (match_operand:<VEL> 3 "register_operand" "w")]
4107   "TARGET_SIMD"
4108 {
4109   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4110   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4111                                                     operands[2], operands[3],
4112                                                     p));
4113   DONE;
4114 })
4115
4116 (define_expand "aarch64_sqdmlsl2_n<mode>"
4117   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4118    (match_operand:<VWIDE> 1 "register_operand" "w")
4119    (match_operand:VQ_HSI 2 "register_operand" "w")
4120    (match_operand:<VEL> 3 "register_operand" "w")]
4121   "TARGET_SIMD"
4122 {
4123   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4124   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4125                                                     operands[2], operands[3],
4126                                                     p));
4127   DONE;
4128 })
4129
4130 ;; vqdmull
4131
4132 (define_insn "aarch64_sqdmull<mode>"
4133   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4134         (ss_ashift:<VWIDE>
4135              (mult:<VWIDE>
4136                (sign_extend:<VWIDE>
4137                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4138                (sign_extend:<VWIDE>
4139                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4140              (const_int 1)))]
4141   "TARGET_SIMD"
4142   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4143   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4144 )
4145
4146 ;; vqdmull_lane
4147
4148 (define_insn "aarch64_sqdmull_lane<mode>"
4149   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150         (ss_ashift:<VWIDE>
4151              (mult:<VWIDE>
4152                (sign_extend:<VWIDE>
4153                  (match_operand:VD_HSI 1 "register_operand" "w"))
4154                (sign_extend:<VWIDE>
4155                  (vec_duplicate:VD_HSI
4156                    (vec_select:<VEL>
4157                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4158                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4159                ))
4160              (const_int 1)))]
4161   "TARGET_SIMD"
4162   {
4163     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4164     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4165   }
4166   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4167 )
4168
4169 (define_insn "aarch64_sqdmull_laneq<mode>"
4170   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4171         (ss_ashift:<VWIDE>
4172              (mult:<VWIDE>
4173                (sign_extend:<VWIDE>
4174                  (match_operand:VD_HSI 1 "register_operand" "w"))
4175                (sign_extend:<VWIDE>
4176                  (vec_duplicate:VD_HSI
4177                    (vec_select:<VEL>
4178                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4179                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4180                ))
4181              (const_int 1)))]
4182   "TARGET_SIMD"
4183   {
4184     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4185     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4186   }
4187   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4188 )
4189
4190 (define_insn "aarch64_sqdmull_lane<mode>"
4191   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4192         (ss_ashift:<VWIDE>
4193              (mult:<VWIDE>
4194                (sign_extend:<VWIDE>
4195                  (match_operand:SD_HSI 1 "register_operand" "w"))
4196                (sign_extend:<VWIDE>
4197                  (vec_select:<VEL>
4198                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4199                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4200                ))
4201              (const_int 1)))]
4202   "TARGET_SIMD"
4203   {
4204     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4205     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4206   }
4207   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4208 )
4209
4210 (define_insn "aarch64_sqdmull_laneq<mode>"
4211   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4212         (ss_ashift:<VWIDE>
4213              (mult:<VWIDE>
4214                (sign_extend:<VWIDE>
4215                  (match_operand:SD_HSI 1 "register_operand" "w"))
4216                (sign_extend:<VWIDE>
4217                  (vec_select:<VEL>
4218                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4219                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4220                ))
4221              (const_int 1)))]
4222   "TARGET_SIMD"
4223   {
4224     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4225     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4226   }
4227   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4228 )
4229
4230 ;; vqdmull_n
4231
4232 (define_insn "aarch64_sqdmull_n<mode>"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (match_operand:VD_HSI 1 "register_operand" "w"))
4238                (sign_extend:<VWIDE>
4239                  (vec_duplicate:VD_HSI
4240                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4241                )
4242              (const_int 1)))]
4243   "TARGET_SIMD"
4244   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4245   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4246 )
4247
4248 ;; vqdmull2
4249
4250
4251
4252 (define_insn "aarch64_sqdmull2<mode>_internal"
4253   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4254         (ss_ashift:<VWIDE>
4255              (mult:<VWIDE>
4256                (sign_extend:<VWIDE>
4257                  (vec_select:<VHALF>
4258                    (match_operand:VQ_HSI 1 "register_operand" "w")
4259                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4260                (sign_extend:<VWIDE>
4261                  (vec_select:<VHALF>
4262                    (match_operand:VQ_HSI 2 "register_operand" "w")
4263                    (match_dup 3)))
4264                )
4265              (const_int 1)))]
4266   "TARGET_SIMD"
4267   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4268   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4269 )
4270
4271 (define_expand "aarch64_sqdmull2<mode>"
4272   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4273    (match_operand:VQ_HSI 1 "register_operand" "w")
4274    (match_operand:VQ_HSI 2 "register_operand" "w")]
4275   "TARGET_SIMD"
4276 {
4277   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4278   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4279                                                   operands[2], p));
4280   DONE;
4281 })
4282
4283 ;; vqdmull2_lane
4284
4285 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4286   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287         (ss_ashift:<VWIDE>
4288              (mult:<VWIDE>
4289                (sign_extend:<VWIDE>
4290                  (vec_select:<VHALF>
4291                    (match_operand:VQ_HSI 1 "register_operand" "w")
4292                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:<VHALF>
4295                    (vec_select:<VEL>
4296                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4297                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4298                ))
4299              (const_int 1)))]
4300   "TARGET_SIMD"
4301   {
4302     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4303     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304   }
4305   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4306 )
4307
4308 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4309   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4310         (ss_ashift:<VWIDE>
4311              (mult:<VWIDE>
4312                (sign_extend:<VWIDE>
4313                  (vec_select:<VHALF>
4314                    (match_operand:VQ_HSI 1 "register_operand" "w")
4315                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4316                (sign_extend:<VWIDE>
4317                  (vec_duplicate:<VHALF>
4318                    (vec_select:<VEL>
4319                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4320                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4321                ))
4322              (const_int 1)))]
4323   "TARGET_SIMD"
4324   {
4325     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4326     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4327   }
4328   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4329 )
4330
4331 (define_expand "aarch64_sqdmull2_lane<mode>"
4332   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4333    (match_operand:VQ_HSI 1 "register_operand" "w")
4334    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4335    (match_operand:SI 3 "immediate_operand" "i")]
4336   "TARGET_SIMD"
4337 {
4338   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4339   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4340                                                        operands[2], operands[3],
4341                                                        p));
4342   DONE;
4343 })
4344
4345 (define_expand "aarch64_sqdmull2_laneq<mode>"
4346   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4347    (match_operand:VQ_HSI 1 "register_operand" "w")
4348    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349    (match_operand:SI 3 "immediate_operand" "i")]
4350   "TARGET_SIMD"
4351 {
4352   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4353   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4354                                                        operands[2], operands[3],
4355                                                        p));
4356   DONE;
4357 })
4358
4359 ;; vqdmull2_n
4360
4361 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4362   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4363         (ss_ashift:<VWIDE>
4364              (mult:<VWIDE>
4365                (sign_extend:<VWIDE>
4366                  (vec_select:<VHALF>
4367                    (match_operand:VQ_HSI 1 "register_operand" "w")
4368                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4369                (sign_extend:<VWIDE>
4370                  (vec_duplicate:<VHALF>
4371                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4372                )
4373              (const_int 1)))]
4374   "TARGET_SIMD"
4375   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4376   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4377 )
4378
4379 (define_expand "aarch64_sqdmull2_n<mode>"
4380   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4381    (match_operand:VQ_HSI 1 "register_operand" "w")
4382    (match_operand:<VEL> 2 "register_operand" "w")]
4383   "TARGET_SIMD"
4384 {
4385   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4386   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4387                                                     operands[2], p));
4388   DONE;
4389 })
4390
4391 ;; vshl
4392
4393 (define_insn "aarch64_<sur>shl<mode>"
4394   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4395         (unspec:VSDQ_I_DI
4396           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4397            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4398          VSHL))]
4399   "TARGET_SIMD"
4400   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4401   [(set_attr "type" "neon_shift_reg<q>")]
4402 )
4403
4404
4405 ;; vqshl
4406
4407 (define_insn "aarch64_<sur>q<r>shl<mode>"
4408   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4409         (unspec:VSDQ_I
4410           [(match_operand:VSDQ_I 1 "register_operand" "w")
4411            (match_operand:VSDQ_I 2 "register_operand" "w")]
4412          VQSHL))]
4413   "TARGET_SIMD"
4414   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4415   [(set_attr "type" "neon_sat_shift_reg<q>")]
4416 )
4417
4418 ;; vshll_n
4419
4420 (define_insn "aarch64_<sur>shll_n<mode>"
4421   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4422         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4423                          (match_operand:SI 2
4424                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4425                          VSHLL))]
4426   "TARGET_SIMD"
4427   {
4428     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4429       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4430     else
4431       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4432   }
4433   [(set_attr "type" "neon_shift_imm_long")]
4434 )
4435
4436 ;; vshll_high_n
4437
4438 (define_insn "aarch64_<sur>shll2_n<mode>"
4439   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4441                          (match_operand:SI 2 "immediate_operand" "i")]
4442                          VSHLL))]
4443   "TARGET_SIMD"
4444   {
4445     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4446       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4447     else
4448       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4449   }
4450   [(set_attr "type" "neon_shift_imm_long")]
4451 )
4452
4453 ;; vrshr_n
4454
4455 (define_insn "aarch64_<sur>shr_n<mode>"
4456   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4457         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4458                            (match_operand:SI 2
4459                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4460                           VRSHR_N))]
4461   "TARGET_SIMD"
4462   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4463   [(set_attr "type" "neon_sat_shift_imm<q>")]
4464 )
4465
4466 ;; v(r)sra_n
4467
4468 (define_insn "aarch64_<sur>sra_n<mode>"
4469   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4470         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4471                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4472                        (match_operand:SI 3
4473                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4474                       VSRA))]
4475   "TARGET_SIMD"
4476   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4477   [(set_attr "type" "neon_shift_acc<q>")]
4478 )
4479
4480 ;; vs<lr>i_n
4481
4482 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4483   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4484         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4485                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4486                        (match_operand:SI 3
4487                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4488                       VSLRI))]
4489   "TARGET_SIMD"
4490   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4491   [(set_attr "type" "neon_shift_imm<q>")]
4492 )
4493
4494 ;; vqshl(u)
4495
4496 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4497   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4498         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4499                        (match_operand:SI 2
4500                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4501                       VQSHL_N))]
4502   "TARGET_SIMD"
4503   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4504   [(set_attr "type" "neon_sat_shift_imm<q>")]
4505 )
4506
4507
4508 ;; vq(r)shr(u)n_n
4509
4510 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4511   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4512         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4513                             (match_operand:SI 2
4514                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4515                            VQSHRN_N))]
4516   "TARGET_SIMD"
4517   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4518   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4519 )
4520
4521
4522 ;; cm(eq|ge|gt|lt|le)
4523 ;; Note, we have constraints for Dz and Z as different expanders
4524 ;; have different ideas of what should be passed to this pattern.
4525
4526 (define_insn "aarch64_cm<optab><mode>"
4527   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4528         (neg:<V_INT_EQUIV>
4529           (COMPARISONS:<V_INT_EQUIV>
4530             (match_operand:VDQ_I 1 "register_operand" "w,w")
4531             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4532           )))]
4533   "TARGET_SIMD"
4534   "@
4535   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4536   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4537   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4538 )
4539
4540 (define_insn_and_split "aarch64_cm<optab>di"
4541   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4542         (neg:DI
4543           (COMPARISONS:DI
4544             (match_operand:DI 1 "register_operand" "w,w,r")
4545             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4546           )))
4547      (clobber (reg:CC CC_REGNUM))]
4548   "TARGET_SIMD"
4549   "#"
4550   "&& reload_completed"
4551   [(set (match_operand:DI 0 "register_operand")
4552         (neg:DI
4553           (COMPARISONS:DI
4554             (match_operand:DI 1 "register_operand")
4555             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4556           )))]
4557   {
4558     /* If we are in the general purpose register file,
4559        we split to a sequence of comparison and store.  */
4560     if (GP_REGNUM_P (REGNO (operands[0]))
4561         && GP_REGNUM_P (REGNO (operands[1])))
4562       {
4563         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4564         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4565         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4566         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4567         DONE;
4568       }
4569     /* Otherwise, we expand to a similar pattern which does not
4570        clobber CC_REGNUM.  */
4571   }
4572   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4573 )
4574
4575 (define_insn "*aarch64_cm<optab>di"
4576   [(set (match_operand:DI 0 "register_operand" "=w,w")
4577         (neg:DI
4578           (COMPARISONS:DI
4579             (match_operand:DI 1 "register_operand" "w,w")
4580             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4581           )))]
4582   "TARGET_SIMD && reload_completed"
4583   "@
4584   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4585   cm<optab>\t%d0, %d1, #0"
4586   [(set_attr "type" "neon_compare, neon_compare_zero")]
4587 )
4588
4589 ;; cm(hs|hi)
4590
4591 (define_insn "aarch64_cm<optab><mode>"
4592   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4593         (neg:<V_INT_EQUIV>
4594           (UCOMPARISONS:<V_INT_EQUIV>
4595             (match_operand:VDQ_I 1 "register_operand" "w")
4596             (match_operand:VDQ_I 2 "register_operand" "w")
4597           )))]
4598   "TARGET_SIMD"
4599   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4600   [(set_attr "type" "neon_compare<q>")]
4601 )
4602
4603 (define_insn_and_split "aarch64_cm<optab>di"
4604   [(set (match_operand:DI 0 "register_operand" "=w,r")
4605         (neg:DI
4606           (UCOMPARISONS:DI
4607             (match_operand:DI 1 "register_operand" "w,r")
4608             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4609           )))
4610     (clobber (reg:CC CC_REGNUM))]
4611   "TARGET_SIMD"
4612   "#"
4613   "&& reload_completed"
4614   [(set (match_operand:DI 0 "register_operand")
4615         (neg:DI
4616           (UCOMPARISONS:DI
4617             (match_operand:DI 1 "register_operand")
4618             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4619           )))]
4620   {
4621     /* If we are in the general purpose register file,
4622        we split to a sequence of comparison and store.  */
4623     if (GP_REGNUM_P (REGNO (operands[0]))
4624         && GP_REGNUM_P (REGNO (operands[1])))
4625       {
4626         machine_mode mode = CCmode;
4627         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4628         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4629         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4630         DONE;
4631       }
4632     /* Otherwise, we expand to a similar pattern which does not
4633        clobber CC_REGNUM.  */
4634   }
4635   [(set_attr "type" "neon_compare,multiple")]
4636 )
4637
4638 (define_insn "*aarch64_cm<optab>di"
4639   [(set (match_operand:DI 0 "register_operand" "=w")
4640         (neg:DI
4641           (UCOMPARISONS:DI
4642             (match_operand:DI 1 "register_operand" "w")
4643             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4644           )))]
4645   "TARGET_SIMD && reload_completed"
4646   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4647   [(set_attr "type" "neon_compare")]
4648 )
4649
4650 ;; cmtst
4651
4652 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4653 ;; we don't have any insns using ne, and aarch64_vcond outputs
4654 ;; not (neg (eq (and x y) 0))
4655 ;; which is rewritten by simplify_rtx as
4656 ;; plus (eq (and x y) 0) -1.
4657
4658 (define_insn "aarch64_cmtst<mode>"
4659   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4660         (plus:<V_INT_EQUIV>
4661           (eq:<V_INT_EQUIV>
4662             (and:VDQ_I
4663               (match_operand:VDQ_I 1 "register_operand" "w")
4664               (match_operand:VDQ_I 2 "register_operand" "w"))
4665             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4666           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4667   ]
4668   "TARGET_SIMD"
4669   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4670   [(set_attr "type" "neon_tst<q>")]
4671 )
4672
4673 (define_insn_and_split "aarch64_cmtstdi"
4674   [(set (match_operand:DI 0 "register_operand" "=w,r")
4675         (neg:DI
4676           (ne:DI
4677             (and:DI
4678               (match_operand:DI 1 "register_operand" "w,r")
4679               (match_operand:DI 2 "register_operand" "w,r"))
4680             (const_int 0))))
4681     (clobber (reg:CC CC_REGNUM))]
4682   "TARGET_SIMD"
4683   "#"
4684   "&& reload_completed"
4685   [(set (match_operand:DI 0 "register_operand")
4686         (neg:DI
4687           (ne:DI
4688             (and:DI
4689               (match_operand:DI 1 "register_operand")
4690               (match_operand:DI 2 "register_operand"))
4691             (const_int 0))))]
4692   {
4693     /* If we are in the general purpose register file,
4694        we split to a sequence of comparison and store.  */
4695     if (GP_REGNUM_P (REGNO (operands[0]))
4696         && GP_REGNUM_P (REGNO (operands[1])))
4697       {
4698         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4699         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4700         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4701         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4702         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4703         DONE;
4704       }
4705     /* Otherwise, we expand to a similar pattern which does not
4706        clobber CC_REGNUM.  */
4707   }
4708   [(set_attr "type" "neon_tst,multiple")]
4709 )
4710
4711 (define_insn "*aarch64_cmtstdi"
4712   [(set (match_operand:DI 0 "register_operand" "=w")
4713         (neg:DI
4714           (ne:DI
4715             (and:DI
4716               (match_operand:DI 1 "register_operand" "w")
4717               (match_operand:DI 2 "register_operand" "w"))
4718             (const_int 0))))]
4719   "TARGET_SIMD"
4720   "cmtst\t%d0, %d1, %d2"
4721   [(set_attr "type" "neon_tst")]
4722 )
4723
4724 ;; fcm(eq|ge|gt|le|lt)
4725
4726 (define_insn "aarch64_cm<optab><mode>"
4727   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4728         (neg:<V_INT_EQUIV>
4729           (COMPARISONS:<V_INT_EQUIV>
4730             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4731             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4732           )))]
4733   "TARGET_SIMD"
4734   "@
4735   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4736   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4737   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4738 )
4739
4740 ;; fac(ge|gt)
4741 ;; Note we can also handle what would be fac(le|lt) by
4742 ;; generating fac(ge|gt).
4743
4744 (define_insn "aarch64_fac<optab><mode>"
4745   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4746         (neg:<V_INT_EQUIV>
4747           (FAC_COMPARISONS:<V_INT_EQUIV>
4748             (abs:VHSDF_HSDF
4749               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4750             (abs:VHSDF_HSDF
4751               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4752   )))]
4753   "TARGET_SIMD"
4754   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4755   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4756 )
4757
4758 ;; addp
4759
4760 (define_insn "aarch64_addp<mode>"
4761   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4762         (unspec:VD_BHSI
4763           [(match_operand:VD_BHSI 1 "register_operand" "w")
4764            (match_operand:VD_BHSI 2 "register_operand" "w")]
4765           UNSPEC_ADDP))]
4766   "TARGET_SIMD"
4767   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4768   [(set_attr "type" "neon_reduc_add<q>")]
4769 )
4770
4771 (define_insn "aarch64_addpdi"
4772   [(set (match_operand:DI 0 "register_operand" "=w")
4773         (unspec:DI
4774           [(match_operand:V2DI 1 "register_operand" "w")]
4775           UNSPEC_ADDP))]
4776   "TARGET_SIMD"
4777   "addp\t%d0, %1.2d"
4778   [(set_attr "type" "neon_reduc_add")]
4779 )
4780
4781 ;; sqrt
4782
4783 (define_expand "sqrt<mode>2"
4784   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4785         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4786   "TARGET_SIMD"
4787 {
4788   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4789     DONE;
4790 })
4791
4792 (define_insn "*sqrt<mode>2"
4793   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4794         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4795   "TARGET_SIMD"
4796   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4797   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4798 )
4799
4800 ;; Patterns for vector struct loads and stores.
4801
4802 (define_insn "aarch64_simd_ld2<mode>"
4803   [(set (match_operand:OI 0 "register_operand" "=w")
4804         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4805                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4806                    UNSPEC_LD2))]
4807   "TARGET_SIMD"
4808   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4809   [(set_attr "type" "neon_load2_2reg<q>")]
4810 )
4811
4812 (define_insn "aarch64_simd_ld2r<mode>"
4813   [(set (match_operand:OI 0 "register_operand" "=w")
4814        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4815                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4816                   UNSPEC_LD2_DUP))]
4817   "TARGET_SIMD"
4818   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4819   [(set_attr "type" "neon_load2_all_lanes<q>")]
4820 )
4821
4822 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4823   [(set (match_operand:OI 0 "register_operand" "=w")
4824         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4825                     (match_operand:OI 2 "register_operand" "0")
4826                     (match_operand:SI 3 "immediate_operand" "i")
4827                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4828                    UNSPEC_LD2_LANE))]
4829   "TARGET_SIMD"
4830   {
4831     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4832     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4833   }
4834   [(set_attr "type" "neon_load2_one_lane")]
4835 )
4836
4837 (define_expand "vec_load_lanesoi<mode>"
4838   [(set (match_operand:OI 0 "register_operand" "=w")
4839         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4840                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4841                    UNSPEC_LD2))]
4842   "TARGET_SIMD"
4843 {
4844   if (BYTES_BIG_ENDIAN)
4845     {
4846       rtx tmp = gen_reg_rtx (OImode);
4847       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4848       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4849       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4850     }
4851   else
4852     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4853   DONE;
4854 })
4855
4856 (define_insn "aarch64_simd_st2<mode>"
4857   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4858         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4859                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4860                    UNSPEC_ST2))]
4861   "TARGET_SIMD"
4862   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4863   [(set_attr "type" "neon_store2_2reg<q>")]
4864 )
4865
4866 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4867 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4868   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4869         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4870                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4871                     (match_operand:SI 2 "immediate_operand" "i")]
4872                    UNSPEC_ST2_LANE))]
4873   "TARGET_SIMD"
4874   {
4875     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4876     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4877   }
4878   [(set_attr "type" "neon_store2_one_lane<q>")]
4879 )
4880
4881 (define_expand "vec_store_lanesoi<mode>"
4882   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4883         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4884                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4885                    UNSPEC_ST2))]
4886   "TARGET_SIMD"
4887 {
4888   if (BYTES_BIG_ENDIAN)
4889     {
4890       rtx tmp = gen_reg_rtx (OImode);
4891       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4892       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4893       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4894     }
4895   else
4896     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4897   DONE;
4898 })
4899
4900 (define_insn "aarch64_simd_ld3<mode>"
4901   [(set (match_operand:CI 0 "register_operand" "=w")
4902         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4903                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4904                    UNSPEC_LD3))]
4905   "TARGET_SIMD"
4906   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4907   [(set_attr "type" "neon_load3_3reg<q>")]
4908 )
4909
4910 (define_insn "aarch64_simd_ld3r<mode>"
4911   [(set (match_operand:CI 0 "register_operand" "=w")
4912        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4913                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4914                   UNSPEC_LD3_DUP))]
4915   "TARGET_SIMD"
4916   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4917   [(set_attr "type" "neon_load3_all_lanes<q>")]
4918 )
4919
4920 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4921   [(set (match_operand:CI 0 "register_operand" "=w")
4922         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4923                     (match_operand:CI 2 "register_operand" "0")
4924                     (match_operand:SI 3 "immediate_operand" "i")
4925                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926                    UNSPEC_LD3_LANE))]
4927   "TARGET_SIMD"
4928 {
4929     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4930     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4931 }
4932   [(set_attr "type" "neon_load3_one_lane")]
4933 )
4934
4935 (define_expand "vec_load_lanesci<mode>"
4936   [(set (match_operand:CI 0 "register_operand" "=w")
4937         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4938                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4939                    UNSPEC_LD3))]
4940   "TARGET_SIMD"
4941 {
4942   if (BYTES_BIG_ENDIAN)
4943     {
4944       rtx tmp = gen_reg_rtx (CImode);
4945       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4946       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4947       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4948     }
4949   else
4950     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4951   DONE;
4952 })
4953
4954 (define_insn "aarch64_simd_st3<mode>"
4955   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4956         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4957                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4958                    UNSPEC_ST3))]
4959   "TARGET_SIMD"
4960   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4961   [(set_attr "type" "neon_store3_3reg<q>")]
4962 )
4963
4964 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4965 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4966   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4967         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4968                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4969                      (match_operand:SI 2 "immediate_operand" "i")]
4970                     UNSPEC_ST3_LANE))]
4971   "TARGET_SIMD"
4972   {
4973     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4974     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4975   }
4976   [(set_attr "type" "neon_store3_one_lane<q>")]
4977 )
4978
4979 (define_expand "vec_store_lanesci<mode>"
4980   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4981         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4982                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4983                    UNSPEC_ST3))]
4984   "TARGET_SIMD"
4985 {
4986   if (BYTES_BIG_ENDIAN)
4987     {
4988       rtx tmp = gen_reg_rtx (CImode);
4989       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4990       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4991       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4992     }
4993   else
4994     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4995   DONE;
4996 })
4997
4998 (define_insn "aarch64_simd_ld4<mode>"
4999   [(set (match_operand:XI 0 "register_operand" "=w")
5000         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5001                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002                    UNSPEC_LD4))]
5003   "TARGET_SIMD"
5004   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5005   [(set_attr "type" "neon_load4_4reg<q>")]
5006 )
5007
5008 (define_insn "aarch64_simd_ld4r<mode>"
5009   [(set (match_operand:XI 0 "register_operand" "=w")
5010        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5011                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5012                   UNSPEC_LD4_DUP))]
5013   "TARGET_SIMD"
5014   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5015   [(set_attr "type" "neon_load4_all_lanes<q>")]
5016 )
5017
5018 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5019   [(set (match_operand:XI 0 "register_operand" "=w")
5020         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5021                     (match_operand:XI 2 "register_operand" "0")
5022                     (match_operand:SI 3 "immediate_operand" "i")
5023                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5024                    UNSPEC_LD4_LANE))]
5025   "TARGET_SIMD"
5026 {
5027     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5028     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5029 }
5030   [(set_attr "type" "neon_load4_one_lane")]
5031 )
5032
5033 (define_expand "vec_load_lanesxi<mode>"
5034   [(set (match_operand:XI 0 "register_operand" "=w")
5035         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5036                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037                    UNSPEC_LD4))]
5038   "TARGET_SIMD"
5039 {
5040   if (BYTES_BIG_ENDIAN)
5041     {
5042       rtx tmp = gen_reg_rtx (XImode);
5043       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5044       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5045       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5046     }
5047   else
5048     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5049   DONE;
5050 })
5051
5052 (define_insn "aarch64_simd_st4<mode>"
5053   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5054         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5055                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5056                    UNSPEC_ST4))]
5057   "TARGET_SIMD"
5058   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5059   [(set_attr "type" "neon_store4_4reg<q>")]
5060 )
5061
5062 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5063 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5064   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5065         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5066                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5067                      (match_operand:SI 2 "immediate_operand" "i")]
5068                     UNSPEC_ST4_LANE))]
5069   "TARGET_SIMD"
5070   {
5071     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5072     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5073   }
5074   [(set_attr "type" "neon_store4_one_lane<q>")]
5075 )
5076
5077 (define_expand "vec_store_lanesxi<mode>"
5078   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5079         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5080                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5081                    UNSPEC_ST4))]
5082   "TARGET_SIMD"
5083 {
5084   if (BYTES_BIG_ENDIAN)
5085     {
5086       rtx tmp = gen_reg_rtx (XImode);
5087       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5089       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5090     }
5091   else
5092     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5093   DONE;
5094 })
5095
5096 (define_insn_and_split "aarch64_rev_reglist<mode>"
5097 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5098         (unspec:VSTRUCT
5099                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5100                     (match_operand:V16QI 2 "register_operand" "w")]
5101                    UNSPEC_REV_REGLIST))]
5102   "TARGET_SIMD"
5103   "#"
5104   "&& reload_completed"
5105   [(const_int 0)]
5106 {
5107   int i;
5108   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5109   for (i = 0; i < nregs; i++)
5110     {
5111       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5112       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5113       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5114     }
5115   DONE;
5116 }
5117   [(set_attr "type" "neon_tbl1_q")
5118    (set_attr "length" "<insn_count>")]
5119 )
5120
5121 ;; Reload patterns for AdvSIMD register list operands.
5122
5123 (define_expand "mov<mode>"
5124   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5125         (match_operand:VSTRUCT 1 "general_operand" ""))]
5126   "TARGET_SIMD"
5127 {
5128   if (can_create_pseudo_p ())
5129     {
5130       if (GET_CODE (operands[0]) != REG)
5131         operands[1] = force_reg (<MODE>mode, operands[1]);
5132     }
5133 })
5134
5135
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137   [(match_operand:CI 0 "register_operand" "=w")
5138    (match_operand:DI 1 "register_operand" "r")
5139    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140   "TARGET_SIMD"
5141 {
5142   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5144   DONE;
5145 })
5146
5147 (define_insn "aarch64_ld1_x3_<mode>"
5148   [(set (match_operand:CI 0 "register_operand" "=w")
5149         (unspec:CI
5150           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5152   "TARGET_SIMD"
5153   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154   [(set_attr "type" "neon_load1_3reg<q>")]
5155 )
5156
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5158   [(match_operand:DI 0 "register_operand" "")
5159    (match_operand:OI 1 "register_operand" "")
5160    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161   "TARGET_SIMD"
5162 {
5163   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5165   DONE;
5166 })
5167
5168 (define_insn "aarch64_st1_x2_<mode>"
5169    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5170          (unspec:OI
5171           [(match_operand:OI 1 "register_operand" "w")
5172           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5173   "TARGET_SIMD"
5174   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175   [(set_attr "type" "neon_store1_2reg<q>")]
5176 )
5177
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5179   [(match_operand:DI 0 "register_operand" "")
5180    (match_operand:CI 1 "register_operand" "")
5181    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182   "TARGET_SIMD"
5183 {
5184   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5186   DONE;
5187 })
5188
5189 (define_insn "aarch64_st1_x3_<mode>"
5190    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5191         (unspec:CI
5192          [(match_operand:CI 1 "register_operand" "w")
5193           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5194   "TARGET_SIMD"
5195   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196   [(set_attr "type" "neon_store1_3reg<q>")]
5197 )
5198
5199 (define_insn "*aarch64_mov<mode>"
5200   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5201         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5202   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5203    && (register_operand (operands[0], <MODE>mode)
5204        || register_operand (operands[1], <MODE>mode))"
5205   "@
5206    #
5207    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5208    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5209   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5210                      neon_load<nregs>_<nregs>reg_q")
5211    (set_attr "length" "<insn_count>,4,4")]
5212 )
5213
5214 (define_insn "aarch64_be_ld1<mode>"
5215   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5216         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5217                              "aarch64_simd_struct_operand" "Utv")]
5218         UNSPEC_LD1))]
5219   "TARGET_SIMD"
5220   "ld1\\t{%0<Vmtype>}, %1"
5221   [(set_attr "type" "neon_load1_1reg<q>")]
5222 )
5223
5224 (define_insn "aarch64_be_st1<mode>"
5225   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5226         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5227         UNSPEC_ST1))]
5228   "TARGET_SIMD"
5229   "st1\\t{%1<Vmtype>}, %0"
5230   [(set_attr "type" "neon_store1_1reg<q>")]
5231 )
5232
5233 (define_insn "*aarch64_be_movoi"
5234   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5235         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5236   "TARGET_SIMD && BYTES_BIG_ENDIAN
5237    && (register_operand (operands[0], OImode)
5238        || register_operand (operands[1], OImode))"
5239   "@
5240    #
5241    stp\\t%q1, %R1, %0
5242    ldp\\t%q0, %R0, %1"
5243   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5244    (set_attr "length" "8,4,4")]
5245 )
5246
5247 (define_insn "*aarch64_be_movci"
5248   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5249         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5250   "TARGET_SIMD && BYTES_BIG_ENDIAN
5251    && (register_operand (operands[0], CImode)
5252        || register_operand (operands[1], CImode))"
5253   "#"
5254   [(set_attr "type" "multiple")
5255    (set_attr "length" "12,4,4")]
5256 )
5257
5258 (define_insn "*aarch64_be_movxi"
5259   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5260         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5261   "TARGET_SIMD && BYTES_BIG_ENDIAN
5262    && (register_operand (operands[0], XImode)
5263        || register_operand (operands[1], XImode))"
5264   "#"
5265   [(set_attr "type" "multiple")
5266    (set_attr "length" "16,4,4")]
5267 )
5268
5269 (define_split
5270   [(set (match_operand:OI 0 "register_operand")
5271         (match_operand:OI 1 "register_operand"))]
5272   "TARGET_SIMD && reload_completed"
5273   [(const_int 0)]
5274 {
5275   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5276   DONE;
5277 })
5278
5279 (define_split
5280   [(set (match_operand:CI 0 "nonimmediate_operand")
5281         (match_operand:CI 1 "general_operand"))]
5282   "TARGET_SIMD && reload_completed"
5283   [(const_int 0)]
5284 {
5285   if (register_operand (operands[0], CImode)
5286       && register_operand (operands[1], CImode))
5287     {
5288       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5289       DONE;
5290     }
5291   else if (BYTES_BIG_ENDIAN)
5292     {
5293       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5294                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5295       emit_move_insn (gen_lowpart (V16QImode,
5296                                    simplify_gen_subreg (TImode, operands[0],
5297                                                         CImode, 32)),
5298                       gen_lowpart (V16QImode,
5299                                    simplify_gen_subreg (TImode, operands[1],
5300                                                         CImode, 32)));
5301       DONE;
5302     }
5303   else
5304     FAIL;
5305 })
5306
5307 (define_split
5308   [(set (match_operand:XI 0 "nonimmediate_operand")
5309         (match_operand:XI 1 "general_operand"))]
5310   "TARGET_SIMD && reload_completed"
5311   [(const_int 0)]
5312 {
5313   if (register_operand (operands[0], XImode)
5314       && register_operand (operands[1], XImode))
5315     {
5316       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5317       DONE;
5318     }
5319   else if (BYTES_BIG_ENDIAN)
5320     {
5321       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5322                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5323       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5324                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5325       DONE;
5326     }
5327   else
5328     FAIL;
5329 })
5330
5331 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5332   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5333    (match_operand:DI 1 "register_operand" "w")
5334    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335   "TARGET_SIMD"
5336 {
5337   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5338   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5339                      * <VSTRUCT:nregs>);
5340
5341   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5342                                                                 mem));
5343   DONE;
5344 })
5345
5346 (define_insn "aarch64_ld2<mode>_dreg"
5347   [(set (match_operand:OI 0 "register_operand" "=w")
5348         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5349                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5350                    UNSPEC_LD2_DREG))]
5351   "TARGET_SIMD"
5352   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5353   [(set_attr "type" "neon_load2_2reg<q>")]
5354 )
5355
5356 (define_insn "aarch64_ld2<mode>_dreg"
5357   [(set (match_operand:OI 0 "register_operand" "=w")
5358         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5359                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5360                    UNSPEC_LD2_DREG))]
5361   "TARGET_SIMD"
5362   "ld1\\t{%S0.1d - %T0.1d}, %1"
5363   [(set_attr "type" "neon_load1_2reg<q>")]
5364 )
5365
5366 (define_insn "aarch64_ld3<mode>_dreg"
5367   [(set (match_operand:CI 0 "register_operand" "=w")
5368         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5369                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5370                    UNSPEC_LD3_DREG))]
5371   "TARGET_SIMD"
5372   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5373   [(set_attr "type" "neon_load3_3reg<q>")]
5374 )
5375
5376 (define_insn "aarch64_ld3<mode>_dreg"
5377   [(set (match_operand:CI 0 "register_operand" "=w")
5378         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5379                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5380                    UNSPEC_LD3_DREG))]
5381   "TARGET_SIMD"
5382   "ld1\\t{%S0.1d - %U0.1d}, %1"
5383   [(set_attr "type" "neon_load1_3reg<q>")]
5384 )
5385
5386 (define_insn "aarch64_ld4<mode>_dreg"
5387   [(set (match_operand:XI 0 "register_operand" "=w")
5388         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5389                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5390                    UNSPEC_LD4_DREG))]
5391   "TARGET_SIMD"
5392   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5393   [(set_attr "type" "neon_load4_4reg<q>")]
5394 )
5395
5396 (define_insn "aarch64_ld4<mode>_dreg"
5397   [(set (match_operand:XI 0 "register_operand" "=w")
5398         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5399                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5400                    UNSPEC_LD4_DREG))]
5401   "TARGET_SIMD"
5402   "ld1\\t{%S0.1d - %V0.1d}, %1"
5403   [(set_attr "type" "neon_load1_4reg<q>")]
5404 )
5405
5406 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5407  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5408   (match_operand:DI 1 "register_operand" "r")
5409   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5410   "TARGET_SIMD"
5411 {
5412   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5413   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5414
5415   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5416   DONE;
5417 })
5418
5419 (define_expand "aarch64_ld1<VALL_F16:mode>"
5420  [(match_operand:VALL_F16 0 "register_operand")
5421   (match_operand:DI 1 "register_operand")]
5422   "TARGET_SIMD"
5423 {
5424   machine_mode mode = <VALL_F16:MODE>mode;
5425   rtx mem = gen_rtx_MEM (mode, operands[1]);
5426
5427   if (BYTES_BIG_ENDIAN)
5428     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5429   else
5430     emit_move_insn (operands[0], mem);
5431   DONE;
5432 })
5433
5434 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5435  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5436   (match_operand:DI 1 "register_operand" "r")
5437   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5438   "TARGET_SIMD"
5439 {
5440   machine_mode mode = <VSTRUCT:MODE>mode;
5441   rtx mem = gen_rtx_MEM (mode, operands[1]);
5442
5443   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5444   DONE;
5445 })
5446
5447 (define_expand "aarch64_ld1x2<VQ:mode>"
5448  [(match_operand:OI 0 "register_operand" "=w")
5449   (match_operand:DI 1 "register_operand" "r")
5450   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451   "TARGET_SIMD"
5452 {
5453   machine_mode mode = OImode;
5454   rtx mem = gen_rtx_MEM (mode, operands[1]);
5455
5456   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5457   DONE;
5458 })
5459
5460 (define_expand "aarch64_ld1x2<VDC:mode>"
5461  [(match_operand:OI 0 "register_operand" "=w")
5462   (match_operand:DI 1 "register_operand" "r")
5463   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464   "TARGET_SIMD"
5465 {
5466   machine_mode mode = OImode;
5467   rtx mem = gen_rtx_MEM (mode, operands[1]);
5468
5469   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5470   DONE;
5471 })
5472
5473
5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5475   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5476         (match_operand:DI 1 "register_operand" "w")
5477         (match_operand:VSTRUCT 2 "register_operand" "0")
5478         (match_operand:SI 3 "immediate_operand" "i")
5479         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480   "TARGET_SIMD"
5481 {
5482   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5483   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5484                      * <VSTRUCT:nregs>);
5485
5486   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5487   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5488         operands[0], mem, operands[2], operands[3]));
5489   DONE;
5490 })
5491
5492 ;; Expanders for builtins to extract vector registers from large
5493 ;; opaque integer modes.
5494
5495 ;; D-register list.
5496
5497 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5498  [(match_operand:VDC 0 "register_operand" "=w")
5499   (match_operand:VSTRUCT 1 "register_operand" "w")
5500   (match_operand:SI 2 "immediate_operand" "i")]
5501   "TARGET_SIMD"
5502 {
5503   int part = INTVAL (operands[2]);
5504   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5505   int offset = part * 16;
5506
5507   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5508   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5509   DONE;
5510 })
5511
5512 ;; Q-register list.
5513
5514 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5515  [(match_operand:VQ 0 "register_operand" "=w")
5516   (match_operand:VSTRUCT 1 "register_operand" "w")
5517   (match_operand:SI 2 "immediate_operand" "i")]
5518   "TARGET_SIMD"
5519 {
5520   int part = INTVAL (operands[2]);
5521   int offset = part * 16;
5522
5523   emit_move_insn (operands[0],
5524                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5525   DONE;
5526 })
5527
5528 ;; Permuted-store expanders for neon intrinsics.
5529
5530 ;; Permute instructions
5531
5532 ;; vec_perm support
5533
5534 (define_expand "vec_perm<mode>"
5535   [(match_operand:VB 0 "register_operand")
5536    (match_operand:VB 1 "register_operand")
5537    (match_operand:VB 2 "register_operand")
5538    (match_operand:VB 3 "register_operand")]
5539   "TARGET_SIMD"
5540 {
5541   aarch64_expand_vec_perm (operands[0], operands[1],
5542                            operands[2], operands[3], <nunits>);
5543   DONE;
5544 })
5545
5546 (define_insn "aarch64_tbl1<mode>"
5547   [(set (match_operand:VB 0 "register_operand" "=w")
5548         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5549                     (match_operand:VB 2 "register_operand" "w")]
5550                    UNSPEC_TBL))]
5551   "TARGET_SIMD"
5552   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5553   [(set_attr "type" "neon_tbl1<q>")]
5554 )
5555
5556 ;; Two source registers.
5557
5558 (define_insn "aarch64_tbl2v16qi"
5559   [(set (match_operand:V16QI 0 "register_operand" "=w")
5560         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5561                        (match_operand:V16QI 2 "register_operand" "w")]
5562                       UNSPEC_TBL))]
5563   "TARGET_SIMD"
5564   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5565   [(set_attr "type" "neon_tbl2_q")]
5566 )
5567
5568 (define_insn "aarch64_tbl3<mode>"
5569   [(set (match_operand:VB 0 "register_operand" "=w")
5570         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5571                       (match_operand:VB 2 "register_operand" "w")]
5572                       UNSPEC_TBL))]
5573   "TARGET_SIMD"
5574   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5575   [(set_attr "type" "neon_tbl3")]
5576 )
5577
5578 (define_insn "aarch64_tbx4<mode>"
5579   [(set (match_operand:VB 0 "register_operand" "=w")
5580         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5581                       (match_operand:OI 2 "register_operand" "w")
5582                       (match_operand:VB 3 "register_operand" "w")]
5583                       UNSPEC_TBX))]
5584   "TARGET_SIMD"
5585   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5586   [(set_attr "type" "neon_tbl4")]
5587 )
5588
5589 ;; Three source registers.
5590
5591 (define_insn "aarch64_qtbl3<mode>"
5592   [(set (match_operand:VB 0 "register_operand" "=w")
5593         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5594                       (match_operand:VB 2 "register_operand" "w")]
5595                       UNSPEC_TBL))]
5596   "TARGET_SIMD"
5597   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5598   [(set_attr "type" "neon_tbl3")]
5599 )
5600
5601 (define_insn "aarch64_qtbx3<mode>"
5602   [(set (match_operand:VB 0 "register_operand" "=w")
5603         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5604                       (match_operand:CI 2 "register_operand" "w")
5605                       (match_operand:VB 3 "register_operand" "w")]
5606                       UNSPEC_TBX))]
5607   "TARGET_SIMD"
5608   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5609   [(set_attr "type" "neon_tbl3")]
5610 )
5611
5612 ;; Four source registers.
5613
5614 (define_insn "aarch64_qtbl4<mode>"
5615   [(set (match_operand:VB 0 "register_operand" "=w")
5616         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5617                       (match_operand:VB 2 "register_operand" "w")]
5618                       UNSPEC_TBL))]
5619   "TARGET_SIMD"
5620   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5621   [(set_attr "type" "neon_tbl4")]
5622 )
5623
5624 (define_insn "aarch64_qtbx4<mode>"
5625   [(set (match_operand:VB 0 "register_operand" "=w")
5626         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5627                       (match_operand:XI 2 "register_operand" "w")
5628                       (match_operand:VB 3 "register_operand" "w")]
5629                       UNSPEC_TBX))]
5630   "TARGET_SIMD"
5631   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5632   [(set_attr "type" "neon_tbl4")]
5633 )
5634
5635 (define_insn_and_split "aarch64_combinev16qi"
5636   [(set (match_operand:OI 0 "register_operand" "=w")
5637         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5638                     (match_operand:V16QI 2 "register_operand" "w")]
5639                    UNSPEC_CONCAT))]
5640   "TARGET_SIMD"
5641   "#"
5642   "&& reload_completed"
5643   [(const_int 0)]
5644 {
5645   aarch64_split_combinev16qi (operands);
5646   DONE;
5647 }
5648 [(set_attr "type" "multiple")]
5649 )
5650
5651 ;; This instruction's pattern is generated directly by
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653 ;; need corresponding changes there.
5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5655   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5656         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5657                           (match_operand:VALL_F16 2 "register_operand" "w")]
5658          PERMUTE))]
5659   "TARGET_SIMD"
5660   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5661   [(set_attr "type" "neon_permute<q>")]
5662 )
5663
5664 ;; This instruction's pattern is generated directly by
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666 ;; need corresponding changes there.  Note that the immediate (third)
5667 ;; operand is a lane index not a byte index.
5668 (define_insn "aarch64_ext<mode>"
5669   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5670         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5671                           (match_operand:VALL_F16 2 "register_operand" "w")
5672                           (match_operand:SI 3 "immediate_operand" "i")]
5673          UNSPEC_EXT))]
5674   "TARGET_SIMD"
5675 {
5676   operands[3] = GEN_INT (INTVAL (operands[3])
5677       * GET_MODE_UNIT_SIZE (<MODE>mode));
5678   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5679 }
5680   [(set_attr "type" "neon_ext<q>")]
5681 )
5682
5683 ;; This instruction's pattern is generated directly by
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685 ;; need corresponding changes there.
5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5687   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5688         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5689                     REVERSE))]
5690   "TARGET_SIMD"
5691   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5692   [(set_attr "type" "neon_rev<q>")]
5693 )
5694
5695 (define_insn "aarch64_st2<mode>_dreg"
5696   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5697         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5698                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5699                    UNSPEC_ST2))]
5700   "TARGET_SIMD"
5701   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5702   [(set_attr "type" "neon_store2_2reg")]
5703 )
5704
5705 (define_insn "aarch64_st2<mode>_dreg"
5706   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5707         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5708                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5709                    UNSPEC_ST2))]
5710   "TARGET_SIMD"
5711   "st1\\t{%S1.1d - %T1.1d}, %0"
5712   [(set_attr "type" "neon_store1_2reg")]
5713 )
5714
5715 (define_insn "aarch64_st3<mode>_dreg"
5716   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5717         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5718                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719                    UNSPEC_ST3))]
5720   "TARGET_SIMD"
5721   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5722   [(set_attr "type" "neon_store3_3reg")]
5723 )
5724
5725 (define_insn "aarch64_st3<mode>_dreg"
5726   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5727         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5728                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5729                    UNSPEC_ST3))]
5730   "TARGET_SIMD"
5731   "st1\\t{%S1.1d - %U1.1d}, %0"
5732   [(set_attr "type" "neon_store1_3reg")]
5733 )
5734
5735 (define_insn "aarch64_st4<mode>_dreg"
5736   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5737         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5738                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5739                    UNSPEC_ST4))]
5740   "TARGET_SIMD"
5741   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5742   [(set_attr "type" "neon_store4_4reg")]
5743 )
5744
5745 (define_insn "aarch64_st4<mode>_dreg"
5746   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5747         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5748                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5749                    UNSPEC_ST4))]
5750   "TARGET_SIMD"
5751   "st1\\t{%S1.1d - %V1.1d}, %0"
5752   [(set_attr "type" "neon_store1_4reg")]
5753 )
5754
5755 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5756  [(match_operand:DI 0 "register_operand" "r")
5757   (match_operand:VSTRUCT 1 "register_operand" "w")
5758   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5759   "TARGET_SIMD"
5760 {
5761   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5762   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5763
5764   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5765   DONE;
5766 })
5767
5768 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5769  [(match_operand:DI 0 "register_operand" "r")
5770   (match_operand:VSTRUCT 1 "register_operand" "w")
5771   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772   "TARGET_SIMD"
5773 {
5774   machine_mode mode = <VSTRUCT:MODE>mode;
5775   rtx mem = gen_rtx_MEM (mode, operands[0]);
5776
5777   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5778   DONE;
5779 })
5780
5781 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5782  [(match_operand:DI 0 "register_operand" "r")
5783   (match_operand:VSTRUCT 1 "register_operand" "w")
5784   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5785   (match_operand:SI 2 "immediate_operand")]
5786   "TARGET_SIMD"
5787 {
5788   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5789   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5790                      * <VSTRUCT:nregs>);
5791
5792   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5793                 mem, operands[1], operands[2]));
5794   DONE;
5795 })
5796
5797 (define_expand "aarch64_st1<VALL_F16:mode>"
5798  [(match_operand:DI 0 "register_operand")
5799   (match_operand:VALL_F16 1 "register_operand")]
5800   "TARGET_SIMD"
5801 {
5802   machine_mode mode = <VALL_F16:MODE>mode;
5803   rtx mem = gen_rtx_MEM (mode, operands[0]);
5804
5805   if (BYTES_BIG_ENDIAN)
5806     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5807   else
5808     emit_move_insn (mem, operands[1]);
5809   DONE;
5810 })
5811
5812 ;; Expander for builtins to insert vector registers into large
5813 ;; opaque integer modes.
5814
5815 ;; Q-register list.  We don't need a D-reg inserter as we zero
5816 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5817
5818 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5819  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5820   (match_operand:VSTRUCT 1 "register_operand" "0")
5821   (match_operand:VQ 2 "register_operand" "w")
5822   (match_operand:SI 3 "immediate_operand" "i")]
5823   "TARGET_SIMD"
5824 {
5825   int part = INTVAL (operands[3]);
5826   int offset = part * 16;
5827
5828   emit_move_insn (operands[0], operands[1]);
5829   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5830                   operands[2]);
5831   DONE;
5832 })
5833
5834 ;; Standard pattern name vec_init<mode><Vel>.
5835
5836 (define_expand "vec_init<mode><Vel>"
5837   [(match_operand:VALL_F16 0 "register_operand" "")
5838    (match_operand 1 "" "")]
5839   "TARGET_SIMD"
5840 {
5841   aarch64_expand_vector_init (operands[0], operands[1]);
5842   DONE;
5843 })
5844
5845 (define_insn "*aarch64_simd_ld1r<mode>"
5846   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5847         (vec_duplicate:VALL_F16
5848           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5849   "TARGET_SIMD"
5850   "ld1r\\t{%0.<Vtype>}, %1"
5851   [(set_attr "type" "neon_load1_all_lanes")]
5852 )
5853
5854 (define_insn "aarch64_simd_ld1<mode>_x2"
5855   [(set (match_operand:OI 0 "register_operand" "=w")
5856         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858                    UNSPEC_LD1))]
5859   "TARGET_SIMD"
5860   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861   [(set_attr "type" "neon_load1_2reg<q>")]
5862 )
5863
5864 (define_insn "aarch64_simd_ld1<mode>_x2"
5865   [(set (match_operand:OI 0 "register_operand" "=w")
5866         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868                    UNSPEC_LD1))]
5869   "TARGET_SIMD"
5870   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871   [(set_attr "type" "neon_load1_2reg<q>")]
5872 )
5873
5874
5875 (define_insn "@aarch64_frecpe<mode>"
5876   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5877         (unspec:VHSDF_HSDF
5878          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5879          UNSPEC_FRECPE))]
5880   "TARGET_SIMD"
5881   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5882   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5883 )
5884
5885 (define_insn "aarch64_frecpx<mode>"
5886   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5887         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5888          UNSPEC_FRECPX))]
5889   "TARGET_SIMD"
5890   "frecpx\t%<s>0, %<s>1"
5891   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5892 )
5893
5894 (define_insn "@aarch64_frecps<mode>"
5895   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5896         (unspec:VHSDF_HSDF
5897           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5898           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5899           UNSPEC_FRECPS))]
5900   "TARGET_SIMD"
5901   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5902   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5903 )
5904
5905 (define_insn "aarch64_urecpe<mode>"
5906   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5907         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5908                 UNSPEC_URECPE))]
5909  "TARGET_SIMD"
5910  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5911   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5912
5913 ;; Standard pattern name vec_extract<mode><Vel>.
5914
5915 (define_expand "vec_extract<mode><Vel>"
5916   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5917    (match_operand:VALL_F16 1 "register_operand" "")
5918    (match_operand:SI 2 "immediate_operand" "")]
5919   "TARGET_SIMD"
5920 {
5921     emit_insn
5922       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5923     DONE;
5924 })
5925
5926 ;; aes
5927
5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5929   [(set (match_operand:V16QI 0 "register_operand" "=w")
5930         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5931                        (match_operand:V16QI 2 "register_operand" "w")]
5932          CRYPTO_AES))]
5933   "TARGET_SIMD && TARGET_AES"
5934   "aes<aes_op>\\t%0.16b, %2.16b"
5935   [(set_attr "type" "crypto_aese")]
5936 )
5937
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939   [(set (match_operand:V16QI 0 "register_operand" "=w")
5940         (unspec:V16QI [(xor:V16QI
5941                         (match_operand:V16QI 1 "register_operand" "%0")
5942                         (match_operand:V16QI 2 "register_operand" "w"))
5943                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5944                        CRYPTO_AES))]
5945   "TARGET_SIMD && TARGET_AES"
5946   "aes<aes_op>\\t%0.16b, %2.16b"
5947   [(set_attr "type" "crypto_aese")]
5948 )
5949
5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5951   [(set (match_operand:V16QI 0 "register_operand" "=w")
5952         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5953         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5954                    (match_operand:V16QI 2 "register_operand" "w"))]
5955         CRYPTO_AES))]
5956   "TARGET_SIMD && TARGET_AES"
5957   "aes<aes_op>\\t%0.16b, %2.16b"
5958   [(set_attr "type" "crypto_aese")]
5959 )
5960
5961 ;; When AES/AESMC fusion is enabled we want the register allocation to
5962 ;; look like:
5963 ;;    AESE Vn, _
5964 ;;    AESMC Vn, Vn
5965 ;; So prefer to tie operand 1 to operand 0 when fusing.
5966
5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5968   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5969         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5970          CRYPTO_AESMC))]
5971   "TARGET_SIMD && TARGET_AES"
5972   "aes<aesmc_op>\\t%0.16b, %1.16b"
5973   [(set_attr "type" "crypto_aesmc")
5974    (set_attr_alternative "enabled"
5975      [(if_then_else (match_test
5976                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5977                      (const_string "yes" )
5978                      (const_string "no"))
5979       (const_string "yes")])]
5980 )
5981
5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5983 ;; and enforce the register dependency without scheduling or register
5984 ;; allocation messing up the order or introducing moves inbetween.
5985 ;;  Mash the two together during combine.
5986
5987 (define_insn "*aarch64_crypto_aese_fused"
5988   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5989         (unspec:V16QI
5990           [(unspec:V16QI
5991             [(match_operand:V16QI 1 "register_operand" "0")
5992              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5993           ] UNSPEC_AESMC))]
5994   "TARGET_SIMD && TARGET_AES
5995    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5996   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5997   [(set_attr "type" "crypto_aese")
5998    (set_attr "length" "8")]
5999 )
6000
6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6002 ;; and enforce the register dependency without scheduling or register
6003 ;; allocation messing up the order or introducing moves inbetween.
6004 ;;  Mash the two together during combine.
6005
6006 (define_insn "*aarch64_crypto_aesd_fused"
6007   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6008         (unspec:V16QI
6009           [(unspec:V16QI
6010             [(match_operand:V16QI 1 "register_operand" "0")
6011              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6012           ] UNSPEC_AESIMC))]
6013   "TARGET_SIMD && TARGET_AES
6014    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6015   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6016   [(set_attr "type" "crypto_aese")
6017    (set_attr "length" "8")]
6018 )
6019
6020 ;; sha1
6021
6022 (define_insn "aarch64_crypto_sha1hsi"
6023   [(set (match_operand:SI 0 "register_operand" "=w")
6024         (unspec:SI [(match_operand:SI 1
6025                        "register_operand" "w")]
6026          UNSPEC_SHA1H))]
6027   "TARGET_SIMD && TARGET_SHA2"
6028   "sha1h\\t%s0, %s1"
6029   [(set_attr "type" "crypto_sha1_fast")]
6030 )
6031
6032 (define_insn "aarch64_crypto_sha1hv4si"
6033   [(set (match_operand:SI 0 "register_operand" "=w")
6034         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6035                      (parallel [(const_int 0)]))]
6036          UNSPEC_SHA1H))]
6037   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6038   "sha1h\\t%s0, %s1"
6039   [(set_attr "type" "crypto_sha1_fast")]
6040 )
6041
6042 (define_insn "aarch64_be_crypto_sha1hv4si"
6043   [(set (match_operand:SI 0 "register_operand" "=w")
6044         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6045                      (parallel [(const_int 3)]))]
6046          UNSPEC_SHA1H))]
6047   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6048   "sha1h\\t%s0, %s1"
6049   [(set_attr "type" "crypto_sha1_fast")]
6050 )
6051
6052 (define_insn "aarch64_crypto_sha1su1v4si"
6053   [(set (match_operand:V4SI 0 "register_operand" "=w")
6054         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055                       (match_operand:V4SI 2 "register_operand" "w")]
6056          UNSPEC_SHA1SU1))]
6057   "TARGET_SIMD && TARGET_SHA2"
6058   "sha1su1\\t%0.4s, %2.4s"
6059   [(set_attr "type" "crypto_sha1_fast")]
6060 )
6061
6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6063   [(set (match_operand:V4SI 0 "register_operand" "=w")
6064         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6065                       (match_operand:SI 2 "register_operand" "w")
6066                       (match_operand:V4SI 3 "register_operand" "w")]
6067          CRYPTO_SHA1))]
6068   "TARGET_SIMD && TARGET_SHA2"
6069   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6070   [(set_attr "type" "crypto_sha1_slow")]
6071 )
6072
6073 (define_insn "aarch64_crypto_sha1su0v4si"
6074   [(set (match_operand:V4SI 0 "register_operand" "=w")
6075         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6076                       (match_operand:V4SI 2 "register_operand" "w")
6077                       (match_operand:V4SI 3 "register_operand" "w")]
6078          UNSPEC_SHA1SU0))]
6079   "TARGET_SIMD && TARGET_SHA2"
6080   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6081   [(set_attr "type" "crypto_sha1_xor")]
6082 )
6083
6084 ;; sha256
6085
6086 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6087   [(set (match_operand:V4SI 0 "register_operand" "=w")
6088         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6089                       (match_operand:V4SI 2 "register_operand" "w")
6090                       (match_operand:V4SI 3 "register_operand" "w")]
6091          CRYPTO_SHA256))]
6092   "TARGET_SIMD && TARGET_SHA2"
6093   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6094   [(set_attr "type" "crypto_sha256_slow")]
6095 )
6096
6097 (define_insn "aarch64_crypto_sha256su0v4si"
6098   [(set (match_operand:V4SI 0 "register_operand" "=w")
6099         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6100                       (match_operand:V4SI 2 "register_operand" "w")]
6101          UNSPEC_SHA256SU0))]
6102   "TARGET_SIMD && TARGET_SHA2"
6103   "sha256su0\\t%0.4s, %2.4s"
6104   [(set_attr "type" "crypto_sha256_fast")]
6105 )
6106
6107 (define_insn "aarch64_crypto_sha256su1v4si"
6108   [(set (match_operand:V4SI 0 "register_operand" "=w")
6109         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6110                       (match_operand:V4SI 2 "register_operand" "w")
6111                       (match_operand:V4SI 3 "register_operand" "w")]
6112          UNSPEC_SHA256SU1))]
6113   "TARGET_SIMD && TARGET_SHA2"
6114   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6115   [(set_attr "type" "crypto_sha256_slow")]
6116 )
6117
6118 ;; sha512
6119
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6121   [(set (match_operand:V2DI 0 "register_operand" "=w")
6122         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6123                       (match_operand:V2DI 2 "register_operand" "w")
6124                       (match_operand:V2DI 3 "register_operand" "w")]
6125          CRYPTO_SHA512))]
6126   "TARGET_SIMD && TARGET_SHA3"
6127   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6128   [(set_attr "type" "crypto_sha512")]
6129 )
6130
6131 (define_insn "aarch64_crypto_sha512su0qv2di"
6132   [(set (match_operand:V2DI 0 "register_operand" "=w")
6133         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6134                       (match_operand:V2DI 2 "register_operand" "w")]
6135          UNSPEC_SHA512SU0))]
6136   "TARGET_SIMD && TARGET_SHA3"
6137   "sha512su0\\t%0.2d, %2.2d"
6138   [(set_attr "type" "crypto_sha512")]
6139 )
6140
6141 (define_insn "aarch64_crypto_sha512su1qv2di"
6142   [(set (match_operand:V2DI 0 "register_operand" "=w")
6143         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6144                       (match_operand:V2DI 2 "register_operand" "w")
6145                       (match_operand:V2DI 3 "register_operand" "w")]
6146          UNSPEC_SHA512SU1))]
6147   "TARGET_SIMD && TARGET_SHA3"
6148   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6149   [(set_attr "type" "crypto_sha512")]
6150 )
6151
6152 ;; sha3
6153
6154 (define_insn "eor3q<mode>4"
6155   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6156         (xor:VQ_I
6157          (xor:VQ_I
6158           (match_operand:VQ_I 2 "register_operand" "w")
6159           (match_operand:VQ_I 3 "register_operand" "w"))
6160          (match_operand:VQ_I 1 "register_operand" "w")))]
6161   "TARGET_SIMD && TARGET_SHA3"
6162   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6163   [(set_attr "type" "crypto_sha3")]
6164 )
6165
6166 (define_insn "aarch64_rax1qv2di"
6167   [(set (match_operand:V2DI 0 "register_operand" "=w")
6168         (xor:V2DI
6169          (rotate:V2DI
6170           (match_operand:V2DI 2 "register_operand" "w")
6171           (const_int 1))
6172          (match_operand:V2DI 1 "register_operand" "w")))]
6173   "TARGET_SIMD && TARGET_SHA3"
6174   "rax1\\t%0.2d, %1.2d, %2.2d"
6175   [(set_attr "type" "crypto_sha3")]
6176 )
6177
6178 (define_insn "aarch64_xarqv2di"
6179   [(set (match_operand:V2DI 0 "register_operand" "=w")
6180         (rotatert:V2DI
6181          (xor:V2DI
6182           (match_operand:V2DI 1 "register_operand" "%w")
6183           (match_operand:V2DI 2 "register_operand" "w"))
6184          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6185   "TARGET_SIMD && TARGET_SHA3"
6186   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6187   [(set_attr "type" "crypto_sha3")]
6188 )
6189
6190 (define_insn "bcaxq<mode>4"
6191   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6192         (xor:VQ_I
6193          (and:VQ_I
6194           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6195           (match_operand:VQ_I 2 "register_operand" "w"))
6196          (match_operand:VQ_I 1 "register_operand" "w")))]
6197   "TARGET_SIMD && TARGET_SHA3"
6198   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6199   [(set_attr "type" "crypto_sha3")]
6200 )
6201
6202 ;; SM3
6203
6204 (define_insn "aarch64_sm3ss1qv4si"
6205   [(set (match_operand:V4SI 0 "register_operand" "=w")
6206         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6207                       (match_operand:V4SI 2 "register_operand" "w")
6208                       (match_operand:V4SI 3 "register_operand" "w")]
6209          UNSPEC_SM3SS1))]
6210   "TARGET_SIMD && TARGET_SM4"
6211   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6212   [(set_attr "type" "crypto_sm3")]
6213 )
6214
6215
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6217   [(set (match_operand:V4SI 0 "register_operand" "=w")
6218         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219                       (match_operand:V4SI 2 "register_operand" "w")
6220                       (match_operand:V4SI 3 "register_operand" "w")
6221                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6222          CRYPTO_SM3TT))]
6223   "TARGET_SIMD && TARGET_SM4"
6224   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6225   [(set_attr "type" "crypto_sm3")]
6226 )
6227
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6229   [(set (match_operand:V4SI 0 "register_operand" "=w")
6230         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6231                       (match_operand:V4SI 2 "register_operand" "w")
6232                       (match_operand:V4SI 3 "register_operand" "w")]
6233          CRYPTO_SM3PART))]
6234   "TARGET_SIMD && TARGET_SM4"
6235   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6236   [(set_attr "type" "crypto_sm3")]
6237 )
6238
6239 ;; SM4
6240
6241 (define_insn "aarch64_sm4eqv4si"
6242   [(set (match_operand:V4SI 0 "register_operand" "=w")
6243         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6244                       (match_operand:V4SI 2 "register_operand" "w")]
6245          UNSPEC_SM4E))]
6246   "TARGET_SIMD && TARGET_SM4"
6247   "sm4e\\t%0.4s, %2.4s"
6248   [(set_attr "type" "crypto_sm4")]
6249 )
6250
6251 (define_insn "aarch64_sm4ekeyqv4si"
6252   [(set (match_operand:V4SI 0 "register_operand" "=w")
6253         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6254                       (match_operand:V4SI 2 "register_operand" "w")]
6255          UNSPEC_SM4EKEY))]
6256   "TARGET_SIMD && TARGET_SM4"
6257   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6258   [(set_attr "type" "crypto_sm4")]
6259 )
6260
6261 ;; fp16fml
6262
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6264   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6265         (unspec:VDQSF
6266          [(match_operand:VDQSF 1 "register_operand" "0")
6267           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6268           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6269          VFMLA16_LOW))]
6270   "TARGET_F16FML"
6271 {
6272   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6273                                             <nunits> * 2, false);
6274   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6275                                             <nunits> * 2, false);
6276
6277   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6278                                                                 operands[1],
6279                                                                 operands[2],
6280                                                                 operands[3],
6281                                                                 p1, p2));
6282   DONE;
6283
6284 })
6285
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6287   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6288         (unspec:VDQSF
6289          [(match_operand:VDQSF 1 "register_operand" "0")
6290           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6291           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6292          VFMLA16_HIGH))]
6293   "TARGET_F16FML"
6294 {
6295   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6296   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6297
6298   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6299                                                                  operands[1],
6300                                                                  operands[2],
6301                                                                  operands[3],
6302                                                                  p1, p2));
6303   DONE;
6304 })
6305
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6307   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6308         (fma:VDQSF
6309          (float_extend:VDQSF
6310           (vec_select:<VFMLA_SEL_W>
6311            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6312            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6313          (float_extend:VDQSF
6314           (vec_select:<VFMLA_SEL_W>
6315            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6316            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6317          (match_operand:VDQSF 1 "register_operand" "0")))]
6318   "TARGET_F16FML"
6319   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6320   [(set_attr "type" "neon_fp_mul_s")]
6321 )
6322
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6324   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6325         (fma:VDQSF
6326          (float_extend:VDQSF
6327           (neg:<VFMLA_SEL_W>
6328            (vec_select:<VFMLA_SEL_W>
6329             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6330             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6331          (float_extend:VDQSF
6332           (vec_select:<VFMLA_SEL_W>
6333            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6334            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6335          (match_operand:VDQSF 1 "register_operand" "0")))]
6336   "TARGET_F16FML"
6337   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6338   [(set_attr "type" "neon_fp_mul_s")]
6339 )
6340
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6342   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6343         (fma:VDQSF
6344          (float_extend:VDQSF
6345           (vec_select:<VFMLA_SEL_W>
6346            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6347            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6348          (float_extend:VDQSF
6349           (vec_select:<VFMLA_SEL_W>
6350            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6351            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6352          (match_operand:VDQSF 1 "register_operand" "0")))]
6353   "TARGET_F16FML"
6354   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6355   [(set_attr "type" "neon_fp_mul_s")]
6356 )
6357
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6359   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6360         (fma:VDQSF
6361          (float_extend:VDQSF
6362           (neg:<VFMLA_SEL_W>
6363            (vec_select:<VFMLA_SEL_W>
6364             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6365             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6366          (float_extend:VDQSF
6367           (vec_select:<VFMLA_SEL_W>
6368            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6369            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6370          (match_operand:VDQSF 1 "register_operand" "0")))]
6371   "TARGET_F16FML"
6372   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6373   [(set_attr "type" "neon_fp_mul_s")]
6374 )
6375
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6377   [(set (match_operand:V2SF 0 "register_operand" "")
6378         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6379                            (match_operand:V4HF 2 "register_operand" "")
6380                            (match_operand:V4HF 3 "register_operand" "")
6381                            (match_operand:SI 4 "aarch64_imm2" "")]
6382          VFMLA16_LOW))]
6383   "TARGET_F16FML"
6384 {
6385     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6386     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6387
6388     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6389                                                             operands[1],
6390                                                             operands[2],
6391                                                             operands[3],
6392                                                             p1, lane));
6393     DONE;
6394 }
6395 )
6396
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6398   [(set (match_operand:V2SF 0 "register_operand" "")
6399         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6400                            (match_operand:V4HF 2 "register_operand" "")
6401                            (match_operand:V4HF 3 "register_operand" "")
6402                            (match_operand:SI 4 "aarch64_imm2" "")]
6403          VFMLA16_HIGH))]
6404   "TARGET_F16FML"
6405 {
6406     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6407     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6408
6409     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6410                                                              operands[1],
6411                                                              operands[2],
6412                                                              operands[3],
6413                                                              p1, lane));
6414     DONE;
6415 })
6416
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6418   [(set (match_operand:V2SF 0 "register_operand" "=w")
6419         (fma:V2SF
6420          (float_extend:V2SF
6421            (vec_select:V2HF
6422             (match_operand:V4HF 2 "register_operand" "w")
6423             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6424          (float_extend:V2SF
6425            (vec_duplicate:V2HF
6426             (vec_select:HF
6427              (match_operand:V4HF 3 "register_operand" "x")
6428              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6429          (match_operand:V2SF 1 "register_operand" "0")))]
6430   "TARGET_F16FML"
6431   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6432   [(set_attr "type" "neon_fp_mul_s")]
6433 )
6434
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6436   [(set (match_operand:V2SF 0 "register_operand" "=w")
6437         (fma:V2SF
6438          (float_extend:V2SF
6439           (neg:V2HF
6440            (vec_select:V2HF
6441             (match_operand:V4HF 2 "register_operand" "w")
6442             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6443          (float_extend:V2SF
6444           (vec_duplicate:V2HF
6445            (vec_select:HF
6446             (match_operand:V4HF 3 "register_operand" "x")
6447             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6448          (match_operand:V2SF 1 "register_operand" "0")))]
6449   "TARGET_F16FML"
6450   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6451   [(set_attr "type" "neon_fp_mul_s")]
6452 )
6453
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6455   [(set (match_operand:V2SF 0 "register_operand" "=w")
6456         (fma:V2SF
6457          (float_extend:V2SF
6458            (vec_select:V2HF
6459             (match_operand:V4HF 2 "register_operand" "w")
6460             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6461          (float_extend:V2SF
6462            (vec_duplicate:V2HF
6463             (vec_select:HF
6464              (match_operand:V4HF 3 "register_operand" "x")
6465              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6466          (match_operand:V2SF 1 "register_operand" "0")))]
6467   "TARGET_F16FML"
6468   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6469   [(set_attr "type" "neon_fp_mul_s")]
6470 )
6471
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6473   [(set (match_operand:V2SF 0 "register_operand" "=w")
6474         (fma:V2SF
6475          (float_extend:V2SF
6476            (neg:V2HF
6477             (vec_select:V2HF
6478              (match_operand:V4HF 2 "register_operand" "w")
6479              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6480          (float_extend:V2SF
6481            (vec_duplicate:V2HF
6482             (vec_select:HF
6483              (match_operand:V4HF 3 "register_operand" "x")
6484              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6485          (match_operand:V2SF 1 "register_operand" "0")))]
6486   "TARGET_F16FML"
6487   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6488   [(set_attr "type" "neon_fp_mul_s")]
6489 )
6490
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6492   [(set (match_operand:V4SF 0 "register_operand" "")
6493         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6494                            (match_operand:V8HF 2 "register_operand" "")
6495                            (match_operand:V8HF 3 "register_operand" "")
6496                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6497          VFMLA16_LOW))]
6498   "TARGET_F16FML"
6499 {
6500     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6501     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6502
6503     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6504                                                               operands[1],
6505                                                               operands[2],
6506                                                               operands[3],
6507                                                               p1, lane));
6508     DONE;
6509 })
6510
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6512   [(set (match_operand:V4SF 0 "register_operand" "")
6513         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6514                            (match_operand:V8HF 2 "register_operand" "")
6515                            (match_operand:V8HF 3 "register_operand" "")
6516                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6517          VFMLA16_HIGH))]
6518   "TARGET_F16FML"
6519 {
6520     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6521     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6522
6523     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6524                                                                operands[1],
6525                                                                operands[2],
6526                                                                operands[3],
6527                                                                p1, lane));
6528     DONE;
6529 })
6530
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6532   [(set (match_operand:V4SF 0 "register_operand" "=w")
6533         (fma:V4SF
6534          (float_extend:V4SF
6535           (vec_select:V4HF
6536             (match_operand:V8HF 2 "register_operand" "w")
6537             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6538          (float_extend:V4SF
6539           (vec_duplicate:V4HF
6540            (vec_select:HF
6541             (match_operand:V8HF 3 "register_operand" "x")
6542             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6543          (match_operand:V4SF 1 "register_operand" "0")))]
6544   "TARGET_F16FML"
6545   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6546   [(set_attr "type" "neon_fp_mul_s")]
6547 )
6548
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6550   [(set (match_operand:V4SF 0 "register_operand" "=w")
6551         (fma:V4SF
6552           (float_extend:V4SF
6553            (neg:V4HF
6554             (vec_select:V4HF
6555              (match_operand:V8HF 2 "register_operand" "w")
6556              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6557          (float_extend:V4SF
6558           (vec_duplicate:V4HF
6559            (vec_select:HF
6560             (match_operand:V8HF 3 "register_operand" "x")
6561             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6562          (match_operand:V4SF 1 "register_operand" "0")))]
6563   "TARGET_F16FML"
6564   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6565   [(set_attr "type" "neon_fp_mul_s")]
6566 )
6567
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6569   [(set (match_operand:V4SF 0 "register_operand" "=w")
6570         (fma:V4SF
6571          (float_extend:V4SF
6572           (vec_select:V4HF
6573             (match_operand:V8HF 2 "register_operand" "w")
6574             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6575          (float_extend:V4SF
6576           (vec_duplicate:V4HF
6577            (vec_select:HF
6578             (match_operand:V8HF 3 "register_operand" "x")
6579             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6580          (match_operand:V4SF 1 "register_operand" "0")))]
6581   "TARGET_F16FML"
6582   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6583   [(set_attr "type" "neon_fp_mul_s")]
6584 )
6585
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6587   [(set (match_operand:V4SF 0 "register_operand" "=w")
6588         (fma:V4SF
6589          (float_extend:V4SF
6590           (neg:V4HF
6591            (vec_select:V4HF
6592             (match_operand:V8HF 2 "register_operand" "w")
6593             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6594          (float_extend:V4SF
6595           (vec_duplicate:V4HF
6596            (vec_select:HF
6597             (match_operand:V8HF 3 "register_operand" "x")
6598             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6599          (match_operand:V4SF 1 "register_operand" "0")))]
6600   "TARGET_F16FML"
6601   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6602   [(set_attr "type" "neon_fp_mul_s")]
6603 )
6604
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6606   [(set (match_operand:V2SF 0 "register_operand" "")
6607         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6608                       (match_operand:V4HF 2 "register_operand" "")
6609                       (match_operand:V8HF 3 "register_operand" "")
6610                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6611          VFMLA16_LOW))]
6612   "TARGET_F16FML"
6613 {
6614     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6615     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6616
6617     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6618                                                              operands[1],
6619                                                              operands[2],
6620                                                              operands[3],
6621                                                              p1, lane));
6622     DONE;
6623
6624 })
6625
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6627   [(set (match_operand:V2SF 0 "register_operand" "")
6628         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6629                       (match_operand:V4HF 2 "register_operand" "")
6630                       (match_operand:V8HF 3 "register_operand" "")
6631                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6632          VFMLA16_HIGH))]
6633   "TARGET_F16FML"
6634 {
6635     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6636     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6637
6638     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6639                                                               operands[1],
6640                                                               operands[2],
6641                                                               operands[3],
6642                                                               p1, lane));
6643     DONE;
6644
6645 })
6646
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6648   [(set (match_operand:V2SF 0 "register_operand" "=w")
6649         (fma:V2SF
6650          (float_extend:V2SF
6651            (vec_select:V2HF
6652             (match_operand:V4HF 2 "register_operand" "w")
6653             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6654          (float_extend:V2SF
6655           (vec_duplicate:V2HF
6656            (vec_select:HF
6657             (match_operand:V8HF 3 "register_operand" "x")
6658             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6659          (match_operand:V2SF 1 "register_operand" "0")))]
6660   "TARGET_F16FML"
6661   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6662   [(set_attr "type" "neon_fp_mul_s")]
6663 )
6664
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6666   [(set (match_operand:V2SF 0 "register_operand" "=w")
6667         (fma:V2SF
6668          (float_extend:V2SF
6669           (neg:V2HF
6670            (vec_select:V2HF
6671             (match_operand:V4HF 2 "register_operand" "w")
6672             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6673          (float_extend:V2SF
6674           (vec_duplicate:V2HF
6675            (vec_select:HF
6676             (match_operand:V8HF 3 "register_operand" "x")
6677             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678          (match_operand:V2SF 1 "register_operand" "0")))]
6679   "TARGET_F16FML"
6680   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6681   [(set_attr "type" "neon_fp_mul_s")]
6682 )
6683
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6685   [(set (match_operand:V2SF 0 "register_operand" "=w")
6686         (fma:V2SF
6687          (float_extend:V2SF
6688            (vec_select:V2HF
6689             (match_operand:V4HF 2 "register_operand" "w")
6690             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6691          (float_extend:V2SF
6692           (vec_duplicate:V2HF
6693            (vec_select:HF
6694             (match_operand:V8HF 3 "register_operand" "x")
6695             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6696          (match_operand:V2SF 1 "register_operand" "0")))]
6697   "TARGET_F16FML"
6698   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6699   [(set_attr "type" "neon_fp_mul_s")]
6700 )
6701
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6703   [(set (match_operand:V2SF 0 "register_operand" "=w")
6704         (fma:V2SF
6705          (float_extend:V2SF
6706           (neg:V2HF
6707            (vec_select:V2HF
6708             (match_operand:V4HF 2 "register_operand" "w")
6709             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6710          (float_extend:V2SF
6711           (vec_duplicate:V2HF
6712            (vec_select:HF
6713             (match_operand:V8HF 3 "register_operand" "x")
6714             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6715          (match_operand:V2SF 1 "register_operand" "0")))]
6716   "TARGET_F16FML"
6717   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6718   [(set_attr "type" "neon_fp_mul_s")]
6719 )
6720
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6722   [(set (match_operand:V4SF 0 "register_operand" "")
6723         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6724                       (match_operand:V8HF 2 "register_operand" "")
6725                       (match_operand:V4HF 3 "register_operand" "")
6726                       (match_operand:SI 4 "aarch64_imm2" "")]
6727          VFMLA16_LOW))]
6728   "TARGET_F16FML"
6729 {
6730     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6731     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6732
6733     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6734                                                              operands[1],
6735                                                              operands[2],
6736                                                              operands[3],
6737                                                              p1, lane));
6738     DONE;
6739 })
6740
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6742   [(set (match_operand:V4SF 0 "register_operand" "")
6743         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6744                       (match_operand:V8HF 2 "register_operand" "")
6745                       (match_operand:V4HF 3 "register_operand" "")
6746                       (match_operand:SI 4 "aarch64_imm2" "")]
6747          VFMLA16_HIGH))]
6748   "TARGET_F16FML"
6749 {
6750     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6751     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6752
6753     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6754                                                               operands[1],
6755                                                               operands[2],
6756                                                               operands[3],
6757                                                               p1, lane));
6758     DONE;
6759 })
6760
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6762   [(set (match_operand:V4SF 0 "register_operand" "=w")
6763         (fma:V4SF
6764          (float_extend:V4SF
6765           (vec_select:V4HF
6766            (match_operand:V8HF 2 "register_operand" "w")
6767            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6768          (float_extend:V4SF
6769           (vec_duplicate:V4HF
6770            (vec_select:HF
6771             (match_operand:V4HF 3 "register_operand" "x")
6772             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6773          (match_operand:V4SF 1 "register_operand" "0")))]
6774   "TARGET_F16FML"
6775   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6776   [(set_attr "type" "neon_fp_mul_s")]
6777 )
6778
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6780   [(set (match_operand:V4SF 0 "register_operand" "=w")
6781         (fma:V4SF
6782          (float_extend:V4SF
6783           (neg:V4HF
6784            (vec_select:V4HF
6785             (match_operand:V8HF 2 "register_operand" "w")
6786             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6787          (float_extend:V4SF
6788           (vec_duplicate:V4HF
6789            (vec_select:HF
6790             (match_operand:V4HF 3 "register_operand" "x")
6791             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6792          (match_operand:V4SF 1 "register_operand" "0")))]
6793   "TARGET_F16FML"
6794   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6795   [(set_attr "type" "neon_fp_mul_s")]
6796 )
6797
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6799   [(set (match_operand:V4SF 0 "register_operand" "=w")
6800         (fma:V4SF
6801          (float_extend:V4SF
6802           (vec_select:V4HF
6803            (match_operand:V8HF 2 "register_operand" "w")
6804            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6805          (float_extend:V4SF
6806           (vec_duplicate:V4HF
6807            (vec_select:HF
6808             (match_operand:V4HF 3 "register_operand" "x")
6809             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6810          (match_operand:V4SF 1 "register_operand" "0")))]
6811   "TARGET_F16FML"
6812   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6813   [(set_attr "type" "neon_fp_mul_s")]
6814 )
6815
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6817   [(set (match_operand:V4SF 0 "register_operand" "=w")
6818         (fma:V4SF
6819          (float_extend:V4SF
6820           (neg:V4HF
6821            (vec_select:V4HF
6822             (match_operand:V8HF 2 "register_operand" "w")
6823             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6824          (float_extend:V4SF
6825           (vec_duplicate:V4HF
6826            (vec_select:HF
6827             (match_operand:V4HF 3 "register_operand" "x")
6828             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6829          (match_operand:V4SF 1 "register_operand" "0")))]
6830   "TARGET_F16FML"
6831   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6832   [(set_attr "type" "neon_fp_mul_s")]
6833 )
6834
6835 ;; pmull
6836
6837 (define_insn "aarch64_crypto_pmulldi"
6838   [(set (match_operand:TI 0 "register_operand" "=w")
6839         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6840                      (match_operand:DI 2 "register_operand" "w")]
6841                     UNSPEC_PMULL))]
6842  "TARGET_SIMD && TARGET_AES"
6843  "pmull\\t%0.1q, %1.1d, %2.1d"
6844   [(set_attr "type" "crypto_pmull")]
6845 )
6846
6847 (define_insn "aarch64_crypto_pmullv2di"
6848  [(set (match_operand:TI 0 "register_operand" "=w")
6849        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6850                    (match_operand:V2DI 2 "register_operand" "w")]
6851                   UNSPEC_PMULL2))]
6852   "TARGET_SIMD && TARGET_AES"
6853   "pmull2\\t%0.1q, %1.2d, %2.2d"
6854   [(set_attr "type" "crypto_pmull")]
6855 )