gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; These instructions map to the __builtins for the Dot Product operations.
 423 (define_insn "aarch64_<sur>dot<vsi2qi>"
 424   [(set (match_operand:VS 0 "register_operand" "=w")
 425         (plus:VS (match_operand:VS 1 "register_operand" "0")
 426                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 427                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 428                 DOTPROD)))]
 429   "TARGET_DOTPROD"
 430   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 431   [(set_attr "type" "neon_dot")]
 432 )
 433
 434 ;; These expands map to the Dot Product optab the vectorizer checks for.
 435 ;; The auto-vectorizer expects a dot product builtin that also does an
 436 ;; accumulation into the provided register.
 437 ;; Given the following pattern
 438 ;;
 439 ;; for (i=0; i<len; i++) {
 440 ;;     c = a[i] * b[i];
 441 ;;     r += c;
 442 ;; }
 443 ;; return result;
 444 ;;
 445 ;; This can be auto-vectorized to
 446 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 447 ;;
 448 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 451 ;; ...
 452 ;;
 453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 454 (define_expand "<sur>dot_prod<vsi2qi>"
 455   [(set (match_operand:VS 0 "register_operand")
 456         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 457                             (match_operand:<VSI2QI> 2 "register_operand")]
 458                  DOTPROD)
 459                 (match_operand:VS 3 "register_operand")))]
 460   "TARGET_DOTPROD"
 461 {
 462   emit_insn (
 463     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 464                                     operands[2]));
 465   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 466   DONE;
 467 })
 468
 469 ;; These instructions map to the __builtins for the Dot Product
 470 ;; indexed operations.
 471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 472   [(set (match_operand:VS 0 "register_operand" "=w")
 473         (plus:VS (match_operand:VS 1 "register_operand" "0")
 474                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 475                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 476                             (match_operand:SI 4 "immediate_operand" "i")]
 477                 DOTPROD)))]
 478   "TARGET_DOTPROD"
 479   {
 480     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 481     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 482   }
 483   [(set_attr "type" "neon_dot")]
 484 )
 485
 486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 487   [(set (match_operand:VS 0 "register_operand" "=w")
 488         (plus:VS (match_operand:VS 1 "register_operand" "0")
 489                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 490                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 491                             (match_operand:SI 4 "immediate_operand" "i")]
 492                 DOTPROD)))]
 493   "TARGET_DOTPROD"
 494   {
 495     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 496     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 497   }
 498   [(set_attr "type" "neon_dot")]
 499 )
 500
 501 (define_expand "copysign<mode>3"
 502   [(match_operand:VHSDF 0 "register_operand")
 503    (match_operand:VHSDF 1 "register_operand")
 504    (match_operand:VHSDF 2 "register_operand")]
 505   "TARGET_FLOAT && TARGET_SIMD"
 506 {
 507   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 508   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 509
 510   emit_move_insn (v_bitmask,
 511                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 512                                                      HOST_WIDE_INT_M1U << bits));
 513   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 514                                          operands[2], operands[1]));
 515   DONE;
 516 }
 517 )
 518
 519 (define_insn "*aarch64_mul3_elt<mode>"
 520  [(set (match_operand:VMUL 0 "register_operand" "=w")
 521     (mult:VMUL
 522       (vec_duplicate:VMUL
 523           (vec_select:<VEL>
 524             (match_operand:VMUL 1 "register_operand" "<h_con>")
 525             (parallel [(match_operand:SI 2 "immediate_operand")])))
 526       (match_operand:VMUL 3 "register_operand" "w")))]
 527   "TARGET_SIMD"
 528   {
 529     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 530     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 531   }
 532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 533 )
 534
 535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 536   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 537      (mult:VMUL_CHANGE_NLANES
 538        (vec_duplicate:VMUL_CHANGE_NLANES
 539           (vec_select:<VEL>
 540             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 541             (parallel [(match_operand:SI 2 "immediate_operand")])))
 542       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 543   "TARGET_SIMD"
 544   {
 545     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 546     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 547   }
 548   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 549 )
 550
 551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 552  [(set (match_operand:VMUL 0 "register_operand" "=w")
 553     (mult:VMUL
 554       (vec_duplicate:VMUL
 555             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 556       (match_operand:VMUL 2 "register_operand" "w")))]
 557   "TARGET_SIMD"
 558   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 559   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 560 )
 561
 562 (define_insn "aarch64_rsqrte<mode>"
 563   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 564         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 565                      UNSPEC_RSQRTE))]
 566   "TARGET_SIMD"
 567   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 568   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 569
 570 (define_insn "aarch64_rsqrts<mode>"
 571   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 572         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 573                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 574          UNSPEC_RSQRTS))]
 575   "TARGET_SIMD"
 576   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 577   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 578
 579 (define_expand "rsqrt<mode>2"
 580   [(set (match_operand:VALLF 0 "register_operand" "=w")
 581         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 582                      UNSPEC_RSQRT))]
 583   "TARGET_SIMD"
 584 {
 585   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 586   DONE;
 587 })
 588
 589 (define_insn "*aarch64_mul3_elt_to_64v2df"
 590   [(set (match_operand:DF 0 "register_operand" "=w")
 591      (mult:DF
 592        (vec_select:DF
 593          (match_operand:V2DF 1 "register_operand" "w")
 594          (parallel [(match_operand:SI 2 "immediate_operand")]))
 595        (match_operand:DF 3 "register_operand" "w")))]
 596   "TARGET_SIMD"
 597   {
 598     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 599     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 600   }
 601   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 602 )
 603
 604 (define_insn "neg<mode>2"
 605   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 606         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 607   "TARGET_SIMD"
 608   "neg\t%0.<Vtype>, %1.<Vtype>"
 609   [(set_attr "type" "neon_neg<q>")]
 610 )
 611
 612 (define_insn "abs<mode>2"
 613   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 614         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 615   "TARGET_SIMD"
 616   "abs\t%0.<Vtype>, %1.<Vtype>"
 617   [(set_attr "type" "neon_abs<q>")]
 618 )
 619
 620 ;; The intrinsic version of integer ABS must not be allowed to
 621 ;; combine with any operation with an integerated ABS step, such
 622 ;; as SABD.
 623 (define_insn "aarch64_abs<mode>"
 624   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 625           (unspec:VSDQ_I_DI
 626             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 627            UNSPEC_ABS))]
 628   "TARGET_SIMD"
 629   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 630   [(set_attr "type" "neon_abs<q>")]
 631 )
 632
 633 (define_insn "abd<mode>_3"
 634   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 635         (abs:VDQ_BHSI (minus:VDQ_BHSI
 636                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 637                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 638   "TARGET_SIMD"
 639   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 640   [(set_attr "type" "neon_abd<q>")]
 641 )
 642
 643 (define_insn "aarch64_<sur>abdl2<mode>_3"
 644   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 645         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 646                           (match_operand:VDQV_S 2 "register_operand" "w")]
 647         ABDL2))]
 648   "TARGET_SIMD"
 649   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 650   [(set_attr "type" "neon_abd<q>")]
 651 )
 652
 653 (define_insn "aarch64_<sur>abal<mode>_4"
 654   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 655         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 656                           (match_operand:VDQV_S 2 "register_operand" "w")
 657                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 658         ABAL))]
 659   "TARGET_SIMD"
 660   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 661   [(set_attr "type" "neon_arith_acc<q>")]
 662 )
 663
 664 (define_insn "aarch64_<sur>adalp<mode>_3"
 665   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 666         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 667                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 668         ADALP))]
 669   "TARGET_SIMD"
 670   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 671   [(set_attr "type" "neon_reduc_add<q>")]
 672 )
 673
 674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 675 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 676 ;; reduction of the difference into a V4SI vector and accumulate that into
 677 ;; operand 3 before copying that into the result operand 0.
 678 ;; Perform that with a sequence of:
 679 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 680 ;; UABAL        tmp.8h, op1.16b, op2.16b
 681 ;; UADALP       op3.4s, tmp.8h
 682 ;; MOV          op0, op3 // should be eliminated in later passes.
 683 ;; The signed version just uses the signed variants of the above instructions.
 684
 685 (define_expand "<sur>sadv16qi"
 686   [(use (match_operand:V4SI 0 "register_operand"))
 687    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 688                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 689    (use (match_operand:V4SI 3 "register_operand"))]
 690   "TARGET_SIMD"
 691   {
 692     rtx reduc = gen_reg_rtx (V8HImode);
 693     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 694                                                operands[2]));
 695     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 696                                               operands[2], reduc));
 697     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 698                                               operands[3]));
 699     emit_move_insn (operands[0], operands[3]);
 700     DONE;
 701   }
 702 )
 703
 704 (define_insn "aba<mode>_3"
 705   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 706         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 707                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 708                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 709                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 710   "TARGET_SIMD"
 711   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 712   [(set_attr "type" "neon_arith_acc<q>")]
 713 )
 714
 715 (define_insn "fabd<mode>3"
 716   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 717         (abs:VHSDF_HSDF
 718           (minus:VHSDF_HSDF
 719             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 720             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 721   "TARGET_SIMD"
 722   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 723   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 724 )
 725
 726 ;; For AND (vector, register) and BIC (vector, immediate)
 727 (define_insn "and<mode>3"
 728   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 729         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 730                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 731   "TARGET_SIMD"
 732   {
 733     switch (which_alternative)
 734       {
 735       case 0:
 736         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 737       case 1:
 738         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 739                                                   AARCH64_CHECK_BIC);
 740       default:
 741         gcc_unreachable ();
 742       }
 743   }
 744   [(set_attr "type" "neon_logic<q>")]
 745 )
 746
 747 ;; For ORR (vector, register) and ORR (vector, immediate)
 748 (define_insn "ior<mode>3"
 749   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 750         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 751                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 752   "TARGET_SIMD"
 753   {
 754     switch (which_alternative)
 755       {
 756       case 0:
 757         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 758       case 1:
 759         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 760                                                   AARCH64_CHECK_ORR);
 761       default:
 762         gcc_unreachable ();
 763       }
 764   }
 765   [(set_attr "type" "neon_logic<q>")]
 766 )
 767
 768 (define_insn "xor<mode>3"
 769   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 770         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 771                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 772   "TARGET_SIMD"
 773   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 774   [(set_attr "type" "neon_logic<q>")]
 775 )
 776
 777 (define_insn "one_cmpl<mode>2"
 778   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 779         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 780   "TARGET_SIMD"
 781   "not\t%0.<Vbtype>, %1.<Vbtype>"
 782   [(set_attr "type" "neon_logic<q>")]
 783 )
 784
 785 (define_insn "aarch64_simd_vec_set<mode>"
 786   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 787         (vec_merge:VALL_F16
 788             (vec_duplicate:VALL_F16
 789                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 790             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 791             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 792   "TARGET_SIMD"
 793   {
 794    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 795    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 796    switch (which_alternative)
 797      {
 798      case 0:
 799         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 800      case 1:
 801         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 802      case 2:
 803         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 804      default:
 805         gcc_unreachable ();
 806      }
 807   }
 808   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 809 )
 810
 811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 812   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 813         (vec_merge:VALL_F16
 814             (vec_duplicate:VALL_F16
 815               (vec_select:<VEL>
 816                 (match_operand:VALL_F16 3 "register_operand" "w")
 817                 (parallel
 818                   [(match_operand:SI 4 "immediate_operand" "i")])))
 819             (match_operand:VALL_F16 1 "register_operand" "0")
 820             (match_operand:SI 2 "immediate_operand" "i")))]
 821   "TARGET_SIMD"
 822   {
 823     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 824     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 825     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 826
 827     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 828   }
 829   [(set_attr "type" "neon_ins<q>")]
 830 )
 831
 832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 833   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 834         (vec_merge:VALL_F16_NO_V2Q
 835             (vec_duplicate:VALL_F16_NO_V2Q
 836               (vec_select:<VEL>
 837                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 838                 (parallel
 839                   [(match_operand:SI 4 "immediate_operand" "i")])))
 840             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 841             (match_operand:SI 2 "immediate_operand" "i")))]
 842   "TARGET_SIMD"
 843   {
 844     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 845     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 846     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 847                                            INTVAL (operands[4]));
 848
 849     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 850   }
 851   [(set_attr "type" "neon_ins<q>")]
 852 )
 853
 854 (define_insn "aarch64_simd_lshr<mode>"
 855  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 856        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 857                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 858  "TARGET_SIMD"
 859  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 860   [(set_attr "type" "neon_shift_imm<q>")]
 861 )
 862
 863 (define_insn "aarch64_simd_ashr<mode>"
 864  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 865        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 866                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 867  "TARGET_SIMD"
 868  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 869   [(set_attr "type" "neon_shift_imm<q>")]
 870 )
 871
 872 (define_insn "aarch64_simd_imm_shl<mode>"
 873  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 874        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 875                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 876  "TARGET_SIMD"
 877   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 878   [(set_attr "type" "neon_shift_imm<q>")]
 879 )
 880
 881 (define_insn "aarch64_simd_reg_sshl<mode>"
 882  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 883        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 884                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 885  "TARGET_SIMD"
 886  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 887   [(set_attr "type" "neon_shift_reg<q>")]
 888 )
 889
 890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 891  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 892        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 893                     (match_operand:VDQ_I 2 "register_operand" "w")]
 894                    UNSPEC_ASHIFT_UNSIGNED))]
 895  "TARGET_SIMD"
 896  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 897   [(set_attr "type" "neon_shift_reg<q>")]
 898 )
 899
 900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 901  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 902        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 903                     (match_operand:VDQ_I 2 "register_operand" "w")]
 904                    UNSPEC_ASHIFT_SIGNED))]
 905  "TARGET_SIMD"
 906  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 907   [(set_attr "type" "neon_shift_reg<q>")]
 908 )
 909
 910 (define_expand "ashl<mode>3"
 911   [(match_operand:VDQ_I 0 "register_operand" "")
 912    (match_operand:VDQ_I 1 "register_operand" "")
 913    (match_operand:SI  2 "general_operand" "")]
 914  "TARGET_SIMD"
 915 {
 916   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 917   int shift_amount;
 918
 919   if (CONST_INT_P (operands[2]))
 920     {
 921       shift_amount = INTVAL (operands[2]);
 922       if (shift_amount >= 0 && shift_amount < bit_width)
 923         {
 924           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 925                                                        shift_amount);
 926           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 927                                                      operands[1],
 928                                                      tmp));
 929           DONE;
 930         }
 931       else
 932         {
 933           operands[2] = force_reg (SImode, operands[2]);
 934         }
 935     }
 936   else if (MEM_P (operands[2]))
 937     {
 938       operands[2] = force_reg (SImode, operands[2]);
 939     }
 940
 941   if (REG_P (operands[2]))
 942     {
 943       rtx tmp = gen_reg_rtx (<MODE>mode);
 944       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 945                                              convert_to_mode (<VEL>mode,
 946                                                               operands[2],
 947                                                               0)));
 948       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 949                                                   tmp));
 950       DONE;
 951     }
 952   else
 953     FAIL;
 954 }
 955 )
 956
 957 (define_expand "lshr<mode>3"
 958   [(match_operand:VDQ_I 0 "register_operand" "")
 959    (match_operand:VDQ_I 1 "register_operand" "")
 960    (match_operand:SI  2 "general_operand" "")]
 961  "TARGET_SIMD"
 962 {
 963   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 964   int shift_amount;
 965
 966   if (CONST_INT_P (operands[2]))
 967     {
 968       shift_amount = INTVAL (operands[2]);
 969       if (shift_amount > 0 && shift_amount <= bit_width)
 970         {
 971           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 972                                                        shift_amount);
 973           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 974                                                   operands[1],
 975                                                   tmp));
 976           DONE;
 977         }
 978       else
 979         operands[2] = force_reg (SImode, operands[2]);
 980     }
 981   else if (MEM_P (operands[2]))
 982     {
 983       operands[2] = force_reg (SImode, operands[2]);
 984     }
 985
 986   if (REG_P (operands[2]))
 987     {
 988       rtx tmp = gen_reg_rtx (SImode);
 989       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 990       emit_insn (gen_negsi2 (tmp, operands[2]));
 991       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 992                                              convert_to_mode (<VEL>mode,
 993                                                               tmp, 0)));
 994       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 995                                                           operands[1],
 996                                                           tmp1));
 997       DONE;
 998     }
 999   else
1000     FAIL;
1001 }
1002 )
1003
1004 (define_expand "ashr<mode>3"
1005   [(match_operand:VDQ_I 0 "register_operand" "")
1006    (match_operand:VDQ_I 1 "register_operand" "")
1007    (match_operand:SI  2 "general_operand" "")]
1008  "TARGET_SIMD"
1009 {
1010   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011   int shift_amount;
1012
1013   if (CONST_INT_P (operands[2]))
1014     {
1015       shift_amount = INTVAL (operands[2]);
1016       if (shift_amount > 0 && shift_amount <= bit_width)
1017         {
1018           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019                                                        shift_amount);
1020           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021                                                   operands[1],
1022                                                   tmp));
1023           DONE;
1024         }
1025       else
1026         operands[2] = force_reg (SImode, operands[2]);
1027     }
1028   else if (MEM_P (operands[2]))
1029     {
1030       operands[2] = force_reg (SImode, operands[2]);
1031     }
1032
1033   if (REG_P (operands[2]))
1034     {
1035       rtx tmp = gen_reg_rtx (SImode);
1036       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037       emit_insn (gen_negsi2 (tmp, operands[2]));
1038       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039                                              convert_to_mode (<VEL>mode,
1040                                                               tmp, 0)));
1041       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042                                                         operands[1],
1043                                                         tmp1));
1044       DONE;
1045     }
1046   else
1047     FAIL;
1048 }
1049 )
1050
1051 (define_expand "vashl<mode>3"
1052  [(match_operand:VDQ_I 0 "register_operand" "")
1053   (match_operand:VDQ_I 1 "register_operand" "")
1054   (match_operand:VDQ_I 2 "register_operand" "")]
1055  "TARGET_SIMD"
1056 {
1057   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058                                               operands[2]));
1059   DONE;
1060 })
1061
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067   (match_operand:VDQ_BHSI 1 "register_operand" "")
1068   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069  "TARGET_SIMD"
1070 {
1071   rtx neg = gen_reg_rtx (<MODE>mode);
1072   emit (gen_neg<mode>2 (neg, operands[2]));
1073   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074                                                     neg));
1075   DONE;
1076 })
1077
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080   [(match_operand:DI 0 "register_operand" "=w")
1081    (match_operand:DI 1 "register_operand" "w")
1082    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083   "TARGET_SIMD"
1084   {
1085     /* An arithmetic shift right by 64 fills the result with copies of the sign
1086        bit, just like asr by 63 - however the standard pattern does not handle
1087        a shift by 64.  */
1088     if (INTVAL (operands[2]) == 64)
1089       operands[2] = GEN_INT (63);
1090     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091     DONE;
1092   }
1093 )
1094
1095 (define_expand "vlshr<mode>3"
1096  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097   (match_operand:VDQ_BHSI 1 "register_operand" "")
1098   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099  "TARGET_SIMD"
1100 {
1101   rtx neg = gen_reg_rtx (<MODE>mode);
1102   emit (gen_neg<mode>2 (neg, operands[2]));
1103   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104                                                       neg));
1105   DONE;
1106 })
1107
1108 (define_expand "aarch64_lshr_simddi"
1109   [(match_operand:DI 0 "register_operand" "=w")
1110    (match_operand:DI 1 "register_operand" "w")
1111    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112   "TARGET_SIMD"
1113   {
1114     if (INTVAL (operands[2]) == 64)
1115       emit_move_insn (operands[0], const0_rtx);
1116     else
1117       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118     DONE;
1119   }
1120 )
1121
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1123 (define_insn "vec_shr_<mode>"
1124   [(set (match_operand:VD 0 "register_operand" "=w")
1125         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126                     (match_operand:SI 2 "immediate_operand" "i")]
1127                    UNSPEC_VEC_SHR))]
1128   "TARGET_SIMD"
1129   {
1130     if (BYTES_BIG_ENDIAN)
1131       return "shl %d0, %d1, %2";
1132     else
1133       return "ushr %d0, %d1, %2";
1134   }
1135   [(set_attr "type" "neon_shift_imm")]
1136 )
1137
1138 (define_expand "vec_set<mode>"
1139   [(match_operand:VALL_F16 0 "register_operand" "+w")
1140    (match_operand:<VEL> 1 "register_operand" "w")
1141    (match_operand:SI 2 "immediate_operand" "")]
1142   "TARGET_SIMD"
1143   {
1144     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146                                           GEN_INT (elem), operands[0]));
1147     DONE;
1148   }
1149 )
1150
1151
1152 (define_insn "aarch64_mla<mode>"
1153  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154        (plus:VDQ_BHSI (mult:VDQ_BHSI
1155                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158  "TARGET_SIMD"
1159  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160   [(set_attr "type" "neon_mla_<Vetype><q>")]
1161 )
1162
1163 (define_insn "*aarch64_mla_elt<mode>"
1164  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165        (plus:VDQHS
1166          (mult:VDQHS
1167            (vec_duplicate:VDQHS
1168               (vec_select:<VEL>
1169                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1171            (match_operand:VDQHS 3 "register_operand" "w"))
1172          (match_operand:VDQHS 4 "register_operand" "0")))]
1173  "TARGET_SIMD"
1174   {
1175     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177   }
1178   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1179 )
1180
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183        (plus:VDQHS
1184          (mult:VDQHS
1185            (vec_duplicate:VDQHS
1186               (vec_select:<VEL>
1187                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1189            (match_operand:VDQHS 3 "register_operand" "w"))
1190          (match_operand:VDQHS 4 "register_operand" "0")))]
1191  "TARGET_SIMD"
1192   {
1193     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195   }
1196   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1197 )
1198
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201         (plus:VDQHS
1202           (mult:VDQHS (vec_duplicate:VDQHS
1203                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204                 (match_operand:VDQHS 2 "register_operand" "w"))
1205           (match_operand:VDQHS 3 "register_operand" "0")))]
1206  "TARGET_SIMD"
1207  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "aarch64_mls<mode>"
1212  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216  "TARGET_SIMD"
1217  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218   [(set_attr "type" "neon_mla_<Vetype><q>")]
1219 )
1220
1221 (define_insn "*aarch64_mls_elt<mode>"
1222  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223        (minus:VDQHS
1224          (match_operand:VDQHS 4 "register_operand" "0")
1225          (mult:VDQHS
1226            (vec_duplicate:VDQHS
1227               (vec_select:<VEL>
1228                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1230            (match_operand:VDQHS 3 "register_operand" "w"))))]
1231  "TARGET_SIMD"
1232   {
1233     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235   }
1236   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1237 )
1238
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241        (minus:VDQHS
1242          (match_operand:VDQHS 4 "register_operand" "0")
1243          (mult:VDQHS
1244            (vec_duplicate:VDQHS
1245               (vec_select:<VEL>
1246                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1248            (match_operand:VDQHS 3 "register_operand" "w"))))]
1249  "TARGET_SIMD"
1250   {
1251     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253   }
1254   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1255 )
1256
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259         (minus:VDQHS
1260           (match_operand:VDQHS 1 "register_operand" "0")
1261           (mult:VDQHS (vec_duplicate:VDQHS
1262                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264   "TARGET_SIMD"
1265   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267 )
1268
1269 ;; Max/Min operations.
1270 (define_insn "<su><maxmin><mode>3"
1271  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274  "TARGET_SIMD"
1275  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276   [(set_attr "type" "neon_minmax<q>")]
1277 )
1278
1279 (define_expand "<su><maxmin>v2di3"
1280  [(set (match_operand:V2DI 0 "register_operand" "")
1281        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282                     (match_operand:V2DI 2 "register_operand" "")))]
1283  "TARGET_SIMD"
1284 {
1285   enum rtx_code cmp_operator;
1286   rtx cmp_fmt;
1287
1288   switch (<CODE>)
1289     {
1290     case UMIN:
1291       cmp_operator = LTU;
1292       break;
1293     case SMIN:
1294       cmp_operator = LT;
1295       break;
1296     case UMAX:
1297       cmp_operator = GTU;
1298       break;
1299     case SMAX:
1300       cmp_operator = GT;
1301       break;
1302     default:
1303       gcc_unreachable ();
1304     }
1305
1306   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308               operands[2], cmp_fmt, operands[1], operands[2]));
1309   DONE;
1310 })
1311
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317                         MAXMINV))]
1318  "TARGET_SIMD"
1319  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320   [(set_attr "type" "neon_minmax<q>")]
1321 )
1322
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327                       (match_operand:VHSDF 2 "register_operand" "w")]
1328                       FMAXMINV))]
1329  "TARGET_SIMD"
1330  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331   [(set_attr "type" "neon_minmax<q>")]
1332 )
1333
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1339
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1343
1344 (define_insn "move_lo_quad_internal_<mode>"
1345   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346         (vec_concat:VQ_NO2E
1347           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348           (vec_duplicate:<VHALF> (const_int 0))))]
1349   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350   "@
1351    dup\\t%d0, %1.d[0]
1352    fmov\\t%d0, %1
1353    dup\\t%d0, %1"
1354   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355    (set_attr "simd" "yes,*,yes")
1356    (set_attr "fp" "*,yes,*")
1357    (set_attr "length" "4")]
1358 )
1359
1360 (define_insn "move_lo_quad_internal_<mode>"
1361   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1362         (vec_concat:VQ_2E
1363           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1364           (const_int 0)))]
1365   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1366   "@
1367    dup\\t%d0, %1.d[0]
1368    fmov\\t%d0, %1
1369    dup\\t%d0, %1"
1370   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1371    (set_attr "simd" "yes,*,yes")
1372    (set_attr "fp" "*,yes,*")
1373    (set_attr "length" "4")]
1374 )
1375
1376 (define_insn "move_lo_quad_internal_be_<mode>"
1377   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1378         (vec_concat:VQ_NO2E
1379           (vec_duplicate:<VHALF> (const_int 0))
1380           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1381   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1382   "@
1383    dup\\t%d0, %1.d[0]
1384    fmov\\t%d0, %1
1385    dup\\t%d0, %1"
1386   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1387    (set_attr "simd" "yes,*,yes")
1388    (set_attr "fp" "*,yes,*")
1389    (set_attr "length" "4")]
1390 )
1391
1392 (define_insn "move_lo_quad_internal_be_<mode>"
1393   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1394         (vec_concat:VQ_2E
1395           (const_int 0)
1396           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1397   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1398   "@
1399    dup\\t%d0, %1.d[0]
1400    fmov\\t%d0, %1
1401    dup\\t%d0, %1"
1402   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1403    (set_attr "simd" "yes,*,yes")
1404    (set_attr "fp" "*,yes,*")
1405    (set_attr "length" "4")]
1406 )
1407
1408 (define_expand "move_lo_quad_<mode>"
1409   [(match_operand:VQ 0 "register_operand")
1410    (match_operand:VQ 1 "register_operand")]
1411   "TARGET_SIMD"
1412 {
1413   if (BYTES_BIG_ENDIAN)
1414     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1415   else
1416     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1417   DONE;
1418 }
1419 )
1420
1421 ;; Move operand1 to the high architectural bits of the register, keeping
1422 ;; the low architectural bits of operand2.
1423 ;; For little-endian this is { operand2, operand1 }
1424 ;; For big-endian this is { operand1, operand2 }
1425
1426 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1427   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1428         (vec_concat:VQ
1429           (vec_select:<VHALF>
1430                 (match_dup 0)
1431                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1432           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1433   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1434   "@
1435    ins\\t%0.d[1], %1.d[0]
1436    ins\\t%0.d[1], %1"
1437   [(set_attr "type" "neon_ins")]
1438 )
1439
1440 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1441   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1442         (vec_concat:VQ
1443           (match_operand:<VHALF> 1 "register_operand" "w,r")
1444           (vec_select:<VHALF>
1445                 (match_dup 0)
1446                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1447   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1448   "@
1449    ins\\t%0.d[1], %1.d[0]
1450    ins\\t%0.d[1], %1"
1451   [(set_attr "type" "neon_ins")]
1452 )
1453
1454 (define_expand "move_hi_quad_<mode>"
1455  [(match_operand:VQ 0 "register_operand" "")
1456   (match_operand:<VHALF> 1 "register_operand" "")]
1457  "TARGET_SIMD"
1458 {
1459   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1460   if (BYTES_BIG_ENDIAN)
1461     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1462                     operands[1], p));
1463   else
1464     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1465                     operands[1], p));
1466   DONE;
1467 })
1468
1469 ;; Narrowing operations.
1470
1471 ;; For doubles.
1472 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1473  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1474        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1475  "TARGET_SIMD"
1476  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1477   [(set_attr "type" "neon_shift_imm_narrow_q")]
1478 )
1479
1480 (define_expand "vec_pack_trunc_<mode>"
1481  [(match_operand:<VNARROWD> 0 "register_operand" "")
1482   (match_operand:VDN 1 "register_operand" "")
1483   (match_operand:VDN 2 "register_operand" "")]
1484  "TARGET_SIMD"
1485 {
1486   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1487   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1488   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1489
1490   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1491   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1492   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1493   DONE;
1494 })
1495
1496 ;; For quads.
1497
1498 (define_insn "vec_pack_trunc_<mode>"
1499  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1500        (vec_concat:<VNARROWQ2>
1501          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1502          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1503  "TARGET_SIMD"
1504  {
1505    if (BYTES_BIG_ENDIAN)
1506      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1507    else
1508      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1509  }
1510   [(set_attr "type" "multiple")
1511    (set_attr "length" "8")]
1512 )
1513
1514 ;; Widening operations.
1515
1516 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1517   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1519                                (match_operand:VQW 1 "register_operand" "w")
1520                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1521                             )))]
1522   "TARGET_SIMD"
1523   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1524   [(set_attr "type" "neon_shift_imm_long")]
1525 )
1526
1527 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1528   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1529         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1530                                (match_operand:VQW 1 "register_operand" "w")
1531                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1532                             )))]
1533   "TARGET_SIMD"
1534   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1535   [(set_attr "type" "neon_shift_imm_long")]
1536 )
1537
1538 (define_expand "vec_unpack<su>_hi_<mode>"
1539   [(match_operand:<VWIDE> 0 "register_operand" "")
1540    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1541   "TARGET_SIMD"
1542   {
1543     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1544     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1545                                                           operands[1], p));
1546     DONE;
1547   }
1548 )
1549
1550 (define_expand "vec_unpack<su>_lo_<mode>"
1551   [(match_operand:<VWIDE> 0 "register_operand" "")
1552    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1553   "TARGET_SIMD"
1554   {
1555     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1556     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1557                                                           operands[1], p));
1558     DONE;
1559   }
1560 )
1561
1562 ;; Widening arithmetic.
1563
1564 (define_insn "*aarch64_<su>mlal_lo<mode>"
1565   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1566         (plus:<VWIDE>
1567           (mult:<VWIDE>
1568               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569                  (match_operand:VQW 2 "register_operand" "w")
1570                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1571               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1572                  (match_operand:VQW 4 "register_operand" "w")
1573                  (match_dup 3))))
1574           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1575   "TARGET_SIMD"
1576   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1577   [(set_attr "type" "neon_mla_<Vetype>_long")]
1578 )
1579
1580 (define_insn "*aarch64_<su>mlal_hi<mode>"
1581   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1582         (plus:<VWIDE>
1583           (mult:<VWIDE>
1584               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1585                  (match_operand:VQW 2 "register_operand" "w")
1586                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1587               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1588                  (match_operand:VQW 4 "register_operand" "w")
1589                  (match_dup 3))))
1590           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1591   "TARGET_SIMD"
1592   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1593   [(set_attr "type" "neon_mla_<Vetype>_long")]
1594 )
1595
1596 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1597   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598         (minus:<VWIDE>
1599           (match_operand:<VWIDE> 1 "register_operand" "0")
1600           (mult:<VWIDE>
1601               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602                  (match_operand:VQW 2 "register_operand" "w")
1603                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1604               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1605                  (match_operand:VQW 4 "register_operand" "w")
1606                  (match_dup 3))))))]
1607   "TARGET_SIMD"
1608   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1609   [(set_attr "type" "neon_mla_<Vetype>_long")]
1610 )
1611
1612 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1613   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1614         (minus:<VWIDE>
1615           (match_operand:<VWIDE> 1 "register_operand" "0")
1616           (mult:<VWIDE>
1617               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1618                  (match_operand:VQW 2 "register_operand" "w")
1619                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1620               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1621                  (match_operand:VQW 4 "register_operand" "w")
1622                  (match_dup 3))))))]
1623   "TARGET_SIMD"
1624   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1625   [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
1628 (define_insn "*aarch64_<su>mlal<mode>"
1629   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630         (plus:<VWIDE>
1631           (mult:<VWIDE>
1632             (ANY_EXTEND:<VWIDE>
1633               (match_operand:VD_BHSI 1 "register_operand" "w"))
1634             (ANY_EXTEND:<VWIDE>
1635               (match_operand:VD_BHSI 2 "register_operand" "w")))
1636           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1637   "TARGET_SIMD"
1638   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1639   [(set_attr "type" "neon_mla_<Vetype>_long")]
1640 )
1641
1642 (define_insn "*aarch64_<su>mlsl<mode>"
1643   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1644         (minus:<VWIDE>
1645           (match_operand:<VWIDE> 1 "register_operand" "0")
1646           (mult:<VWIDE>
1647             (ANY_EXTEND:<VWIDE>
1648               (match_operand:VD_BHSI 2 "register_operand" "w"))
1649             (ANY_EXTEND:<VWIDE>
1650               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1651   "TARGET_SIMD"
1652   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1653   [(set_attr "type" "neon_mla_<Vetype>_long")]
1654 )
1655
1656 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1657  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1658        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1659                            (match_operand:VQW 1 "register_operand" "w")
1660                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1661                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1662                            (match_operand:VQW 2 "register_operand" "w")
1663                            (match_dup 3)))))]
1664   "TARGET_SIMD"
1665   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1666   [(set_attr "type" "neon_mul_<Vetype>_long")]
1667 )
1668
1669 (define_expand "vec_widen_<su>mult_lo_<mode>"
1670   [(match_operand:<VWIDE> 0 "register_operand" "")
1671    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1672    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1673  "TARGET_SIMD"
1674  {
1675    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1676    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1677                                                        operands[1],
1678                                                        operands[2], p));
1679    DONE;
1680  }
1681 )
1682
1683 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1684  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1685       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686                             (match_operand:VQW 1 "register_operand" "w")
1687                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1688                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689                             (match_operand:VQW 2 "register_operand" "w")
1690                             (match_dup 3)))))]
1691   "TARGET_SIMD"
1692   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1693   [(set_attr "type" "neon_mul_<Vetype>_long")]
1694 )
1695
1696 (define_expand "vec_widen_<su>mult_hi_<mode>"
1697   [(match_operand:<VWIDE> 0 "register_operand" "")
1698    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1699    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1700  "TARGET_SIMD"
1701  {
1702    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1703    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1704                                                        operands[1],
1705                                                        operands[2], p));
1706    DONE;
1707
1708  }
1709 )
1710
1711 ;; FP vector operations.
1712 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1713 ;; double-precision (64-bit) floating-point data types and arithmetic as
1714 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1715 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1716 ;;
1717 ;; Floating-point operations can raise an exception.  Vectorizing such
1718 ;; operations are safe because of reasons explained below.
1719 ;;
1720 ;; ARMv8 permits an extension to enable trapped floating-point
1721 ;; exception handling, however this is an optional feature.  In the
1722 ;; event of a floating-point exception being raised by vectorised
1723 ;; code then:
1724 ;; 1.  If trapped floating-point exceptions are available, then a trap
1725 ;;     will be taken when any lane raises an enabled exception.  A trap
1726 ;;     handler may determine which lane raised the exception.
1727 ;; 2.  Alternatively a sticky exception flag is set in the
1728 ;;     floating-point status register (FPSR).  Software may explicitly
1729 ;;     test the exception flags, in which case the tests will either
1730 ;;     prevent vectorisation, allowing precise identification of the
1731 ;;     failing operation, or if tested outside of vectorisable regions
1732 ;;     then the specific operation and lane are not of interest.
1733
1734 ;; FP arithmetic operations.
1735
1736 (define_insn "add<mode>3"
1737  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1738        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1739                    (match_operand:VHSDF 2 "register_operand" "w")))]
1740  "TARGET_SIMD"
1741  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1743 )
1744
1745 (define_insn "sub<mode>3"
1746  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1747        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1748                     (match_operand:VHSDF 2 "register_operand" "w")))]
1749  "TARGET_SIMD"
1750  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1751   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1752 )
1753
1754 (define_insn "mul<mode>3"
1755  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757                    (match_operand:VHSDF 2 "register_operand" "w")))]
1758  "TARGET_SIMD"
1759  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1761 )
1762
1763 (define_expand "div<mode>3"
1764  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1766                   (match_operand:VHSDF 2 "register_operand" "w")))]
1767  "TARGET_SIMD"
1768 {
1769   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1770     DONE;
1771
1772   operands[1] = force_reg (<MODE>mode, operands[1]);
1773 })
1774
1775 (define_insn "*div<mode>3"
1776  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1777        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1778                  (match_operand:VHSDF 2 "register_operand" "w")))]
1779  "TARGET_SIMD"
1780  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1781   [(set_attr "type" "neon_fp_div_<stype><q>")]
1782 )
1783
1784 (define_insn "neg<mode>2"
1785  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1786        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1787  "TARGET_SIMD"
1788  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1789   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1790 )
1791
1792 (define_insn "abs<mode>2"
1793  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1794        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1795  "TARGET_SIMD"
1796  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1797   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1798 )
1799
1800 (define_insn "fma<mode>4"
1801   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1802        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1803                   (match_operand:VHSDF 2 "register_operand" "w")
1804                   (match_operand:VHSDF 3 "register_operand" "0")))]
1805   "TARGET_SIMD"
1806  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1807   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1808 )
1809
1810 (define_insn "*aarch64_fma4_elt<mode>"
1811   [(set (match_operand:VDQF 0 "register_operand" "=w")
1812     (fma:VDQF
1813       (vec_duplicate:VDQF
1814         (vec_select:<VEL>
1815           (match_operand:VDQF 1 "register_operand" "<h_con>")
1816           (parallel [(match_operand:SI 2 "immediate_operand")])))
1817       (match_operand:VDQF 3 "register_operand" "w")
1818       (match_operand:VDQF 4 "register_operand" "0")))]
1819   "TARGET_SIMD"
1820   {
1821     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1822     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1823   }
1824   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1825 )
1826
1827 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1828   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1829     (fma:VDQSF
1830       (vec_duplicate:VDQSF
1831         (vec_select:<VEL>
1832           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1833           (parallel [(match_operand:SI 2 "immediate_operand")])))
1834       (match_operand:VDQSF 3 "register_operand" "w")
1835       (match_operand:VDQSF 4 "register_operand" "0")))]
1836   "TARGET_SIMD"
1837   {
1838     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1839     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1840   }
1841   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1842 )
1843
1844 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1845   [(set (match_operand:VMUL 0 "register_operand" "=w")
1846     (fma:VMUL
1847       (vec_duplicate:VMUL
1848           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1849       (match_operand:VMUL 2 "register_operand" "w")
1850       (match_operand:VMUL 3 "register_operand" "0")))]
1851   "TARGET_SIMD"
1852   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1853   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1854 )
1855
1856 (define_insn "*aarch64_fma4_elt_to_64v2df"
1857   [(set (match_operand:DF 0 "register_operand" "=w")
1858     (fma:DF
1859         (vec_select:DF
1860           (match_operand:V2DF 1 "register_operand" "w")
1861           (parallel [(match_operand:SI 2 "immediate_operand")]))
1862       (match_operand:DF 3 "register_operand" "w")
1863       (match_operand:DF 4 "register_operand" "0")))]
1864   "TARGET_SIMD"
1865   {
1866     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1867     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1868   }
1869   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1870 )
1871
1872 (define_insn "fnma<mode>4"
1873   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1874         (fma:VHSDF
1875           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1876           (match_operand:VHSDF 2 "register_operand" "w")
1877           (match_operand:VHSDF 3 "register_operand" "0")))]
1878   "TARGET_SIMD"
1879   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1880   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1881 )
1882
1883 (define_insn "*aarch64_fnma4_elt<mode>"
1884   [(set (match_operand:VDQF 0 "register_operand" "=w")
1885     (fma:VDQF
1886       (neg:VDQF
1887         (match_operand:VDQF 3 "register_operand" "w"))
1888       (vec_duplicate:VDQF
1889         (vec_select:<VEL>
1890           (match_operand:VDQF 1 "register_operand" "<h_con>")
1891           (parallel [(match_operand:SI 2 "immediate_operand")])))
1892       (match_operand:VDQF 4 "register_operand" "0")))]
1893   "TARGET_SIMD"
1894   {
1895     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1896     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1897   }
1898   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1899 )
1900
1901 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1902   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1903     (fma:VDQSF
1904       (neg:VDQSF
1905         (match_operand:VDQSF 3 "register_operand" "w"))
1906       (vec_duplicate:VDQSF
1907         (vec_select:<VEL>
1908           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1909           (parallel [(match_operand:SI 2 "immediate_operand")])))
1910       (match_operand:VDQSF 4 "register_operand" "0")))]
1911   "TARGET_SIMD"
1912   {
1913     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1914     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1915   }
1916   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1917 )
1918
1919 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1920   [(set (match_operand:VMUL 0 "register_operand" "=w")
1921     (fma:VMUL
1922       (neg:VMUL
1923         (match_operand:VMUL 2 "register_operand" "w"))
1924       (vec_duplicate:VMUL
1925         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1926       (match_operand:VMUL 3 "register_operand" "0")))]
1927   "TARGET_SIMD"
1928   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1929   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1930 )
1931
1932 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1933   [(set (match_operand:DF 0 "register_operand" "=w")
1934     (fma:DF
1935       (vec_select:DF
1936         (match_operand:V2DF 1 "register_operand" "w")
1937         (parallel [(match_operand:SI 2 "immediate_operand")]))
1938       (neg:DF
1939         (match_operand:DF 3 "register_operand" "w"))
1940       (match_operand:DF 4 "register_operand" "0")))]
1941   "TARGET_SIMD"
1942   {
1943     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1944     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1945   }
1946   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1947 )
1948
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1951 (define_insn "<frint_pattern><mode>2"
1952   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1953         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1954                        FRINT))]
1955   "TARGET_SIMD"
1956   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1957   [(set_attr "type" "neon_fp_round_<stype><q>")]
1958 )
1959
1960 ;; Vector versions of the fcvt standard patterns.
1961 ;; Expands to lbtrunc, lround, lceil, lfloor
1962 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1963   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1964         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1965                                [(match_operand:VHSDF 1 "register_operand" "w")]
1966                                FCVT)))]
1967   "TARGET_SIMD"
1968   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1969   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1970 )
1971
1972 ;; HF Scalar variants of related SIMD instructions.
1973 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1974   [(set (match_operand:HI 0 "register_operand" "=w")
1975         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1976                       FCVT)))]
1977   "TARGET_SIMD_F16INST"
1978   "fcvt<frint_suffix><su>\t%h0, %h1"
1979   [(set_attr "type" "neon_fp_to_int_s")]
1980 )
1981
1982 (define_insn "<optab>_trunchfhi2"
1983   [(set (match_operand:HI 0 "register_operand" "=w")
1984         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1985   "TARGET_SIMD_F16INST"
1986   "fcvtz<su>\t%h0, %h1"
1987   [(set_attr "type" "neon_fp_to_int_s")]
1988 )
1989
1990 (define_insn "<optab>hihf2"
1991   [(set (match_operand:HF 0 "register_operand" "=w")
1992         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1993   "TARGET_SIMD_F16INST"
1994   "<su_optab>cvtf\t%h0, %h1"
1995   [(set_attr "type" "neon_int_to_fp_s")]
1996 )
1997
1998 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1999   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2000         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001                                [(mult:VDQF
2002          (match_operand:VDQF 1 "register_operand" "w")
2003          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2004                                UNSPEC_FRINTZ)))]
2005   "TARGET_SIMD
2006    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2007                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2008   {
2009     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2010     char buf[64];
2011     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2012     output_asm_insn (buf, operands);
2013     return "";
2014   }
2015   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2016 )
2017
2018 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2019   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2020         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2021                                [(match_operand:VHSDF 1 "register_operand")]
2022                                 UNSPEC_FRINTZ)))]
2023   "TARGET_SIMD"
2024   {})
2025
2026 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2027   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2028         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2029                                [(match_operand:VHSDF 1 "register_operand")]
2030                                 UNSPEC_FRINTZ)))]
2031   "TARGET_SIMD"
2032   {})
2033
2034 (define_expand "ftrunc<VHSDF:mode>2"
2035   [(set (match_operand:VHSDF 0 "register_operand")
2036         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2037                        UNSPEC_FRINTZ))]
2038   "TARGET_SIMD"
2039   {})
2040
2041 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2042   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043         (FLOATUORS:VHSDF
2044           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2045   "TARGET_SIMD"
2046   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2047   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2048 )
2049
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2053
2054 ;; Float widening operations.
2055
2056 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2057   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2058         (float_extend:<VWIDE> (vec_select:<VHALF>
2059                                (match_operand:VQ_HSF 1 "register_operand" "w")
2060                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2061                             )))]
2062   "TARGET_SIMD"
2063   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2064   [(set_attr "type" "neon_fp_cvt_widen_s")]
2065 )
2066
2067 ;; Convert between fixed-point and floating-point (vector modes)
2068
2069 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2070   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2071         (unspec:<VHSDF:FCVT_TARGET>
2072           [(match_operand:VHSDF 1 "register_operand" "w")
2073            (match_operand:SI 2 "immediate_operand" "i")]
2074          FCVT_F2FIXED))]
2075   "TARGET_SIMD"
2076   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2077   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2078 )
2079
2080 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2081   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2082         (unspec:<VDQ_HSDI:FCVT_TARGET>
2083           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2084            (match_operand:SI 2 "immediate_operand" "i")]
2085          FCVT_FIXED2F))]
2086   "TARGET_SIMD"
2087   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2088   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2089 )
2090
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
2099
2100 (define_expand "vec_unpacks_lo_<mode>"
2101   [(match_operand:<VWIDE> 0 "register_operand" "")
2102    (match_operand:VQ_HSF 1 "register_operand" "")]
2103   "TARGET_SIMD"
2104   {
2105     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2106     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2107                                                        operands[1], p));
2108     DONE;
2109   }
2110 )
2111
2112 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2113   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114         (float_extend:<VWIDE> (vec_select:<VHALF>
2115                                (match_operand:VQ_HSF 1 "register_operand" "w")
2116                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2117                             )))]
2118   "TARGET_SIMD"
2119   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2120   [(set_attr "type" "neon_fp_cvt_widen_s")]
2121 )
2122
2123 (define_expand "vec_unpacks_hi_<mode>"
2124   [(match_operand:<VWIDE> 0 "register_operand" "")
2125    (match_operand:VQ_HSF 1 "register_operand" "")]
2126   "TARGET_SIMD"
2127   {
2128     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2129     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2130                                                        operands[1], p));
2131     DONE;
2132   }
2133 )
2134 (define_insn "aarch64_float_extend_lo_<Vwide>"
2135   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2136         (float_extend:<VWIDE>
2137           (match_operand:VDF 1 "register_operand" "w")))]
2138   "TARGET_SIMD"
2139   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2140   [(set_attr "type" "neon_fp_cvt_widen_s")]
2141 )
2142
2143 ;; Float narrowing operations.
2144
2145 (define_insn "aarch64_float_truncate_lo_<mode>"
2146   [(set (match_operand:VDF 0 "register_operand" "=w")
2147       (float_truncate:VDF
2148         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2149   "TARGET_SIMD"
2150   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2151   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2152 )
2153
2154 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2155   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2156     (vec_concat:<VDBL>
2157       (match_operand:VDF 1 "register_operand" "0")
2158       (float_truncate:VDF
2159         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2160   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2161   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2162   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2163 )
2164
2165 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2166   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2167     (vec_concat:<VDBL>
2168       (float_truncate:VDF
2169         (match_operand:<VWIDE> 2 "register_operand" "w"))
2170       (match_operand:VDF 1 "register_operand" "0")))]
2171   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2172   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2173   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2174 )
2175
2176 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2177   [(match_operand:<VDBL> 0 "register_operand" "=w")
2178    (match_operand:VDF 1 "register_operand" "0")
2179    (match_operand:<VWIDE> 2 "register_operand" "w")]
2180   "TARGET_SIMD"
2181 {
2182   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2183                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2184                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2185   emit_insn (gen (operands[0], operands[1], operands[2]));
2186   DONE;
2187 }
2188 )
2189
2190 (define_expand "vec_pack_trunc_v2df"
2191   [(set (match_operand:V4SF 0 "register_operand")
2192       (vec_concat:V4SF
2193         (float_truncate:V2SF
2194             (match_operand:V2DF 1 "register_operand"))
2195         (float_truncate:V2SF
2196             (match_operand:V2DF 2 "register_operand"))
2197           ))]
2198   "TARGET_SIMD"
2199   {
2200     rtx tmp = gen_reg_rtx (V2SFmode);
2201     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2205     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2206                                                    tmp, operands[hi]));
2207     DONE;
2208   }
2209 )
2210
2211 (define_expand "vec_pack_trunc_df"
2212   [(set (match_operand:V2SF 0 "register_operand")
2213       (vec_concat:V2SF
2214         (float_truncate:SF
2215             (match_operand:DF 1 "register_operand"))
2216         (float_truncate:SF
2217             (match_operand:DF 2 "register_operand"))
2218           ))]
2219   "TARGET_SIMD"
2220   {
2221     rtx tmp = gen_reg_rtx (V2SFmode);
2222     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2223     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2224
2225     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2226     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2227     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2228     DONE;
2229   }
2230 )
2231
2232 ;; FP Max/Min
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2234 ;; expression like:
2235 ;;      a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2238 ;; -ffast-math.
2239 ;;
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2245 ;; NaNs.
2246
2247 (define_insn "<su><maxmin><mode>3"
2248   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2249         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2250                        (match_operand:VHSDF 2 "register_operand" "w")))]
2251   "TARGET_SIMD"
2252   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2253   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2254 )
2255
2256 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2257 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2258 ;; which implement the IEEE fmax ()/fmin () functions.
2259 (define_insn "<maxmin_uns><mode>3"
2260   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2261        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2262                       (match_operand:VHSDF 2 "register_operand" "w")]
2263                       FMAXMIN_UNS))]
2264   "TARGET_SIMD"
2265   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2266   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2267 )
2268
2269 ;; 'across lanes' add.
2270
2271 (define_expand "reduc_plus_scal_<mode>"
2272   [(match_operand:<VEL> 0 "register_operand" "=w")
2273    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2274                UNSPEC_ADDV)]
2275   "TARGET_SIMD"
2276   {
2277     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2278     rtx scratch = gen_reg_rtx (<MODE>mode);
2279     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2280     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2281     DONE;
2282   }
2283 )
2284
2285 (define_insn "aarch64_faddp<mode>"
2286  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2287        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2288                       (match_operand:VHSDF 2 "register_operand" "w")]
2289         UNSPEC_FADDV))]
2290  "TARGET_SIMD"
2291  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2292   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2293 )
2294
2295 (define_insn "aarch64_reduc_plus_internal<mode>"
2296  [(set (match_operand:VDQV 0 "register_operand" "=w")
2297        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2298                     UNSPEC_ADDV))]
2299  "TARGET_SIMD"
2300  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2301   [(set_attr "type" "neon_reduc_add<q>")]
2302 )
2303
2304 (define_insn "aarch64_reduc_plus_internalv2si"
2305  [(set (match_operand:V2SI 0 "register_operand" "=w")
2306        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2307                     UNSPEC_ADDV))]
2308  "TARGET_SIMD"
2309  "addp\\t%0.2s, %1.2s, %1.2s"
2310   [(set_attr "type" "neon_reduc_add")]
2311 )
2312
2313 (define_insn "reduc_plus_scal_<mode>"
2314  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2315        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2316                    UNSPEC_FADDV))]
2317  "TARGET_SIMD"
2318  "faddp\\t%<Vetype>0, %1.<Vtype>"
2319   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2320 )
2321
2322 (define_expand "reduc_plus_scal_v4sf"
2323  [(set (match_operand:SF 0 "register_operand")
2324        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2325                     UNSPEC_FADDV))]
2326  "TARGET_SIMD"
2327 {
2328   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2329   rtx scratch = gen_reg_rtx (V4SFmode);
2330   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2331   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2332   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2333   DONE;
2334 })
2335
2336 (define_insn "clrsb<mode>2"
2337   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2338         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2339   "TARGET_SIMD"
2340   "cls\\t%0.<Vtype>, %1.<Vtype>"
2341   [(set_attr "type" "neon_cls<q>")]
2342 )
2343
2344 (define_insn "clz<mode>2"
2345  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2346        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2347  "TARGET_SIMD"
2348  "clz\\t%0.<Vtype>, %1.<Vtype>"
2349   [(set_attr "type" "neon_cls<q>")]
2350 )
2351
2352 (define_insn "popcount<mode>2"
2353   [(set (match_operand:VB 0 "register_operand" "=w")
2354         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2355   "TARGET_SIMD"
2356   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2357   [(set_attr "type" "neon_cnt<q>")]
2358 )
2359
2360 ;; 'across lanes' max and min ops.
2361
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2364 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2365   [(match_operand:<VEL> 0 "register_operand")
2366    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2367                   FMAXMINV)]
2368   "TARGET_SIMD"
2369   {
2370     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2371     rtx scratch = gen_reg_rtx (<MODE>mode);
2372     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2373                                                               operands[1]));
2374     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2375     DONE;
2376   }
2377 )
2378
2379 ;; Likewise for integer cases, signed and unsigned.
2380 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2381   [(match_operand:<VEL> 0 "register_operand")
2382    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2383                     MAXMINV)]
2384   "TARGET_SIMD"
2385   {
2386     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2387     rtx scratch = gen_reg_rtx (<MODE>mode);
2388     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2389                                                               operands[1]));
2390     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2391     DONE;
2392   }
2393 )
2394
2395 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2396  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2397        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2398                     MAXMINV))]
2399  "TARGET_SIMD"
2400  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2401   [(set_attr "type" "neon_reduc_minmax<q>")]
2402 )
2403
2404 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2405  [(set (match_operand:V2SI 0 "register_operand" "=w")
2406        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2407                     MAXMINV))]
2408  "TARGET_SIMD"
2409  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2410   [(set_attr "type" "neon_reduc_minmax")]
2411 )
2412
2413 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2414  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2415        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2416                       FMAXMINV))]
2417  "TARGET_SIMD"
2418  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2419   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2420 )
2421
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2423 ;; allocation.
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2425 ;; to select.
2426 ;;
2427 ;; Thus our BSL is of the form:
2428 ;;   op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2430 ;;
2431 ;;   if (op0 = mask)
2432 ;;     bsl mask, op1, op2
2433 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;;     bit op0, op2, mask
2435 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;;     bif op0, op1, mask
2437 ;;
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
2441
2442 (define_insn "aarch64_simd_bsl<mode>_internal"
2443   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2444         (xor:VDQ_I
2445            (and:VDQ_I
2446              (xor:VDQ_I
2447                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2448                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2449              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2450           (match_dup:<V_INT_EQUIV> 3)
2451         ))]
2452   "TARGET_SIMD"
2453   "@
2454   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2455   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2456   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2457   [(set_attr "type" "neon_bsl<q>")]
2458 )
2459
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first.  The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2465
2466 (define_insn "*aarch64_simd_bsl<mode>_alt"
2467   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2468         (xor:VDQ_I
2469            (and:VDQ_I
2470              (xor:VDQ_I
2471                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2472                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2473               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2474           (match_dup:<V_INT_EQUIV> 2)))]
2475   "TARGET_SIMD"
2476   "@
2477   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2478   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2479   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2480   [(set_attr "type" "neon_bsl<q>")]
2481 )
2482
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers.  If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again.  However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2489 ;;
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
2492
2493 (define_insn_and_split "aarch64_simd_bsldi_internal"
2494   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2495         (xor:DI
2496            (and:DI
2497              (xor:DI
2498                (match_operand:DI 3 "register_operand" "w,0,w,r")
2499                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2500              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2501           (match_dup:DI 3)
2502         ))]
2503   "TARGET_SIMD"
2504   "@
2505   bsl\\t%0.8b, %2.8b, %3.8b
2506   bit\\t%0.8b, %2.8b, %1.8b
2507   bif\\t%0.8b, %3.8b, %1.8b
2508   #"
2509   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2510   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2511 {
2512   /* Split back to individual operations.  If we're before reload, and
2513      able to create a temporary register, do so.  If we're after reload,
2514      we've got an early-clobber destination register, so use that.
2515      Otherwise, we can't create pseudos and we can't yet guarantee that
2516      operands[0] is safe to write, so FAIL to split.  */
2517
2518   rtx scratch;
2519   if (reload_completed)
2520     scratch = operands[0];
2521   else if (can_create_pseudo_p ())
2522     scratch = gen_reg_rtx (DImode);
2523   else
2524     FAIL;
2525
2526   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2527   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2528   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2529   DONE;
2530 }
2531   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2532    (set_attr "length" "4,4,4,12")]
2533 )
2534
2535 (define_insn_and_split "aarch64_simd_bsldi_alt"
2536   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2537         (xor:DI
2538            (and:DI
2539              (xor:DI
2540                (match_operand:DI 3 "register_operand" "w,w,0,r")
2541                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2542              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2543           (match_dup:DI 2)
2544         ))]
2545   "TARGET_SIMD"
2546   "@
2547   bsl\\t%0.8b, %3.8b, %2.8b
2548   bit\\t%0.8b, %3.8b, %1.8b
2549   bif\\t%0.8b, %2.8b, %1.8b
2550   #"
2551   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2552   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2553 {
2554   /* Split back to individual operations.  If we're before reload, and
2555      able to create a temporary register, do so.  If we're after reload,
2556      we've got an early-clobber destination register, so use that.
2557      Otherwise, we can't create pseudos and we can't yet guarantee that
2558      operands[0] is safe to write, so FAIL to split.  */
2559
2560   rtx scratch;
2561   if (reload_completed)
2562     scratch = operands[0];
2563   else if (can_create_pseudo_p ())
2564     scratch = gen_reg_rtx (DImode);
2565   else
2566     FAIL;
2567
2568   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2569   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2570   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2571   DONE;
2572 }
2573   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2574    (set_attr "length" "4,4,4,12")]
2575 )
2576
2577 (define_expand "aarch64_simd_bsl<mode>"
2578   [(match_operand:VALLDIF 0 "register_operand")
2579    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2580    (match_operand:VALLDIF 2 "register_operand")
2581    (match_operand:VALLDIF 3 "register_operand")]
2582  "TARGET_SIMD"
2583 {
2584   /* We can't alias operands together if they have different modes.  */
2585   rtx tmp = operands[0];
2586   if (FLOAT_MODE_P (<MODE>mode))
2587     {
2588       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2589       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2590       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2591     }
2592   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2593   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2594                                                          operands[1],
2595                                                          operands[2],
2596                                                          operands[3]));
2597   if (tmp != operands[0])
2598     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2599
2600   DONE;
2601 })
2602
2603 (define_expand "vcond_mask_<mode><v_int_equiv>"
2604   [(match_operand:VALLDI 0 "register_operand")
2605    (match_operand:VALLDI 1 "nonmemory_operand")
2606    (match_operand:VALLDI 2 "nonmemory_operand")
2607    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2608   "TARGET_SIMD"
2609 {
2610   /* If we have (a = (P) ? -1 : 0);
2611      Then we can simply move the generated mask (result must be int).  */
2612   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2613       && operands[2] == CONST0_RTX (<MODE>mode))
2614     emit_move_insn (operands[0], operands[3]);
2615   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2616   else if (operands[1] == CONST0_RTX (<MODE>mode)
2617            && operands[2] == CONSTM1_RTX (<MODE>mode))
2618     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2619   else
2620     {
2621       if (!REG_P (operands[1]))
2622         operands[1] = force_reg (<MODE>mode, operands[1]);
2623       if (!REG_P (operands[2]))
2624         operands[2] = force_reg (<MODE>mode, operands[2]);
2625       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2626                                              operands[1], operands[2]));
2627     }
2628
2629   DONE;
2630 })
2631
2632 ;; Patterns comparing two vectors to produce a mask.
2633
2634 (define_expand "vec_cmp<mode><mode>"
2635   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2636           (match_operator 1 "comparison_operator"
2637             [(match_operand:VSDQ_I_DI 2 "register_operand")
2638              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2639   "TARGET_SIMD"
2640 {
2641   rtx mask = operands[0];
2642   enum rtx_code code = GET_CODE (operands[1]);
2643
2644   switch (code)
2645     {
2646     case NE:
2647     case LE:
2648     case LT:
2649     case GE:
2650     case GT:
2651     case EQ:
2652       if (operands[3] == CONST0_RTX (<MODE>mode))
2653         break;
2654
2655       /* Fall through.  */
2656     default:
2657       if (!REG_P (operands[3]))
2658         operands[3] = force_reg (<MODE>mode, operands[3]);
2659
2660       break;
2661     }
2662
2663   switch (code)
2664     {
2665     case LT:
2666       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2667       break;
2668
2669     case GE:
2670       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2671       break;
2672
2673     case LE:
2674       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2675       break;
2676
2677     case GT:
2678       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2679       break;
2680
2681     case LTU:
2682       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2683       break;
2684
2685     case GEU:
2686       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2687       break;
2688
2689     case LEU:
2690       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2691       break;
2692
2693     case GTU:
2694       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2695       break;
2696
2697     case NE:
2698       /* Handle NE as !EQ.  */
2699       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2700       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2701       break;
2702
2703     case EQ:
2704       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2705       break;
2706
2707     default:
2708       gcc_unreachable ();
2709     }
2710
2711   DONE;
2712 })
2713
2714 (define_expand "vec_cmp<mode><v_int_equiv>"
2715   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2716         (match_operator 1 "comparison_operator"
2717             [(match_operand:VDQF 2 "register_operand")
2718              (match_operand:VDQF 3 "nonmemory_operand")]))]
2719   "TARGET_SIMD"
2720 {
2721   int use_zero_form = 0;
2722   enum rtx_code code = GET_CODE (operands[1]);
2723   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2724
2725   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2726
2727   switch (code)
2728     {
2729     case LE:
2730     case LT:
2731     case GE:
2732     case GT:
2733     case EQ:
2734       if (operands[3] == CONST0_RTX (<MODE>mode))
2735         {
2736           use_zero_form = 1;
2737           break;
2738         }
2739       /* Fall through.  */
2740     default:
2741       if (!REG_P (operands[3]))
2742         operands[3] = force_reg (<MODE>mode, operands[3]);
2743
2744       break;
2745     }
2746
2747   switch (code)
2748     {
2749     case LT:
2750       if (use_zero_form)
2751         {
2752           comparison = gen_aarch64_cmlt<mode>;
2753           break;
2754         }
2755       /* Fall through.  */
2756     case UNLT:
2757       std::swap (operands[2], operands[3]);
2758       /* Fall through.  */
2759     case UNGT:
2760     case GT:
2761       comparison = gen_aarch64_cmgt<mode>;
2762       break;
2763     case LE:
2764       if (use_zero_form)
2765         {
2766           comparison = gen_aarch64_cmle<mode>;
2767           break;
2768         }
2769       /* Fall through.  */
2770     case UNLE:
2771       std::swap (operands[2], operands[3]);
2772       /* Fall through.  */
2773     case UNGE:
2774     case GE:
2775       comparison = gen_aarch64_cmge<mode>;
2776       break;
2777     case NE:
2778     case EQ:
2779       comparison = gen_aarch64_cmeq<mode>;
2780       break;
2781     case UNEQ:
2782     case ORDERED:
2783     case UNORDERED:
2784     case LTGT:
2785       break;
2786     default:
2787       gcc_unreachable ();
2788     }
2789
2790   switch (code)
2791     {
2792     case UNGE:
2793     case UNGT:
2794     case UNLE:
2795     case UNLT:
2796       {
2797         /* All of the above must not raise any FP exceptions.  Thus we first
2798            check each operand for NaNs and force any elements containing NaN to
2799            zero before using them in the compare.
2800            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2801                                      (cm<cc> (isnan (a) ? 0.0 : a,
2802                                               isnan (b) ? 0.0 : b))
2803            We use the following transformations for doing the comparisions:
2804            a UNGE b -> a GE b
2805            a UNGT b -> a GT b
2806            a UNLE b -> b GE a
2807            a UNLT b -> b GT a.  */
2808
2809         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2810         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2811         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2812         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2813         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2814         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2815         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2816                                           lowpart_subreg (<V_INT_EQUIV>mode,
2817                                                           operands[2],
2818                                                           <MODE>mode)));
2819         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2820                                           lowpart_subreg (<V_INT_EQUIV>mode,
2821                                                           operands[3],
2822                                                           <MODE>mode)));
2823         gcc_assert (comparison != NULL);
2824         emit_insn (comparison (operands[0],
2825                                lowpart_subreg (<MODE>mode,
2826                                                tmp0, <V_INT_EQUIV>mode),
2827                                lowpart_subreg (<MODE>mode,
2828                                                tmp1, <V_INT_EQUIV>mode)));
2829         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2830       }
2831       break;
2832
2833     case LT:
2834     case LE:
2835     case GT:
2836     case GE:
2837     case EQ:
2838     case NE:
2839       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2840          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2841          a GE b -> a GE b
2842          a GT b -> a GT b
2843          a LE b -> b GE a
2844          a LT b -> b GT a
2845          a EQ b -> a EQ b
2846          a NE b -> ~(a EQ b)  */
2847       gcc_assert (comparison != NULL);
2848       emit_insn (comparison (operands[0], operands[2], operands[3]));
2849       if (code == NE)
2850         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2851       break;
2852
2853     case LTGT:
2854       /* LTGT is not guranteed to not generate a FP exception.  So let's
2855          go the faster way : ((a > b) || (b > a)).  */
2856       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2857                                          operands[2], operands[3]));
2858       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2859       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2860       break;
2861
2862     case ORDERED:
2863     case UNORDERED:
2864     case UNEQ:
2865       /* cmeq (a, a) & cmeq (b, b).  */
2866       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2867                                          operands[2], operands[2]));
2868       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2869       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2870
2871       if (code == UNORDERED)
2872         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2873       else if (code == UNEQ)
2874         {
2875           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2876           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2877         }
2878       break;
2879
2880     default:
2881       gcc_unreachable ();
2882     }
2883
2884   DONE;
2885 })
2886
2887 (define_expand "vec_cmpu<mode><mode>"
2888   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2889           (match_operator 1 "comparison_operator"
2890             [(match_operand:VSDQ_I_DI 2 "register_operand")
2891              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2892   "TARGET_SIMD"
2893 {
2894   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2895                                       operands[2], operands[3]));
2896   DONE;
2897 })
2898
2899 (define_expand "vcond<mode><mode>"
2900   [(set (match_operand:VALLDI 0 "register_operand")
2901         (if_then_else:VALLDI
2902           (match_operator 3 "comparison_operator"
2903             [(match_operand:VALLDI 4 "register_operand")
2904              (match_operand:VALLDI 5 "nonmemory_operand")])
2905           (match_operand:VALLDI 1 "nonmemory_operand")
2906           (match_operand:VALLDI 2 "nonmemory_operand")))]
2907   "TARGET_SIMD"
2908 {
2909   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2910   enum rtx_code code = GET_CODE (operands[3]);
2911
2912   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2913      it as well as switch operands 1/2 in order to avoid the additional
2914      NOT instruction.  */
2915   if (code == NE)
2916     {
2917       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2918                                     operands[4], operands[5]);
2919       std::swap (operands[1], operands[2]);
2920     }
2921   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2922                                              operands[4], operands[5]));
2923   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2924                                                  operands[2], mask));
2925
2926   DONE;
2927 })
2928
2929 (define_expand "vcond<v_cmp_mixed><mode>"
2930   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2931         (if_then_else:<V_cmp_mixed>
2932           (match_operator 3 "comparison_operator"
2933             [(match_operand:VDQF_COND 4 "register_operand")
2934              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2935           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2936           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2937   "TARGET_SIMD"
2938 {
2939   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2940   enum rtx_code code = GET_CODE (operands[3]);
2941
2942   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2943      it as well as switch operands 1/2 in order to avoid the additional
2944      NOT instruction.  */
2945   if (code == NE)
2946     {
2947       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2948                                     operands[4], operands[5]);
2949       std::swap (operands[1], operands[2]);
2950     }
2951   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2952                                              operands[4], operands[5]));
2953   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2954                                                 operands[0], operands[1],
2955                                                 operands[2], mask));
2956
2957   DONE;
2958 })
2959
2960 (define_expand "vcondu<mode><mode>"
2961   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2962         (if_then_else:VSDQ_I_DI
2963           (match_operator 3 "comparison_operator"
2964             [(match_operand:VSDQ_I_DI 4 "register_operand")
2965              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2966           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2967           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2968   "TARGET_SIMD"
2969 {
2970   rtx mask = gen_reg_rtx (<MODE>mode);
2971   enum rtx_code code = GET_CODE (operands[3]);
2972
2973   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974      it as well as switch operands 1/2 in order to avoid the additional
2975      NOT instruction.  */
2976   if (code == NE)
2977     {
2978       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979                                     operands[4], operands[5]);
2980       std::swap (operands[1], operands[2]);
2981     }
2982   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2983                                       operands[4], operands[5]));
2984   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2985                                                  operands[2], mask));
2986   DONE;
2987 })
2988
2989 (define_expand "vcondu<mode><v_cmp_mixed>"
2990   [(set (match_operand:VDQF 0 "register_operand")
2991         (if_then_else:VDQF
2992           (match_operator 3 "comparison_operator"
2993             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2994              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2995           (match_operand:VDQF 1 "nonmemory_operand")
2996           (match_operand:VDQF 2 "nonmemory_operand")))]
2997   "TARGET_SIMD"
2998 {
2999   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3000   enum rtx_code code = GET_CODE (operands[3]);
3001
3002   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3003      it as well as switch operands 1/2 in order to avoid the additional
3004      NOT instruction.  */
3005   if (code == NE)
3006     {
3007       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3008                                     operands[4], operands[5]);
3009       std::swap (operands[1], operands[2]);
3010     }
3011   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3012                                                   mask, operands[3],
3013                                                   operands[4], operands[5]));
3014   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3015                                                  operands[2], mask));
3016   DONE;
3017 })
3018
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3020
3021 ;; Lane extraction with sign extension to general purpose register.
3022 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3023   [(set (match_operand:GPI 0 "register_operand" "=r")
3024         (sign_extend:GPI
3025           (vec_select:<VEL>
3026             (match_operand:VDQQH 1 "register_operand" "w")
3027             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3028   "TARGET_SIMD"
3029   {
3030     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3031     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3032   }
3033   [(set_attr "type" "neon_to_gp<q>")]
3034 )
3035
3036 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3037   [(set (match_operand:SI 0 "register_operand" "=r")
3038         (zero_extend:SI
3039           (vec_select:<VEL>
3040             (match_operand:VDQQH 1 "register_operand" "w")
3041             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3042   "TARGET_SIMD"
3043   {
3044     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3045     return "umov\\t%w0, %1.<Vetype>[%2]";
3046   }
3047   [(set_attr "type" "neon_to_gp<q>")]
3048 )
3049
3050 ;; Lane extraction of a value, neither sign nor zero extension
3051 ;; is guaranteed so upper bits should be considered undefined.
3052 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3053 (define_insn "aarch64_get_lane<mode>"
3054   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3055         (vec_select:<VEL>
3056           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3057           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3058   "TARGET_SIMD"
3059   {
3060     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3061     switch (which_alternative)
3062       {
3063         case 0:
3064           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3065         case 1:
3066           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3067         case 2:
3068           return "st1\\t{%1.<Vetype>}[%2], %0";
3069         default:
3070           gcc_unreachable ();
3071       }
3072   }
3073   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3074 )
3075
3076 (define_insn "load_pair_lanes<mode>"
3077   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3078         (vec_concat:<VDBL>
3079            (match_operand:VDC 1 "memory_operand" "Utq")
3080            (match_operand:VDC 2 "memory_operand" "m")))]
3081   "TARGET_SIMD && !STRICT_ALIGNMENT
3082    && rtx_equal_p (XEXP (operands[2], 0),
3083                    plus_constant (Pmode,
3084                                   XEXP (operands[1], 0),
3085                                   GET_MODE_SIZE (<MODE>mode)))"
3086   "ldr\\t%q0, %1"
3087   [(set_attr "type" "neon_load1_1reg_q")]
3088 )
3089
3090 (define_insn "store_pair_lanes<mode>"
3091   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3092         (vec_concat:<VDBL>
3093            (match_operand:VDC 1 "register_operand" "w, r")
3094            (match_operand:VDC 2 "register_operand" "w, r")))]
3095   "TARGET_SIMD"
3096   "@
3097    stp\\t%d1, %d2, %y0
3098    stp\\t%x1, %x2, %y0"
3099   [(set_attr "type" "neon_stp, store_16")]
3100 )
3101
3102 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3103 ;; dest vector.
3104
3105 (define_insn "*aarch64_combinez<mode>"
3106   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3107         (vec_concat:<VDBL>
3108           (match_operand:VDC 1 "general_operand" "w,?r,m")
3109           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3110   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3111   "@
3112    mov\\t%0.8b, %1.8b
3113    fmov\t%d0, %1
3114    ldr\\t%d0, %1"
3115   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3116    (set_attr "simd" "yes,*,yes")
3117    (set_attr "fp" "*,yes,*")]
3118 )
3119
3120 (define_insn "*aarch64_combinez_be<mode>"
3121   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3122         (vec_concat:<VDBL>
3123           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3124           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3125   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3126   "@
3127    mov\\t%0.8b, %1.8b
3128    fmov\t%d0, %1
3129    ldr\\t%d0, %1"
3130   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3131    (set_attr "simd" "yes,*,yes")
3132    (set_attr "fp" "*,yes,*")]
3133 )
3134
3135 (define_expand "aarch64_combine<mode>"
3136   [(match_operand:<VDBL> 0 "register_operand")
3137    (match_operand:VDC 1 "register_operand")
3138    (match_operand:VDC 2 "register_operand")]
3139   "TARGET_SIMD"
3140 {
3141   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3142
3143   DONE;
3144 }
3145 )
3146
3147 (define_expand "aarch64_simd_combine<mode>"
3148   [(match_operand:<VDBL> 0 "register_operand")
3149    (match_operand:VDC 1 "register_operand")
3150    (match_operand:VDC 2 "register_operand")]
3151   "TARGET_SIMD"
3152   {
3153     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3154     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3155     DONE;
3156   }
3157 [(set_attr "type" "multiple")]
3158 )
3159
3160 ;; <su><addsub>l<q>.
3161
3162 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3163  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3165                            (match_operand:VQW 1 "register_operand" "w")
3166                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3167                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3168                            (match_operand:VQW 2 "register_operand" "w")
3169                            (match_dup 3)))))]
3170   "TARGET_SIMD"
3171   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3172   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3173 )
3174
3175 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3176  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3177        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3178                            (match_operand:VQW 1 "register_operand" "w")
3179                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3180                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3181                            (match_operand:VQW 2 "register_operand" "w")
3182                            (match_dup 3)))))]
3183   "TARGET_SIMD"
3184   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3185   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3186 )
3187
3188
3189 (define_expand "aarch64_saddl2<mode>"
3190   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3191    (match_operand:VQW 1 "register_operand" "w")
3192    (match_operand:VQW 2 "register_operand" "w")]
3193   "TARGET_SIMD"
3194 {
3195   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3196   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3197                                                   operands[2], p));
3198   DONE;
3199 })
3200
3201 (define_expand "aarch64_uaddl2<mode>"
3202   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203    (match_operand:VQW 1 "register_operand" "w")
3204    (match_operand:VQW 2 "register_operand" "w")]
3205   "TARGET_SIMD"
3206 {
3207   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3209                                                   operands[2], p));
3210   DONE;
3211 })
3212
3213 (define_expand "aarch64_ssubl2<mode>"
3214   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215    (match_operand:VQW 1 "register_operand" "w")
3216    (match_operand:VQW 2 "register_operand" "w")]
3217   "TARGET_SIMD"
3218 {
3219   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3221                                                 operands[2], p));
3222   DONE;
3223 })
3224
3225 (define_expand "aarch64_usubl2<mode>"
3226   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3227    (match_operand:VQW 1 "register_operand" "w")
3228    (match_operand:VQW 2 "register_operand" "w")]
3229   "TARGET_SIMD"
3230 {
3231   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3232   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3233                                                 operands[2], p));
3234   DONE;
3235 })
3236
3237 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3238  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3239        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3240                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3241                        (ANY_EXTEND:<VWIDE>
3242                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3243   "TARGET_SIMD"
3244   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3245   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3246 )
3247
3248 ;; <su><addsub>w<q>.
3249
3250 (define_expand "widen_ssum<mode>3"
3251   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3252         (plus:<VDBLW> (sign_extend:<VDBLW>
3253                         (match_operand:VQW 1 "register_operand" ""))
3254                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3255   "TARGET_SIMD"
3256   {
3257     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3258     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3259
3260     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3261                                                 operands[1], p));
3262     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3263     DONE;
3264   }
3265 )
3266
3267 (define_expand "widen_ssum<mode>3"
3268   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3269         (plus:<VWIDE> (sign_extend:<VWIDE>
3270                         (match_operand:VD_BHSI 1 "register_operand" ""))
3271                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3272   "TARGET_SIMD"
3273 {
3274   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3275   DONE;
3276 })
3277
3278 (define_expand "widen_usum<mode>3"
3279   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3280         (plus:<VDBLW> (zero_extend:<VDBLW>
3281                         (match_operand:VQW 1 "register_operand" ""))
3282                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3283   "TARGET_SIMD"
3284   {
3285     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3286     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3287
3288     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3289                                                  operands[1], p));
3290     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3291     DONE;
3292   }
3293 )
3294
3295 (define_expand "widen_usum<mode>3"
3296   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3297         (plus:<VWIDE> (zero_extend:<VWIDE>
3298                         (match_operand:VD_BHSI 1 "register_operand" ""))
3299                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3300   "TARGET_SIMD"
3301 {
3302   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3303   DONE;
3304 })
3305
3306 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3307   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3308         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3309                         (ANY_EXTEND:<VWIDE>
3310                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3311   "TARGET_SIMD"
3312   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3313   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3314 )
3315
3316 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3317   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3319                         (ANY_EXTEND:<VWIDE>
3320                           (vec_select:<VHALF>
3321                            (match_operand:VQW 2 "register_operand" "w")
3322                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3323   "TARGET_SIMD"
3324   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3325   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3326 )
3327
3328 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3329   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3330         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3331                         (ANY_EXTEND:<VWIDE>
3332                           (vec_select:<VHALF>
3333                            (match_operand:VQW 2 "register_operand" "w")
3334                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3335   "TARGET_SIMD"
3336   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3337   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3338 )
3339
3340 (define_expand "aarch64_saddw2<mode>"
3341   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342    (match_operand:<VWIDE> 1 "register_operand" "w")
3343    (match_operand:VQW 2 "register_operand" "w")]
3344   "TARGET_SIMD"
3345 {
3346   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3348                                                 operands[2], p));
3349   DONE;
3350 })
3351
3352 (define_expand "aarch64_uaddw2<mode>"
3353   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3354    (match_operand:<VWIDE> 1 "register_operand" "w")
3355    (match_operand:VQW 2 "register_operand" "w")]
3356   "TARGET_SIMD"
3357 {
3358   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3359   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3360                                                 operands[2], p));
3361   DONE;
3362 })
3363
3364
3365 (define_expand "aarch64_ssubw2<mode>"
3366   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3367    (match_operand:<VWIDE> 1 "register_operand" "w")
3368    (match_operand:VQW 2 "register_operand" "w")]
3369   "TARGET_SIMD"
3370 {
3371   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3372   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3373                                                 operands[2], p));
3374   DONE;
3375 })
3376
3377 (define_expand "aarch64_usubw2<mode>"
3378   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3379    (match_operand:<VWIDE> 1 "register_operand" "w")
3380    (match_operand:VQW 2 "register_operand" "w")]
3381   "TARGET_SIMD"
3382 {
3383   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3385                                                 operands[2], p));
3386   DONE;
3387 })
3388
3389 ;; <su><r>h<addsub>.
3390
3391 (define_expand "<u>avg<mode>3_floor"
3392   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3393         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3394                           (match_operand:VDQ_BHSI 2 "register_operand")]
3395                          HADD))]
3396   "TARGET_SIMD"
3397 )
3398
3399 (define_expand "<u>avg<mode>3_ceil"
3400   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3401         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3402                           (match_operand:VDQ_BHSI 2 "register_operand")]
3403                          RHADD))]
3404   "TARGET_SIMD"
3405 )
3406
3407 (define_insn "aarch64_<sur>h<addsub><mode>"
3408   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3409         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3410                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3411                      HADDSUB))]
3412   "TARGET_SIMD"
3413   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3414   [(set_attr "type" "neon_<addsub>_halve<q>")]
3415 )
3416
3417 ;; <r><addsub>hn<q>.
3418
3419 (define_insn "aarch64_<sur><addsub>hn<mode>"
3420   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3421         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3422                             (match_operand:VQN 2 "register_operand" "w")]
3423                            ADDSUBHN))]
3424   "TARGET_SIMD"
3425   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3426   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3427 )
3428
3429 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3430   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3431         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3432                              (match_operand:VQN 2 "register_operand" "w")
3433                              (match_operand:VQN 3 "register_operand" "w")]
3434                             ADDSUBHN2))]
3435   "TARGET_SIMD"
3436   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3437   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3438 )
3439
3440 ;; pmul.
3441
3442 (define_insn "aarch64_pmul<mode>"
3443   [(set (match_operand:VB 0 "register_operand" "=w")
3444         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3445                     (match_operand:VB 2 "register_operand" "w")]
3446                    UNSPEC_PMUL))]
3447  "TARGET_SIMD"
3448  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3449   [(set_attr "type" "neon_mul_<Vetype><q>")]
3450 )
3451
3452 ;; fmulx.
3453
3454 (define_insn "aarch64_fmulx<mode>"
3455   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3456         (unspec:VHSDF_HSDF
3457           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3458            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3459            UNSPEC_FMULX))]
3460  "TARGET_SIMD"
3461  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3462  [(set_attr "type" "neon_fp_mul_<stype>")]
3463 )
3464
3465 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3466
3467 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3468   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3469         (unspec:VDQSF
3470          [(match_operand:VDQSF 1 "register_operand" "w")
3471           (vec_duplicate:VDQSF
3472            (vec_select:<VEL>
3473             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3474             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3475          UNSPEC_FMULX))]
3476   "TARGET_SIMD"
3477   {
3478     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3479     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3480   }
3481   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3482 )
3483
3484 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3485
3486 (define_insn "*aarch64_mulx_elt<mode>"
3487   [(set (match_operand:VDQF 0 "register_operand" "=w")
3488         (unspec:VDQF
3489          [(match_operand:VDQF 1 "register_operand" "w")
3490           (vec_duplicate:VDQF
3491            (vec_select:<VEL>
3492             (match_operand:VDQF 2 "register_operand" "w")
3493             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3494          UNSPEC_FMULX))]
3495   "TARGET_SIMD"
3496   {
3497     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3498     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3499   }
3500   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3501 )
3502
3503 ;; vmulxq_lane
3504
3505 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3506   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3507         (unspec:VHSDF
3508          [(match_operand:VHSDF 1 "register_operand" "w")
3509           (vec_duplicate:VHSDF
3510             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3511          UNSPEC_FMULX))]
3512   "TARGET_SIMD"
3513   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3514   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3515 )
3516
3517 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3518 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3519 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3520
3521 (define_insn "*aarch64_vgetfmulx<mode>"
3522   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3523         (unspec:<VEL>
3524          [(match_operand:<VEL> 1 "register_operand" "w")
3525           (vec_select:<VEL>
3526            (match_operand:VDQF 2 "register_operand" "w")
3527             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3528          UNSPEC_FMULX))]
3529   "TARGET_SIMD"
3530   {
3531     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3532     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3533   }
3534   [(set_attr "type" "fmul<Vetype>")]
3535 )
3536 ;; <su>q<addsub>
3537
3538 (define_insn "aarch64_<su_optab><optab><mode>"
3539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3541                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3542   "TARGET_SIMD"
3543   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3544   [(set_attr "type" "neon_<optab><q>")]
3545 )
3546
3547 ;; suqadd and usqadd
3548
3549 (define_insn "aarch64_<sur>qadd<mode>"
3550   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3551         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3552                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3553                        USSUQADD))]
3554   "TARGET_SIMD"
3555   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3556   [(set_attr "type" "neon_qadd<q>")]
3557 )
3558
3559 ;; sqmovun
3560
3561 (define_insn "aarch64_sqmovun<mode>"
3562   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3563         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3564                             UNSPEC_SQXTUN))]
3565    "TARGET_SIMD"
3566    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3567    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3568 )
3569
3570 ;; sqmovn and uqmovn
3571
3572 (define_insn "aarch64_<sur>qmovn<mode>"
3573   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3574         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3575                             SUQMOVN))]
3576   "TARGET_SIMD"
3577   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3578    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3579 )
3580
3581 ;; <su>q<absneg>
3582
3583 (define_insn "aarch64_s<optab><mode>"
3584   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3585         (UNQOPS:VSDQ_I
3586           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3587   "TARGET_SIMD"
3588   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3589   [(set_attr "type" "neon_<optab><q>")]
3590 )
3591
3592 ;; sq<r>dmulh.
3593
3594 (define_insn "aarch64_sq<r>dmulh<mode>"
3595   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3596         (unspec:VSDQ_HSI
3597           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3598            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3599          VQDMULH))]
3600   "TARGET_SIMD"
3601   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3602   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3603 )
3604
3605 ;; sq<r>dmulh_lane
3606
3607 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3608   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3609         (unspec:VDQHS
3610           [(match_operand:VDQHS 1 "register_operand" "w")
3611            (vec_select:<VEL>
3612              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3613              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614          VQDMULH))]
3615   "TARGET_SIMD"
3616   "*
3617    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3618    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3619   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3620 )
3621
3622 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3623   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3624         (unspec:VDQHS
3625           [(match_operand:VDQHS 1 "register_operand" "w")
3626            (vec_select:<VEL>
3627              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3628              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3629          VQDMULH))]
3630   "TARGET_SIMD"
3631   "*
3632    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3633    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3634   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3635 )
3636
3637 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3638   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3639         (unspec:SD_HSI
3640           [(match_operand:SD_HSI 1 "register_operand" "w")
3641            (vec_select:<VEL>
3642              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3643              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3644          VQDMULH))]
3645   "TARGET_SIMD"
3646   "*
3647    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3648    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3649   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3650 )
3651
3652 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3653   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3654         (unspec:SD_HSI
3655           [(match_operand:SD_HSI 1 "register_operand" "w")
3656            (vec_select:<VEL>
3657              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3658              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3659          VQDMULH))]
3660   "TARGET_SIMD"
3661   "*
3662    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3663    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3664   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3665 )
3666
3667 ;; sqrdml[as]h.
3668
3669 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3670   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3671         (unspec:VSDQ_HSI
3672           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3673            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3674            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3675           SQRDMLH_AS))]
3676    "TARGET_SIMD_RDMA"
3677    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3678    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3679 )
3680
3681 ;; sqrdml[as]h_lane.
3682
3683 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3684   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3685         (unspec:VDQHS
3686           [(match_operand:VDQHS 1 "register_operand" "0")
3687            (match_operand:VDQHS 2 "register_operand" "w")
3688            (vec_select:<VEL>
3689              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3690              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3691           SQRDMLH_AS))]
3692    "TARGET_SIMD_RDMA"
3693    {
3694      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3695      return
3696       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3697    }
3698    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3699 )
3700
3701 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3702   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3703         (unspec:SD_HSI
3704           [(match_operand:SD_HSI 1 "register_operand" "0")
3705            (match_operand:SD_HSI 2 "register_operand" "w")
3706            (vec_select:<VEL>
3707              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3708              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3709           SQRDMLH_AS))]
3710    "TARGET_SIMD_RDMA"
3711    {
3712      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3713      return
3714       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3715    }
3716    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3717 )
3718
3719 ;; sqrdml[as]h_laneq.
3720
3721 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3722   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3723         (unspec:VDQHS
3724           [(match_operand:VDQHS 1 "register_operand" "0")
3725            (match_operand:VDQHS 2 "register_operand" "w")
3726            (vec_select:<VEL>
3727              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3728              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3729           SQRDMLH_AS))]
3730    "TARGET_SIMD_RDMA"
3731    {
3732      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3733      return
3734       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3735    }
3736    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3737 )
3738
3739 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3740   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3741         (unspec:SD_HSI
3742           [(match_operand:SD_HSI 1 "register_operand" "0")
3743            (match_operand:SD_HSI 2 "register_operand" "w")
3744            (vec_select:<VEL>
3745              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3746              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3747           SQRDMLH_AS))]
3748    "TARGET_SIMD_RDMA"
3749    {
3750      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3751      return
3752       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3753    }
3754    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3755 )
3756
3757 ;; vqdml[sa]l
3758
3759 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3760   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3761         (SBINQOPS:<VWIDE>
3762           (match_operand:<VWIDE> 1 "register_operand" "0")
3763           (ss_ashift:<VWIDE>
3764               (mult:<VWIDE>
3765                 (sign_extend:<VWIDE>
3766                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3767                 (sign_extend:<VWIDE>
3768                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3769               (const_int 1))))]
3770   "TARGET_SIMD"
3771   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3772   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3773 )
3774
3775 ;; vqdml[sa]l_lane
3776
3777 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3778   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3779         (SBINQOPS:<VWIDE>
3780           (match_operand:<VWIDE> 1 "register_operand" "0")
3781           (ss_ashift:<VWIDE>
3782             (mult:<VWIDE>
3783               (sign_extend:<VWIDE>
3784                 (match_operand:VD_HSI 2 "register_operand" "w"))
3785               (sign_extend:<VWIDE>
3786                 (vec_duplicate:VD_HSI
3787                   (vec_select:<VEL>
3788                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3789                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3790               ))
3791             (const_int 1))))]
3792   "TARGET_SIMD"
3793   {
3794     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3795     return
3796       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3797   }
3798   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3799 )
3800
3801 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3802   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3803         (SBINQOPS:<VWIDE>
3804           (match_operand:<VWIDE> 1 "register_operand" "0")
3805           (ss_ashift:<VWIDE>
3806             (mult:<VWIDE>
3807               (sign_extend:<VWIDE>
3808                 (match_operand:VD_HSI 2 "register_operand" "w"))
3809               (sign_extend:<VWIDE>
3810                 (vec_duplicate:VD_HSI
3811                   (vec_select:<VEL>
3812                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3813                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3814               ))
3815             (const_int 1))))]
3816   "TARGET_SIMD"
3817   {
3818     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3819     return
3820       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3821   }
3822   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3823 )
3824
3825 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3826   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3827         (SBINQOPS:<VWIDE>
3828           (match_operand:<VWIDE> 1 "register_operand" "0")
3829           (ss_ashift:<VWIDE>
3830             (mult:<VWIDE>
3831               (sign_extend:<VWIDE>
3832                 (match_operand:SD_HSI 2 "register_operand" "w"))
3833               (sign_extend:<VWIDE>
3834                 (vec_select:<VEL>
3835                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3836                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3837               )
3838             (const_int 1))))]
3839   "TARGET_SIMD"
3840   {
3841     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3842     return
3843       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3844   }
3845   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3846 )
3847
3848 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3849   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3850         (SBINQOPS:<VWIDE>
3851           (match_operand:<VWIDE> 1 "register_operand" "0")
3852           (ss_ashift:<VWIDE>
3853             (mult:<VWIDE>
3854               (sign_extend:<VWIDE>
3855                 (match_operand:SD_HSI 2 "register_operand" "w"))
3856               (sign_extend:<VWIDE>
3857                 (vec_select:<VEL>
3858                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3859                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3860               )
3861             (const_int 1))))]
3862   "TARGET_SIMD"
3863   {
3864     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3865     return
3866       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3867   }
3868   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3869 )
3870
3871 ;; vqdml[sa]l_n
3872
3873 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3874   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3875         (SBINQOPS:<VWIDE>
3876           (match_operand:<VWIDE> 1 "register_operand" "0")
3877           (ss_ashift:<VWIDE>
3878               (mult:<VWIDE>
3879                 (sign_extend:<VWIDE>
3880                       (match_operand:VD_HSI 2 "register_operand" "w"))
3881                 (sign_extend:<VWIDE>
3882                   (vec_duplicate:VD_HSI
3883                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3884               (const_int 1))))]
3885   "TARGET_SIMD"
3886   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3887   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3888 )
3889
3890 ;; sqdml[as]l2
3891
3892 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3893   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3894         (SBINQOPS:<VWIDE>
3895          (match_operand:<VWIDE> 1 "register_operand" "0")
3896          (ss_ashift:<VWIDE>
3897              (mult:<VWIDE>
3898                (sign_extend:<VWIDE>
3899                  (vec_select:<VHALF>
3900                      (match_operand:VQ_HSI 2 "register_operand" "w")
3901                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3902                (sign_extend:<VWIDE>
3903                  (vec_select:<VHALF>
3904                      (match_operand:VQ_HSI 3 "register_operand" "w")
3905                      (match_dup 4))))
3906              (const_int 1))))]
3907   "TARGET_SIMD"
3908   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3909   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3910 )
3911
3912 (define_expand "aarch64_sqdmlal2<mode>"
3913   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3914    (match_operand:<VWIDE> 1 "register_operand" "w")
3915    (match_operand:VQ_HSI 2 "register_operand" "w")
3916    (match_operand:VQ_HSI 3 "register_operand" "w")]
3917   "TARGET_SIMD"
3918 {
3919   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3920   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3921                                                   operands[2], operands[3], p));
3922   DONE;
3923 })
3924
3925 (define_expand "aarch64_sqdmlsl2<mode>"
3926   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3927    (match_operand:<VWIDE> 1 "register_operand" "w")
3928    (match_operand:VQ_HSI 2 "register_operand" "w")
3929    (match_operand:VQ_HSI 3 "register_operand" "w")]
3930   "TARGET_SIMD"
3931 {
3932   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3933   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3934                                                   operands[2], operands[3], p));
3935   DONE;
3936 })
3937
3938 ;; vqdml[sa]l2_lane
3939
3940 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3941   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3942         (SBINQOPS:<VWIDE>
3943           (match_operand:<VWIDE> 1 "register_operand" "0")
3944           (ss_ashift:<VWIDE>
3945               (mult:<VWIDE>
3946                 (sign_extend:<VWIDE>
3947                   (vec_select:<VHALF>
3948                     (match_operand:VQ_HSI 2 "register_operand" "w")
3949                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3950                 (sign_extend:<VWIDE>
3951                   (vec_duplicate:<VHALF>
3952                     (vec_select:<VEL>
3953                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3954                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3955                     ))))
3956               (const_int 1))))]
3957   "TARGET_SIMD"
3958   {
3959     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3960     return
3961      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3962   }
3963   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3964 )
3965
3966 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3967   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3968         (SBINQOPS:<VWIDE>
3969           (match_operand:<VWIDE> 1 "register_operand" "0")
3970           (ss_ashift:<VWIDE>
3971               (mult:<VWIDE>
3972                 (sign_extend:<VWIDE>
3973                   (vec_select:<VHALF>
3974                     (match_operand:VQ_HSI 2 "register_operand" "w")
3975                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3976                 (sign_extend:<VWIDE>
3977                   (vec_duplicate:<VHALF>
3978                     (vec_select:<VEL>
3979                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3980                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3981                     ))))
3982               (const_int 1))))]
3983   "TARGET_SIMD"
3984   {
3985     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3986     return
3987      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3988   }
3989   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3990 )
3991
3992 (define_expand "aarch64_sqdmlal2_lane<mode>"
3993   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994    (match_operand:<VWIDE> 1 "register_operand" "w")
3995    (match_operand:VQ_HSI 2 "register_operand" "w")
3996    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997    (match_operand:SI 4 "immediate_operand" "i")]
3998   "TARGET_SIMD"
3999 {
4000   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4002                                                        operands[2], operands[3],
4003                                                        operands[4], p));
4004   DONE;
4005 })
4006
4007 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4008   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4009    (match_operand:<VWIDE> 1 "register_operand" "w")
4010    (match_operand:VQ_HSI 2 "register_operand" "w")
4011    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4012    (match_operand:SI 4 "immediate_operand" "i")]
4013   "TARGET_SIMD"
4014 {
4015   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4017                                                        operands[2], operands[3],
4018                                                        operands[4], p));
4019   DONE;
4020 })
4021
4022 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4023   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024    (match_operand:<VWIDE> 1 "register_operand" "w")
4025    (match_operand:VQ_HSI 2 "register_operand" "w")
4026    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4027    (match_operand:SI 4 "immediate_operand" "i")]
4028   "TARGET_SIMD"
4029 {
4030   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4031   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4032                                                        operands[2], operands[3],
4033                                                        operands[4], p));
4034   DONE;
4035 })
4036
4037 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4038   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4039    (match_operand:<VWIDE> 1 "register_operand" "w")
4040    (match_operand:VQ_HSI 2 "register_operand" "w")
4041    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4042    (match_operand:SI 4 "immediate_operand" "i")]
4043   "TARGET_SIMD"
4044 {
4045   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4046   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4047                                                        operands[2], operands[3],
4048                                                        operands[4], p));
4049   DONE;
4050 })
4051
4052 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4053   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4054         (SBINQOPS:<VWIDE>
4055           (match_operand:<VWIDE> 1 "register_operand" "0")
4056           (ss_ashift:<VWIDE>
4057             (mult:<VWIDE>
4058               (sign_extend:<VWIDE>
4059                 (vec_select:<VHALF>
4060                   (match_operand:VQ_HSI 2 "register_operand" "w")
4061                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4062               (sign_extend:<VWIDE>
4063                 (vec_duplicate:<VHALF>
4064                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4065             (const_int 1))))]
4066   "TARGET_SIMD"
4067   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4068   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4069 )
4070
4071 (define_expand "aarch64_sqdmlal2_n<mode>"
4072   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4073    (match_operand:<VWIDE> 1 "register_operand" "w")
4074    (match_operand:VQ_HSI 2 "register_operand" "w")
4075    (match_operand:<VEL> 3 "register_operand" "w")]
4076   "TARGET_SIMD"
4077 {
4078   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4079   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4080                                                     operands[2], operands[3],
4081                                                     p));
4082   DONE;
4083 })
4084
4085 (define_expand "aarch64_sqdmlsl2_n<mode>"
4086   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4087    (match_operand:<VWIDE> 1 "register_operand" "w")
4088    (match_operand:VQ_HSI 2 "register_operand" "w")
4089    (match_operand:<VEL> 3 "register_operand" "w")]
4090   "TARGET_SIMD"
4091 {
4092   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4093   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4094                                                     operands[2], operands[3],
4095                                                     p));
4096   DONE;
4097 })
4098
4099 ;; vqdmull
4100
4101 (define_insn "aarch64_sqdmull<mode>"
4102   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4103         (ss_ashift:<VWIDE>
4104              (mult:<VWIDE>
4105                (sign_extend:<VWIDE>
4106                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4107                (sign_extend:<VWIDE>
4108                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4109              (const_int 1)))]
4110   "TARGET_SIMD"
4111   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4112   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4113 )
4114
4115 ;; vqdmull_lane
4116
4117 (define_insn "aarch64_sqdmull_lane<mode>"
4118   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4119         (ss_ashift:<VWIDE>
4120              (mult:<VWIDE>
4121                (sign_extend:<VWIDE>
4122                  (match_operand:VD_HSI 1 "register_operand" "w"))
4123                (sign_extend:<VWIDE>
4124                  (vec_duplicate:VD_HSI
4125                    (vec_select:<VEL>
4126                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4127                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4128                ))
4129              (const_int 1)))]
4130   "TARGET_SIMD"
4131   {
4132     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4133     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4134   }
4135   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4136 )
4137
4138 (define_insn "aarch64_sqdmull_laneq<mode>"
4139   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140         (ss_ashift:<VWIDE>
4141              (mult:<VWIDE>
4142                (sign_extend:<VWIDE>
4143                  (match_operand:VD_HSI 1 "register_operand" "w"))
4144                (sign_extend:<VWIDE>
4145                  (vec_duplicate:VD_HSI
4146                    (vec_select:<VEL>
4147                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4148                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4149                ))
4150              (const_int 1)))]
4151   "TARGET_SIMD"
4152   {
4153     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4154     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4155   }
4156   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4157 )
4158
4159 (define_insn "aarch64_sqdmull_lane<mode>"
4160   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4161         (ss_ashift:<VWIDE>
4162              (mult:<VWIDE>
4163                (sign_extend:<VWIDE>
4164                  (match_operand:SD_HSI 1 "register_operand" "w"))
4165                (sign_extend:<VWIDE>
4166                  (vec_select:<VEL>
4167                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4168                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4169                ))
4170              (const_int 1)))]
4171   "TARGET_SIMD"
4172   {
4173     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4174     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4175   }
4176   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4177 )
4178
4179 (define_insn "aarch64_sqdmull_laneq<mode>"
4180   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4181         (ss_ashift:<VWIDE>
4182              (mult:<VWIDE>
4183                (sign_extend:<VWIDE>
4184                  (match_operand:SD_HSI 1 "register_operand" "w"))
4185                (sign_extend:<VWIDE>
4186                  (vec_select:<VEL>
4187                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4188                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4189                ))
4190              (const_int 1)))]
4191   "TARGET_SIMD"
4192   {
4193     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4194     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4195   }
4196   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4197 )
4198
4199 ;; vqdmull_n
4200
4201 (define_insn "aarch64_sqdmull_n<mode>"
4202   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203         (ss_ashift:<VWIDE>
4204              (mult:<VWIDE>
4205                (sign_extend:<VWIDE>
4206                  (match_operand:VD_HSI 1 "register_operand" "w"))
4207                (sign_extend:<VWIDE>
4208                  (vec_duplicate:VD_HSI
4209                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4210                )
4211              (const_int 1)))]
4212   "TARGET_SIMD"
4213   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4214   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4215 )
4216
4217 ;; vqdmull2
4218
4219
4220
4221 (define_insn "aarch64_sqdmull2<mode>_internal"
4222   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4223         (ss_ashift:<VWIDE>
4224              (mult:<VWIDE>
4225                (sign_extend:<VWIDE>
4226                  (vec_select:<VHALF>
4227                    (match_operand:VQ_HSI 1 "register_operand" "w")
4228                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4229                (sign_extend:<VWIDE>
4230                  (vec_select:<VHALF>
4231                    (match_operand:VQ_HSI 2 "register_operand" "w")
4232                    (match_dup 3)))
4233                )
4234              (const_int 1)))]
4235   "TARGET_SIMD"
4236   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4237   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4238 )
4239
4240 (define_expand "aarch64_sqdmull2<mode>"
4241   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4242    (match_operand:VQ_HSI 1 "register_operand" "w")
4243    (match_operand:VQ_HSI 2 "register_operand" "w")]
4244   "TARGET_SIMD"
4245 {
4246   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4247   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4248                                                   operands[2], p));
4249   DONE;
4250 })
4251
4252 ;; vqdmull2_lane
4253
4254 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4255   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4256         (ss_ashift:<VWIDE>
4257              (mult:<VWIDE>
4258                (sign_extend:<VWIDE>
4259                  (vec_select:<VHALF>
4260                    (match_operand:VQ_HSI 1 "register_operand" "w")
4261                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4262                (sign_extend:<VWIDE>
4263                  (vec_duplicate:<VHALF>
4264                    (vec_select:<VEL>
4265                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4266                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4267                ))
4268              (const_int 1)))]
4269   "TARGET_SIMD"
4270   {
4271     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4272     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4273   }
4274   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4275 )
4276
4277 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4278   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4279         (ss_ashift:<VWIDE>
4280              (mult:<VWIDE>
4281                (sign_extend:<VWIDE>
4282                  (vec_select:<VHALF>
4283                    (match_operand:VQ_HSI 1 "register_operand" "w")
4284                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4285                (sign_extend:<VWIDE>
4286                  (vec_duplicate:<VHALF>
4287                    (vec_select:<VEL>
4288                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4289                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4290                ))
4291              (const_int 1)))]
4292   "TARGET_SIMD"
4293   {
4294     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4295     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4296   }
4297   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4298 )
4299
4300 (define_expand "aarch64_sqdmull2_lane<mode>"
4301   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4302    (match_operand:VQ_HSI 1 "register_operand" "w")
4303    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4304    (match_operand:SI 3 "immediate_operand" "i")]
4305   "TARGET_SIMD"
4306 {
4307   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4308   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4309                                                        operands[2], operands[3],
4310                                                        p));
4311   DONE;
4312 })
4313
4314 (define_expand "aarch64_sqdmull2_laneq<mode>"
4315   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4316    (match_operand:VQ_HSI 1 "register_operand" "w")
4317    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4318    (match_operand:SI 3 "immediate_operand" "i")]
4319   "TARGET_SIMD"
4320 {
4321   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4322   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4323                                                        operands[2], operands[3],
4324                                                        p));
4325   DONE;
4326 })
4327
4328 ;; vqdmull2_n
4329
4330 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4331   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4332         (ss_ashift:<VWIDE>
4333              (mult:<VWIDE>
4334                (sign_extend:<VWIDE>
4335                  (vec_select:<VHALF>
4336                    (match_operand:VQ_HSI 1 "register_operand" "w")
4337                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4338                (sign_extend:<VWIDE>
4339                  (vec_duplicate:<VHALF>
4340                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4341                )
4342              (const_int 1)))]
4343   "TARGET_SIMD"
4344   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4345   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4346 )
4347
4348 (define_expand "aarch64_sqdmull2_n<mode>"
4349   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4350    (match_operand:VQ_HSI 1 "register_operand" "w")
4351    (match_operand:<VEL> 2 "register_operand" "w")]
4352   "TARGET_SIMD"
4353 {
4354   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4355   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4356                                                     operands[2], p));
4357   DONE;
4358 })
4359
4360 ;; vshl
4361
4362 (define_insn "aarch64_<sur>shl<mode>"
4363   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4364         (unspec:VSDQ_I_DI
4365           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4366            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4367          VSHL))]
4368   "TARGET_SIMD"
4369   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4370   [(set_attr "type" "neon_shift_reg<q>")]
4371 )
4372
4373
4374 ;; vqshl
4375
4376 (define_insn "aarch64_<sur>q<r>shl<mode>"
4377   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4378         (unspec:VSDQ_I
4379           [(match_operand:VSDQ_I 1 "register_operand" "w")
4380            (match_operand:VSDQ_I 2 "register_operand" "w")]
4381          VQSHL))]
4382   "TARGET_SIMD"
4383   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4384   [(set_attr "type" "neon_sat_shift_reg<q>")]
4385 )
4386
4387 ;; vshll_n
4388
4389 (define_insn "aarch64_<sur>shll_n<mode>"
4390   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4391         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4392                          (match_operand:SI 2
4393                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4394                          VSHLL))]
4395   "TARGET_SIMD"
4396   {
4397     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4398       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4399     else
4400       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4401   }
4402   [(set_attr "type" "neon_shift_imm_long")]
4403 )
4404
4405 ;; vshll_high_n
4406
4407 (define_insn "aarch64_<sur>shll2_n<mode>"
4408   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4409         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4410                          (match_operand:SI 2 "immediate_operand" "i")]
4411                          VSHLL))]
4412   "TARGET_SIMD"
4413   {
4414     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4415       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4416     else
4417       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4418   }
4419   [(set_attr "type" "neon_shift_imm_long")]
4420 )
4421
4422 ;; vrshr_n
4423
4424 (define_insn "aarch64_<sur>shr_n<mode>"
4425   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4426         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4427                            (match_operand:SI 2
4428                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4429                           VRSHR_N))]
4430   "TARGET_SIMD"
4431   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4432   [(set_attr "type" "neon_sat_shift_imm<q>")]
4433 )
4434
4435 ;; v(r)sra_n
4436
4437 (define_insn "aarch64_<sur>sra_n<mode>"
4438   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4439         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4440                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4441                        (match_operand:SI 3
4442                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4443                       VSRA))]
4444   "TARGET_SIMD"
4445   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4446   [(set_attr "type" "neon_shift_acc<q>")]
4447 )
4448
4449 ;; vs<lr>i_n
4450
4451 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4452   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4453         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4454                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4455                        (match_operand:SI 3
4456                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4457                       VSLRI))]
4458   "TARGET_SIMD"
4459   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4460   [(set_attr "type" "neon_shift_imm<q>")]
4461 )
4462
4463 ;; vqshl(u)
4464
4465 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4466   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4467         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4468                        (match_operand:SI 2
4469                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4470                       VQSHL_N))]
4471   "TARGET_SIMD"
4472   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4473   [(set_attr "type" "neon_sat_shift_imm<q>")]
4474 )
4475
4476
4477 ;; vq(r)shr(u)n_n
4478
4479 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4480   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4481         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4482                             (match_operand:SI 2
4483                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4484                            VQSHRN_N))]
4485   "TARGET_SIMD"
4486   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4487   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4488 )
4489
4490
4491 ;; cm(eq|ge|gt|lt|le)
4492 ;; Note, we have constraints for Dz and Z as different expanders
4493 ;; have different ideas of what should be passed to this pattern.
4494
4495 (define_insn "aarch64_cm<optab><mode>"
4496   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4497         (neg:<V_INT_EQUIV>
4498           (COMPARISONS:<V_INT_EQUIV>
4499             (match_operand:VDQ_I 1 "register_operand" "w,w")
4500             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4501           )))]
4502   "TARGET_SIMD"
4503   "@
4504   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4505   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4506   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4507 )
4508
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4511         (neg:DI
4512           (COMPARISONS:DI
4513             (match_operand:DI 1 "register_operand" "w,w,r")
4514             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4515           )))
4516      (clobber (reg:CC CC_REGNUM))]
4517   "TARGET_SIMD"
4518   "#"
4519   "&& reload_completed"
4520   [(set (match_operand:DI 0 "register_operand")
4521         (neg:DI
4522           (COMPARISONS:DI
4523             (match_operand:DI 1 "register_operand")
4524             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4525           )))]
4526   {
4527     /* If we are in the general purpose register file,
4528        we split to a sequence of comparison and store.  */
4529     if (GP_REGNUM_P (REGNO (operands[0]))
4530         && GP_REGNUM_P (REGNO (operands[1])))
4531       {
4532         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4533         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4536         DONE;
4537       }
4538     /* Otherwise, we expand to a similar pattern which does not
4539        clobber CC_REGNUM.  */
4540   }
4541   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4542 )
4543
4544 (define_insn "*aarch64_cm<optab>di"
4545   [(set (match_operand:DI 0 "register_operand" "=w,w")
4546         (neg:DI
4547           (COMPARISONS:DI
4548             (match_operand:DI 1 "register_operand" "w,w")
4549             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4550           )))]
4551   "TARGET_SIMD && reload_completed"
4552   "@
4553   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4554   cm<optab>\t%d0, %d1, #0"
4555   [(set_attr "type" "neon_compare, neon_compare_zero")]
4556 )
4557
4558 ;; cm(hs|hi)
4559
4560 (define_insn "aarch64_cm<optab><mode>"
4561   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4562         (neg:<V_INT_EQUIV>
4563           (UCOMPARISONS:<V_INT_EQUIV>
4564             (match_operand:VDQ_I 1 "register_operand" "w")
4565             (match_operand:VDQ_I 2 "register_operand" "w")
4566           )))]
4567   "TARGET_SIMD"
4568   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4569   [(set_attr "type" "neon_compare<q>")]
4570 )
4571
4572 (define_insn_and_split "aarch64_cm<optab>di"
4573   [(set (match_operand:DI 0 "register_operand" "=w,r")
4574         (neg:DI
4575           (UCOMPARISONS:DI
4576             (match_operand:DI 1 "register_operand" "w,r")
4577             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4578           )))
4579     (clobber (reg:CC CC_REGNUM))]
4580   "TARGET_SIMD"
4581   "#"
4582   "&& reload_completed"
4583   [(set (match_operand:DI 0 "register_operand")
4584         (neg:DI
4585           (UCOMPARISONS:DI
4586             (match_operand:DI 1 "register_operand")
4587             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4588           )))]
4589   {
4590     /* If we are in the general purpose register file,
4591        we split to a sequence of comparison and store.  */
4592     if (GP_REGNUM_P (REGNO (operands[0]))
4593         && GP_REGNUM_P (REGNO (operands[1])))
4594       {
4595         machine_mode mode = CCmode;
4596         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4597         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4598         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4599         DONE;
4600       }
4601     /* Otherwise, we expand to a similar pattern which does not
4602        clobber CC_REGNUM.  */
4603   }
4604   [(set_attr "type" "neon_compare,multiple")]
4605 )
4606
4607 (define_insn "*aarch64_cm<optab>di"
4608   [(set (match_operand:DI 0 "register_operand" "=w")
4609         (neg:DI
4610           (UCOMPARISONS:DI
4611             (match_operand:DI 1 "register_operand" "w")
4612             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4613           )))]
4614   "TARGET_SIMD && reload_completed"
4615   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4616   [(set_attr "type" "neon_compare")]
4617 )
4618
4619 ;; cmtst
4620
4621 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4622 ;; we don't have any insns using ne, and aarch64_vcond outputs
4623 ;; not (neg (eq (and x y) 0))
4624 ;; which is rewritten by simplify_rtx as
4625 ;; plus (eq (and x y) 0) -1.
4626
4627 (define_insn "aarch64_cmtst<mode>"
4628   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4629         (plus:<V_INT_EQUIV>
4630           (eq:<V_INT_EQUIV>
4631             (and:VDQ_I
4632               (match_operand:VDQ_I 1 "register_operand" "w")
4633               (match_operand:VDQ_I 2 "register_operand" "w"))
4634             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4635           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4636   ]
4637   "TARGET_SIMD"
4638   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4639   [(set_attr "type" "neon_tst<q>")]
4640 )
4641
4642 (define_insn_and_split "aarch64_cmtstdi"
4643   [(set (match_operand:DI 0 "register_operand" "=w,r")
4644         (neg:DI
4645           (ne:DI
4646             (and:DI
4647               (match_operand:DI 1 "register_operand" "w,r")
4648               (match_operand:DI 2 "register_operand" "w,r"))
4649             (const_int 0))))
4650     (clobber (reg:CC CC_REGNUM))]
4651   "TARGET_SIMD"
4652   "#"
4653   "&& reload_completed"
4654   [(set (match_operand:DI 0 "register_operand")
4655         (neg:DI
4656           (ne:DI
4657             (and:DI
4658               (match_operand:DI 1 "register_operand")
4659               (match_operand:DI 2 "register_operand"))
4660             (const_int 0))))]
4661   {
4662     /* If we are in the general purpose register file,
4663        we split to a sequence of comparison and store.  */
4664     if (GP_REGNUM_P (REGNO (operands[0]))
4665         && GP_REGNUM_P (REGNO (operands[1])))
4666       {
4667         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4668         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4669         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4670         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4671         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4672         DONE;
4673       }
4674     /* Otherwise, we expand to a similar pattern which does not
4675        clobber CC_REGNUM.  */
4676   }
4677   [(set_attr "type" "neon_tst,multiple")]
4678 )
4679
4680 (define_insn "*aarch64_cmtstdi"
4681   [(set (match_operand:DI 0 "register_operand" "=w")
4682         (neg:DI
4683           (ne:DI
4684             (and:DI
4685               (match_operand:DI 1 "register_operand" "w")
4686               (match_operand:DI 2 "register_operand" "w"))
4687             (const_int 0))))]
4688   "TARGET_SIMD"
4689   "cmtst\t%d0, %d1, %d2"
4690   [(set_attr "type" "neon_tst")]
4691 )
4692
4693 ;; fcm(eq|ge|gt|le|lt)
4694
4695 (define_insn "aarch64_cm<optab><mode>"
4696   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4697         (neg:<V_INT_EQUIV>
4698           (COMPARISONS:<V_INT_EQUIV>
4699             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4700             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4701           )))]
4702   "TARGET_SIMD"
4703   "@
4704   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4705   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4706   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4707 )
4708
4709 ;; fac(ge|gt)
4710 ;; Note we can also handle what would be fac(le|lt) by
4711 ;; generating fac(ge|gt).
4712
4713 (define_insn "aarch64_fac<optab><mode>"
4714   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4715         (neg:<V_INT_EQUIV>
4716           (FAC_COMPARISONS:<V_INT_EQUIV>
4717             (abs:VHSDF_HSDF
4718               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4719             (abs:VHSDF_HSDF
4720               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4721   )))]
4722   "TARGET_SIMD"
4723   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4724   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4725 )
4726
4727 ;; addp
4728
4729 (define_insn "aarch64_addp<mode>"
4730   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4731         (unspec:VD_BHSI
4732           [(match_operand:VD_BHSI 1 "register_operand" "w")
4733            (match_operand:VD_BHSI 2 "register_operand" "w")]
4734           UNSPEC_ADDP))]
4735   "TARGET_SIMD"
4736   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4737   [(set_attr "type" "neon_reduc_add<q>")]
4738 )
4739
4740 (define_insn "aarch64_addpdi"
4741   [(set (match_operand:DI 0 "register_operand" "=w")
4742         (unspec:DI
4743           [(match_operand:V2DI 1 "register_operand" "w")]
4744           UNSPEC_ADDP))]
4745   "TARGET_SIMD"
4746   "addp\t%d0, %1.2d"
4747   [(set_attr "type" "neon_reduc_add")]
4748 )
4749
4750 ;; sqrt
4751
4752 (define_expand "sqrt<mode>2"
4753   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4754         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4755   "TARGET_SIMD"
4756 {
4757   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4758     DONE;
4759 })
4760
4761 (define_insn "*sqrt<mode>2"
4762   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4763         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4764   "TARGET_SIMD"
4765   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4766   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4767 )
4768
4769 ;; Patterns for vector struct loads and stores.
4770
4771 (define_insn "aarch64_simd_ld2<mode>"
4772   [(set (match_operand:OI 0 "register_operand" "=w")
4773         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4774                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4775                    UNSPEC_LD2))]
4776   "TARGET_SIMD"
4777   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4778   [(set_attr "type" "neon_load2_2reg<q>")]
4779 )
4780
4781 (define_insn "aarch64_simd_ld2r<mode>"
4782   [(set (match_operand:OI 0 "register_operand" "=w")
4783        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4784                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4785                   UNSPEC_LD2_DUP))]
4786   "TARGET_SIMD"
4787   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4788   [(set_attr "type" "neon_load2_all_lanes<q>")]
4789 )
4790
4791 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4792   [(set (match_operand:OI 0 "register_operand" "=w")
4793         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4794                     (match_operand:OI 2 "register_operand" "0")
4795                     (match_operand:SI 3 "immediate_operand" "i")
4796                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4797                    UNSPEC_LD2_LANE))]
4798   "TARGET_SIMD"
4799   {
4800     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4801     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4802   }
4803   [(set_attr "type" "neon_load2_one_lane")]
4804 )
4805
4806 (define_expand "vec_load_lanesoi<mode>"
4807   [(set (match_operand:OI 0 "register_operand" "=w")
4808         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4809                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4810                    UNSPEC_LD2))]
4811   "TARGET_SIMD"
4812 {
4813   if (BYTES_BIG_ENDIAN)
4814     {
4815       rtx tmp = gen_reg_rtx (OImode);
4816       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4817       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4818       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4819     }
4820   else
4821     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4822   DONE;
4823 })
4824
4825 (define_insn "aarch64_simd_st2<mode>"
4826   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4827         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4828                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4829                    UNSPEC_ST2))]
4830   "TARGET_SIMD"
4831   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4832   [(set_attr "type" "neon_store2_2reg<q>")]
4833 )
4834
4835 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4836 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4837   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4838         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4839                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4840                     (match_operand:SI 2 "immediate_operand" "i")]
4841                    UNSPEC_ST2_LANE))]
4842   "TARGET_SIMD"
4843   {
4844     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4845     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4846   }
4847   [(set_attr "type" "neon_store2_one_lane<q>")]
4848 )
4849
4850 (define_expand "vec_store_lanesoi<mode>"
4851   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4852         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4853                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4854                    UNSPEC_ST2))]
4855   "TARGET_SIMD"
4856 {
4857   if (BYTES_BIG_ENDIAN)
4858     {
4859       rtx tmp = gen_reg_rtx (OImode);
4860       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4861       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4862       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4863     }
4864   else
4865     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4866   DONE;
4867 })
4868
4869 (define_insn "aarch64_simd_ld3<mode>"
4870   [(set (match_operand:CI 0 "register_operand" "=w")
4871         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4872                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4873                    UNSPEC_LD3))]
4874   "TARGET_SIMD"
4875   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4876   [(set_attr "type" "neon_load3_3reg<q>")]
4877 )
4878
4879 (define_insn "aarch64_simd_ld3r<mode>"
4880   [(set (match_operand:CI 0 "register_operand" "=w")
4881        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4882                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4883                   UNSPEC_LD3_DUP))]
4884   "TARGET_SIMD"
4885   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4886   [(set_attr "type" "neon_load3_all_lanes<q>")]
4887 )
4888
4889 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4890   [(set (match_operand:CI 0 "register_operand" "=w")
4891         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4892                     (match_operand:CI 2 "register_operand" "0")
4893                     (match_operand:SI 3 "immediate_operand" "i")
4894                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4895                    UNSPEC_LD3_LANE))]
4896   "TARGET_SIMD"
4897 {
4898     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4899     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4900 }
4901   [(set_attr "type" "neon_load3_one_lane")]
4902 )
4903
4904 (define_expand "vec_load_lanesci<mode>"
4905   [(set (match_operand:CI 0 "register_operand" "=w")
4906         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4907                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4908                    UNSPEC_LD3))]
4909   "TARGET_SIMD"
4910 {
4911   if (BYTES_BIG_ENDIAN)
4912     {
4913       rtx tmp = gen_reg_rtx (CImode);
4914       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4915       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4916       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4917     }
4918   else
4919     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4920   DONE;
4921 })
4922
4923 (define_insn "aarch64_simd_st3<mode>"
4924   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4925         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4926                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4927                    UNSPEC_ST3))]
4928   "TARGET_SIMD"
4929   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4930   [(set_attr "type" "neon_store3_3reg<q>")]
4931 )
4932
4933 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4934 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4935   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4936         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4937                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4938                      (match_operand:SI 2 "immediate_operand" "i")]
4939                     UNSPEC_ST3_LANE))]
4940   "TARGET_SIMD"
4941   {
4942     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4943     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4944   }
4945   [(set_attr "type" "neon_store3_one_lane<q>")]
4946 )
4947
4948 (define_expand "vec_store_lanesci<mode>"
4949   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4950         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4951                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4952                    UNSPEC_ST3))]
4953   "TARGET_SIMD"
4954 {
4955   if (BYTES_BIG_ENDIAN)
4956     {
4957       rtx tmp = gen_reg_rtx (CImode);
4958       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4959       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4960       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4961     }
4962   else
4963     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4964   DONE;
4965 })
4966
4967 (define_insn "aarch64_simd_ld4<mode>"
4968   [(set (match_operand:XI 0 "register_operand" "=w")
4969         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4970                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971                    UNSPEC_LD4))]
4972   "TARGET_SIMD"
4973   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4974   [(set_attr "type" "neon_load4_4reg<q>")]
4975 )
4976
4977 (define_insn "aarch64_simd_ld4r<mode>"
4978   [(set (match_operand:XI 0 "register_operand" "=w")
4979        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4980                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4981                   UNSPEC_LD4_DUP))]
4982   "TARGET_SIMD"
4983   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4984   [(set_attr "type" "neon_load4_all_lanes<q>")]
4985 )
4986
4987 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4988   [(set (match_operand:XI 0 "register_operand" "=w")
4989         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4990                     (match_operand:XI 2 "register_operand" "0")
4991                     (match_operand:SI 3 "immediate_operand" "i")
4992                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4993                    UNSPEC_LD4_LANE))]
4994   "TARGET_SIMD"
4995 {
4996     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4997     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4998 }
4999   [(set_attr "type" "neon_load4_one_lane")]
5000 )
5001
5002 (define_expand "vec_load_lanesxi<mode>"
5003   [(set (match_operand:XI 0 "register_operand" "=w")
5004         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5005                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5006                    UNSPEC_LD4))]
5007   "TARGET_SIMD"
5008 {
5009   if (BYTES_BIG_ENDIAN)
5010     {
5011       rtx tmp = gen_reg_rtx (XImode);
5012       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5013       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5014       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5015     }
5016   else
5017     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5018   DONE;
5019 })
5020
5021 (define_insn "aarch64_simd_st4<mode>"
5022   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5023         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5024                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5025                    UNSPEC_ST4))]
5026   "TARGET_SIMD"
5027   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5028   [(set_attr "type" "neon_store4_4reg<q>")]
5029 )
5030
5031 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5032 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5033   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5034         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5035                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5036                      (match_operand:SI 2 "immediate_operand" "i")]
5037                     UNSPEC_ST4_LANE))]
5038   "TARGET_SIMD"
5039   {
5040     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5041     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5042   }
5043   [(set_attr "type" "neon_store4_one_lane<q>")]
5044 )
5045
5046 (define_expand "vec_store_lanesxi<mode>"
5047   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5048         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5049                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5050                    UNSPEC_ST4))]
5051   "TARGET_SIMD"
5052 {
5053   if (BYTES_BIG_ENDIAN)
5054     {
5055       rtx tmp = gen_reg_rtx (XImode);
5056       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5057       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5058       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5059     }
5060   else
5061     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5062   DONE;
5063 })
5064
5065 (define_insn_and_split "aarch64_rev_reglist<mode>"
5066 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5067         (unspec:VSTRUCT
5068                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5069                     (match_operand:V16QI 2 "register_operand" "w")]
5070                    UNSPEC_REV_REGLIST))]
5071   "TARGET_SIMD"
5072   "#"
5073   "&& reload_completed"
5074   [(const_int 0)]
5075 {
5076   int i;
5077   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5078   for (i = 0; i < nregs; i++)
5079     {
5080       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5081       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5082       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5083     }
5084   DONE;
5085 }
5086   [(set_attr "type" "neon_tbl1_q")
5087    (set_attr "length" "<insn_count>")]
5088 )
5089
5090 ;; Reload patterns for AdvSIMD register list operands.
5091
5092 (define_expand "mov<mode>"
5093   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5094         (match_operand:VSTRUCT 1 "general_operand" ""))]
5095   "TARGET_SIMD"
5096 {
5097   if (can_create_pseudo_p ())
5098     {
5099       if (GET_CODE (operands[0]) != REG)
5100         operands[1] = force_reg (<MODE>mode, operands[1]);
5101     }
5102 })
5103
5104
5105 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5106   [(match_operand:CI 0 "register_operand" "=w")
5107    (match_operand:DI 1 "register_operand" "r")
5108    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5109   "TARGET_SIMD"
5110 {
5111   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5112   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5113   DONE;
5114 })
5115
5116 (define_insn "aarch64_ld1_x3_<mode>"
5117   [(set (match_operand:CI 0 "register_operand" "=w")
5118         (unspec:CI
5119           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5120            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5121   "TARGET_SIMD"
5122   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5123   [(set_attr "type" "neon_load1_3reg<q>")]
5124 )
5125
5126 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5127   [(match_operand:DI 0 "register_operand" "")
5128    (match_operand:OI 1 "register_operand" "")
5129    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5130   "TARGET_SIMD"
5131 {
5132   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5133   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5134   DONE;
5135 })
5136
5137 (define_insn "aarch64_st1_x2_<mode>"
5138    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5139          (unspec:OI
5140           [(match_operand:OI 1 "register_operand" "w")
5141           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5142   "TARGET_SIMD"
5143   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5144   [(set_attr "type" "neon_store1_2reg<q>")]
5145 )
5146
5147 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5148   [(match_operand:DI 0 "register_operand" "")
5149    (match_operand:CI 1 "register_operand" "")
5150    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5151   "TARGET_SIMD"
5152 {
5153   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5154   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5155   DONE;
5156 })
5157
5158 (define_insn "aarch64_st1_x3_<mode>"
5159    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5160         (unspec:CI
5161          [(match_operand:CI 1 "register_operand" "w")
5162           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5163   "TARGET_SIMD"
5164   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5165   [(set_attr "type" "neon_store1_3reg<q>")]
5166 )
5167
5168 (define_insn "*aarch64_mov<mode>"
5169   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5170         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5171   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5172    && (register_operand (operands[0], <MODE>mode)
5173        || register_operand (operands[1], <MODE>mode))"
5174   "@
5175    #
5176    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5177    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5178   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5179                      neon_load<nregs>_<nregs>reg_q")
5180    (set_attr "length" "<insn_count>,4,4")]
5181 )
5182
5183 (define_insn "aarch64_be_ld1<mode>"
5184   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5185         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5186                              "aarch64_simd_struct_operand" "Utv")]
5187         UNSPEC_LD1))]
5188   "TARGET_SIMD"
5189   "ld1\\t{%0<Vmtype>}, %1"
5190   [(set_attr "type" "neon_load1_1reg<q>")]
5191 )
5192
5193 (define_insn "aarch64_be_st1<mode>"
5194   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5195         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5196         UNSPEC_ST1))]
5197   "TARGET_SIMD"
5198   "st1\\t{%1<Vmtype>}, %0"
5199   [(set_attr "type" "neon_store1_1reg<q>")]
5200 )
5201
5202 (define_insn "*aarch64_be_movoi"
5203   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5204         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5205   "TARGET_SIMD && BYTES_BIG_ENDIAN
5206    && (register_operand (operands[0], OImode)
5207        || register_operand (operands[1], OImode))"
5208   "@
5209    #
5210    stp\\t%q1, %R1, %0
5211    ldp\\t%q0, %R0, %1"
5212   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5213    (set_attr "length" "8,4,4")]
5214 )
5215
5216 (define_insn "*aarch64_be_movci"
5217   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5218         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5219   "TARGET_SIMD && BYTES_BIG_ENDIAN
5220    && (register_operand (operands[0], CImode)
5221        || register_operand (operands[1], CImode))"
5222   "#"
5223   [(set_attr "type" "multiple")
5224    (set_attr "length" "12,4,4")]
5225 )
5226
5227 (define_insn "*aarch64_be_movxi"
5228   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5229         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5230   "TARGET_SIMD && BYTES_BIG_ENDIAN
5231    && (register_operand (operands[0], XImode)
5232        || register_operand (operands[1], XImode))"
5233   "#"
5234   [(set_attr "type" "multiple")
5235    (set_attr "length" "16,4,4")]
5236 )
5237
5238 (define_split
5239   [(set (match_operand:OI 0 "register_operand")
5240         (match_operand:OI 1 "register_operand"))]
5241   "TARGET_SIMD && reload_completed"
5242   [(const_int 0)]
5243 {
5244   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5245   DONE;
5246 })
5247
5248 (define_split
5249   [(set (match_operand:CI 0 "nonimmediate_operand")
5250         (match_operand:CI 1 "general_operand"))]
5251   "TARGET_SIMD && reload_completed"
5252   [(const_int 0)]
5253 {
5254   if (register_operand (operands[0], CImode)
5255       && register_operand (operands[1], CImode))
5256     {
5257       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5258       DONE;
5259     }
5260   else if (BYTES_BIG_ENDIAN)
5261     {
5262       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5263                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5264       emit_move_insn (gen_lowpart (V16QImode,
5265                                    simplify_gen_subreg (TImode, operands[0],
5266                                                         CImode, 32)),
5267                       gen_lowpart (V16QImode,
5268                                    simplify_gen_subreg (TImode, operands[1],
5269                                                         CImode, 32)));
5270       DONE;
5271     }
5272   else
5273     FAIL;
5274 })
5275
5276 (define_split
5277   [(set (match_operand:XI 0 "nonimmediate_operand")
5278         (match_operand:XI 1 "general_operand"))]
5279   "TARGET_SIMD && reload_completed"
5280   [(const_int 0)]
5281 {
5282   if (register_operand (operands[0], XImode)
5283       && register_operand (operands[1], XImode))
5284     {
5285       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5286       DONE;
5287     }
5288   else if (BYTES_BIG_ENDIAN)
5289     {
5290       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5291                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5292       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5293                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5294       DONE;
5295     }
5296   else
5297     FAIL;
5298 })
5299
5300 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5301   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5302    (match_operand:DI 1 "register_operand" "w")
5303    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5304   "TARGET_SIMD"
5305 {
5306   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5307   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5308                      * <VSTRUCT:nregs>);
5309
5310   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5311                                                                 mem));
5312   DONE;
5313 })
5314
5315 (define_insn "aarch64_ld2<mode>_dreg"
5316   [(set (match_operand:OI 0 "register_operand" "=w")
5317         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5318                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5319                    UNSPEC_LD2_DREG))]
5320   "TARGET_SIMD"
5321   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5322   [(set_attr "type" "neon_load2_2reg<q>")]
5323 )
5324
5325 (define_insn "aarch64_ld2<mode>_dreg"
5326   [(set (match_operand:OI 0 "register_operand" "=w")
5327         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5328                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329                    UNSPEC_LD2_DREG))]
5330   "TARGET_SIMD"
5331   "ld1\\t{%S0.1d - %T0.1d}, %1"
5332   [(set_attr "type" "neon_load1_2reg<q>")]
5333 )
5334
5335 (define_insn "aarch64_ld3<mode>_dreg"
5336   [(set (match_operand:CI 0 "register_operand" "=w")
5337         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5338                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5339                    UNSPEC_LD3_DREG))]
5340   "TARGET_SIMD"
5341   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5342   [(set_attr "type" "neon_load3_3reg<q>")]
5343 )
5344
5345 (define_insn "aarch64_ld3<mode>_dreg"
5346   [(set (match_operand:CI 0 "register_operand" "=w")
5347         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5348                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5349                    UNSPEC_LD3_DREG))]
5350   "TARGET_SIMD"
5351   "ld1\\t{%S0.1d - %U0.1d}, %1"
5352   [(set_attr "type" "neon_load1_3reg<q>")]
5353 )
5354
5355 (define_insn "aarch64_ld4<mode>_dreg"
5356   [(set (match_operand:XI 0 "register_operand" "=w")
5357         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5358                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5359                    UNSPEC_LD4_DREG))]
5360   "TARGET_SIMD"
5361   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5362   [(set_attr "type" "neon_load4_4reg<q>")]
5363 )
5364
5365 (define_insn "aarch64_ld4<mode>_dreg"
5366   [(set (match_operand:XI 0 "register_operand" "=w")
5367         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5368                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5369                    UNSPEC_LD4_DREG))]
5370   "TARGET_SIMD"
5371   "ld1\\t{%S0.1d - %V0.1d}, %1"
5372   [(set_attr "type" "neon_load1_4reg<q>")]
5373 )
5374
5375 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5376  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5377   (match_operand:DI 1 "register_operand" "r")
5378   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5379   "TARGET_SIMD"
5380 {
5381   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5382   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5383
5384   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5385   DONE;
5386 })
5387
5388 (define_expand "aarch64_ld1<VALL_F16:mode>"
5389  [(match_operand:VALL_F16 0 "register_operand")
5390   (match_operand:DI 1 "register_operand")]
5391   "TARGET_SIMD"
5392 {
5393   machine_mode mode = <VALL_F16:MODE>mode;
5394   rtx mem = gen_rtx_MEM (mode, operands[1]);
5395
5396   if (BYTES_BIG_ENDIAN)
5397     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5398   else
5399     emit_move_insn (operands[0], mem);
5400   DONE;
5401 })
5402
5403 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5404  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5405   (match_operand:DI 1 "register_operand" "r")
5406   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5407   "TARGET_SIMD"
5408 {
5409   machine_mode mode = <VSTRUCT:MODE>mode;
5410   rtx mem = gen_rtx_MEM (mode, operands[1]);
5411
5412   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5413   DONE;
5414 })
5415
5416 (define_expand "aarch64_ld1x2<VQ:mode>"
5417  [(match_operand:OI 0 "register_operand" "=w")
5418   (match_operand:DI 1 "register_operand" "r")
5419   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5420   "TARGET_SIMD"
5421 {
5422   machine_mode mode = OImode;
5423   rtx mem = gen_rtx_MEM (mode, operands[1]);
5424
5425   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5426   DONE;
5427 })
5428
5429 (define_expand "aarch64_ld1x2<VDC:mode>"
5430  [(match_operand:OI 0 "register_operand" "=w")
5431   (match_operand:DI 1 "register_operand" "r")
5432   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5433   "TARGET_SIMD"
5434 {
5435   machine_mode mode = OImode;
5436   rtx mem = gen_rtx_MEM (mode, operands[1]);
5437
5438   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5439   DONE;
5440 })
5441
5442
5443 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5444   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5445         (match_operand:DI 1 "register_operand" "w")
5446         (match_operand:VSTRUCT 2 "register_operand" "0")
5447         (match_operand:SI 3 "immediate_operand" "i")
5448         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5449   "TARGET_SIMD"
5450 {
5451   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5452   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5453                      * <VSTRUCT:nregs>);
5454
5455   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5456   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5457         operands[0], mem, operands[2], operands[3]));
5458   DONE;
5459 })
5460
5461 ;; Expanders for builtins to extract vector registers from large
5462 ;; opaque integer modes.
5463
5464 ;; D-register list.
5465
5466 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5467  [(match_operand:VDC 0 "register_operand" "=w")
5468   (match_operand:VSTRUCT 1 "register_operand" "w")
5469   (match_operand:SI 2 "immediate_operand" "i")]
5470   "TARGET_SIMD"
5471 {
5472   int part = INTVAL (operands[2]);
5473   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5474   int offset = part * 16;
5475
5476   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5477   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5478   DONE;
5479 })
5480
5481 ;; Q-register list.
5482
5483 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5484  [(match_operand:VQ 0 "register_operand" "=w")
5485   (match_operand:VSTRUCT 1 "register_operand" "w")
5486   (match_operand:SI 2 "immediate_operand" "i")]
5487   "TARGET_SIMD"
5488 {
5489   int part = INTVAL (operands[2]);
5490   int offset = part * 16;
5491
5492   emit_move_insn (operands[0],
5493                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5494   DONE;
5495 })
5496
5497 ;; Permuted-store expanders for neon intrinsics.
5498
5499 ;; Permute instructions
5500
5501 ;; vec_perm support
5502
5503 (define_expand "vec_perm<mode>"
5504   [(match_operand:VB 0 "register_operand")
5505    (match_operand:VB 1 "register_operand")
5506    (match_operand:VB 2 "register_operand")
5507    (match_operand:VB 3 "register_operand")]
5508   "TARGET_SIMD"
5509 {
5510   aarch64_expand_vec_perm (operands[0], operands[1],
5511                            operands[2], operands[3], <nunits>);
5512   DONE;
5513 })
5514
5515 (define_insn "aarch64_tbl1<mode>"
5516   [(set (match_operand:VB 0 "register_operand" "=w")
5517         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5518                     (match_operand:VB 2 "register_operand" "w")]
5519                    UNSPEC_TBL))]
5520   "TARGET_SIMD"
5521   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5522   [(set_attr "type" "neon_tbl1<q>")]
5523 )
5524
5525 ;; Two source registers.
5526
5527 (define_insn "aarch64_tbl2v16qi"
5528   [(set (match_operand:V16QI 0 "register_operand" "=w")
5529         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5530                        (match_operand:V16QI 2 "register_operand" "w")]
5531                       UNSPEC_TBL))]
5532   "TARGET_SIMD"
5533   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5534   [(set_attr "type" "neon_tbl2_q")]
5535 )
5536
5537 (define_insn "aarch64_tbl3<mode>"
5538   [(set (match_operand:VB 0 "register_operand" "=w")
5539         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5540                       (match_operand:VB 2 "register_operand" "w")]
5541                       UNSPEC_TBL))]
5542   "TARGET_SIMD"
5543   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5544   [(set_attr "type" "neon_tbl3")]
5545 )
5546
5547 (define_insn "aarch64_tbx4<mode>"
5548   [(set (match_operand:VB 0 "register_operand" "=w")
5549         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5550                       (match_operand:OI 2 "register_operand" "w")
5551                       (match_operand:VB 3 "register_operand" "w")]
5552                       UNSPEC_TBX))]
5553   "TARGET_SIMD"
5554   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5555   [(set_attr "type" "neon_tbl4")]
5556 )
5557
5558 ;; Three source registers.
5559
5560 (define_insn "aarch64_qtbl3<mode>"
5561   [(set (match_operand:VB 0 "register_operand" "=w")
5562         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5563                       (match_operand:VB 2 "register_operand" "w")]
5564                       UNSPEC_TBL))]
5565   "TARGET_SIMD"
5566   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5567   [(set_attr "type" "neon_tbl3")]
5568 )
5569
5570 (define_insn "aarch64_qtbx3<mode>"
5571   [(set (match_operand:VB 0 "register_operand" "=w")
5572         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5573                       (match_operand:CI 2 "register_operand" "w")
5574                       (match_operand:VB 3 "register_operand" "w")]
5575                       UNSPEC_TBX))]
5576   "TARGET_SIMD"
5577   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5578   [(set_attr "type" "neon_tbl3")]
5579 )
5580
5581 ;; Four source registers.
5582
5583 (define_insn "aarch64_qtbl4<mode>"
5584   [(set (match_operand:VB 0 "register_operand" "=w")
5585         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5586                       (match_operand:VB 2 "register_operand" "w")]
5587                       UNSPEC_TBL))]
5588   "TARGET_SIMD"
5589   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5590   [(set_attr "type" "neon_tbl4")]
5591 )
5592
5593 (define_insn "aarch64_qtbx4<mode>"
5594   [(set (match_operand:VB 0 "register_operand" "=w")
5595         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5596                       (match_operand:XI 2 "register_operand" "w")
5597                       (match_operand:VB 3 "register_operand" "w")]
5598                       UNSPEC_TBX))]
5599   "TARGET_SIMD"
5600   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5601   [(set_attr "type" "neon_tbl4")]
5602 )
5603
5604 (define_insn_and_split "aarch64_combinev16qi"
5605   [(set (match_operand:OI 0 "register_operand" "=w")
5606         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5607                     (match_operand:V16QI 2 "register_operand" "w")]
5608                    UNSPEC_CONCAT))]
5609   "TARGET_SIMD"
5610   "#"
5611   "&& reload_completed"
5612   [(const_int 0)]
5613 {
5614   aarch64_split_combinev16qi (operands);
5615   DONE;
5616 }
5617 [(set_attr "type" "multiple")]
5618 )
5619
5620 ;; This instruction's pattern is generated directly by
5621 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5622 ;; need corresponding changes there.
5623 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5624   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5625         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5626                           (match_operand:VALL_F16 2 "register_operand" "w")]
5627          PERMUTE))]
5628   "TARGET_SIMD"
5629   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5630   [(set_attr "type" "neon_permute<q>")]
5631 )
5632
5633 ;; This instruction's pattern is generated directly by
5634 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5635 ;; need corresponding changes there.  Note that the immediate (third)
5636 ;; operand is a lane index not a byte index.
5637 (define_insn "aarch64_ext<mode>"
5638   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5639         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5640                           (match_operand:VALL_F16 2 "register_operand" "w")
5641                           (match_operand:SI 3 "immediate_operand" "i")]
5642          UNSPEC_EXT))]
5643   "TARGET_SIMD"
5644 {
5645   operands[3] = GEN_INT (INTVAL (operands[3])
5646       * GET_MODE_UNIT_SIZE (<MODE>mode));
5647   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5648 }
5649   [(set_attr "type" "neon_ext<q>")]
5650 )
5651
5652 ;; This instruction's pattern is generated directly by
5653 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5654 ;; need corresponding changes there.
5655 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5656   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5657         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5658                     REVERSE))]
5659   "TARGET_SIMD"
5660   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5661   [(set_attr "type" "neon_rev<q>")]
5662 )
5663
5664 (define_insn "aarch64_st2<mode>_dreg"
5665   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5666         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5667                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668                    UNSPEC_ST2))]
5669   "TARGET_SIMD"
5670   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5671   [(set_attr "type" "neon_store2_2reg")]
5672 )
5673
5674 (define_insn "aarch64_st2<mode>_dreg"
5675   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5676         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5677                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5678                    UNSPEC_ST2))]
5679   "TARGET_SIMD"
5680   "st1\\t{%S1.1d - %T1.1d}, %0"
5681   [(set_attr "type" "neon_store1_2reg")]
5682 )
5683
5684 (define_insn "aarch64_st3<mode>_dreg"
5685   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5686         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5687                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5688                    UNSPEC_ST3))]
5689   "TARGET_SIMD"
5690   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5691   [(set_attr "type" "neon_store3_3reg")]
5692 )
5693
5694 (define_insn "aarch64_st3<mode>_dreg"
5695   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5696         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5697                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5698                    UNSPEC_ST3))]
5699   "TARGET_SIMD"
5700   "st1\\t{%S1.1d - %U1.1d}, %0"
5701   [(set_attr "type" "neon_store1_3reg")]
5702 )
5703
5704 (define_insn "aarch64_st4<mode>_dreg"
5705   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5706         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5707                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5708                    UNSPEC_ST4))]
5709   "TARGET_SIMD"
5710   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5711   [(set_attr "type" "neon_store4_4reg")]
5712 )
5713
5714 (define_insn "aarch64_st4<mode>_dreg"
5715   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5716         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5717                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5718                    UNSPEC_ST4))]
5719   "TARGET_SIMD"
5720   "st1\\t{%S1.1d - %V1.1d}, %0"
5721   [(set_attr "type" "neon_store1_4reg")]
5722 )
5723
5724 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5725  [(match_operand:DI 0 "register_operand" "r")
5726   (match_operand:VSTRUCT 1 "register_operand" "w")
5727   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728   "TARGET_SIMD"
5729 {
5730   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5731   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5732
5733   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5734   DONE;
5735 })
5736
5737 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5738  [(match_operand:DI 0 "register_operand" "r")
5739   (match_operand:VSTRUCT 1 "register_operand" "w")
5740   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5741   "TARGET_SIMD"
5742 {
5743   machine_mode mode = <VSTRUCT:MODE>mode;
5744   rtx mem = gen_rtx_MEM (mode, operands[0]);
5745
5746   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5747   DONE;
5748 })
5749
5750 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5751  [(match_operand:DI 0 "register_operand" "r")
5752   (match_operand:VSTRUCT 1 "register_operand" "w")
5753   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5754   (match_operand:SI 2 "immediate_operand")]
5755   "TARGET_SIMD"
5756 {
5757   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5758   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5759                      * <VSTRUCT:nregs>);
5760
5761   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5762                 mem, operands[1], operands[2]));
5763   DONE;
5764 })
5765
5766 (define_expand "aarch64_st1<VALL_F16:mode>"
5767  [(match_operand:DI 0 "register_operand")
5768   (match_operand:VALL_F16 1 "register_operand")]
5769   "TARGET_SIMD"
5770 {
5771   machine_mode mode = <VALL_F16:MODE>mode;
5772   rtx mem = gen_rtx_MEM (mode, operands[0]);
5773
5774   if (BYTES_BIG_ENDIAN)
5775     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5776   else
5777     emit_move_insn (mem, operands[1]);
5778   DONE;
5779 })
5780
5781 ;; Expander for builtins to insert vector registers into large
5782 ;; opaque integer modes.
5783
5784 ;; Q-register list.  We don't need a D-reg inserter as we zero
5785 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5786
5787 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5788  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5789   (match_operand:VSTRUCT 1 "register_operand" "0")
5790   (match_operand:VQ 2 "register_operand" "w")
5791   (match_operand:SI 3 "immediate_operand" "i")]
5792   "TARGET_SIMD"
5793 {
5794   int part = INTVAL (operands[3]);
5795   int offset = part * 16;
5796
5797   emit_move_insn (operands[0], operands[1]);
5798   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5799                   operands[2]);
5800   DONE;
5801 })
5802
5803 ;; Standard pattern name vec_init<mode><Vel>.
5804
5805 (define_expand "vec_init<mode><Vel>"
5806   [(match_operand:VALL_F16 0 "register_operand" "")
5807    (match_operand 1 "" "")]
5808   "TARGET_SIMD"
5809 {
5810   aarch64_expand_vector_init (operands[0], operands[1]);
5811   DONE;
5812 })
5813
5814 (define_insn "*aarch64_simd_ld1r<mode>"
5815   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5816         (vec_duplicate:VALL_F16
5817           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5818   "TARGET_SIMD"
5819   "ld1r\\t{%0.<Vtype>}, %1"
5820   [(set_attr "type" "neon_load1_all_lanes")]
5821 )
5822
5823 (define_insn "aarch64_simd_ld1<mode>_x2"
5824   [(set (match_operand:OI 0 "register_operand" "=w")
5825         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5826                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5827                    UNSPEC_LD1))]
5828   "TARGET_SIMD"
5829   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5830   [(set_attr "type" "neon_load1_2reg<q>")]
5831 )
5832
5833 (define_insn "aarch64_simd_ld1<mode>_x2"
5834   [(set (match_operand:OI 0 "register_operand" "=w")
5835         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5836                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5837                    UNSPEC_LD1))]
5838   "TARGET_SIMD"
5839   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5840   [(set_attr "type" "neon_load1_2reg<q>")]
5841 )
5842
5843
5844 (define_insn "aarch64_frecpe<mode>"
5845   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5846         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5847          UNSPEC_FRECPE))]
5848   "TARGET_SIMD"
5849   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5850   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5851 )
5852
5853 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5854   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5855         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5856          FRECP))]
5857   "TARGET_SIMD"
5858   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5859   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5860 )
5861
5862 (define_insn "aarch64_frecps<mode>"
5863   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5864         (unspec:VHSDF_HSDF
5865           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5866           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5867           UNSPEC_FRECPS))]
5868   "TARGET_SIMD"
5869   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5870   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5871 )
5872
5873 (define_insn "aarch64_urecpe<mode>"
5874   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5875         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5876                 UNSPEC_URECPE))]
5877  "TARGET_SIMD"
5878  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5879   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5880
5881 ;; Standard pattern name vec_extract<mode><Vel>.
5882
5883 (define_expand "vec_extract<mode><Vel>"
5884   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5885    (match_operand:VALL_F16 1 "register_operand" "")
5886    (match_operand:SI 2 "immediate_operand" "")]
5887   "TARGET_SIMD"
5888 {
5889     emit_insn
5890       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5891     DONE;
5892 })
5893
5894 ;; aes
5895
5896 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5897   [(set (match_operand:V16QI 0 "register_operand" "=w")
5898         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5899                        (match_operand:V16QI 2 "register_operand" "w")]
5900          CRYPTO_AES))]
5901   "TARGET_SIMD && TARGET_AES"
5902   "aes<aes_op>\\t%0.16b, %2.16b"
5903   [(set_attr "type" "crypto_aese")]
5904 )
5905
5906 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5907   [(set (match_operand:V16QI 0 "register_operand" "=w")
5908         (unspec:V16QI [(xor:V16QI
5909                         (match_operand:V16QI 1 "register_operand" "%0")
5910                         (match_operand:V16QI 2 "register_operand" "w"))
5911                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5912                        CRYPTO_AES))]
5913   "TARGET_SIMD && TARGET_AES"
5914   "aes<aes_op>\\t%0.16b, %2.16b"
5915   [(set_attr "type" "crypto_aese")]
5916 )
5917
5918 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5919   [(set (match_operand:V16QI 0 "register_operand" "=w")
5920         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5921         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5922                    (match_operand:V16QI 2 "register_operand" "w"))]
5923         CRYPTO_AES))]
5924   "TARGET_SIMD && TARGET_AES"
5925   "aes<aes_op>\\t%0.16b, %2.16b"
5926   [(set_attr "type" "crypto_aese")]
5927 )
5928
5929 ;; When AES/AESMC fusion is enabled we want the register allocation to
5930 ;; look like:
5931 ;;    AESE Vn, _
5932 ;;    AESMC Vn, Vn
5933 ;; So prefer to tie operand 1 to operand 0 when fusing.
5934
5935 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5936   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5937         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5938          CRYPTO_AESMC))]
5939   "TARGET_SIMD && TARGET_AES"
5940   "aes<aesmc_op>\\t%0.16b, %1.16b"
5941   [(set_attr "type" "crypto_aesmc")
5942    (set_attr_alternative "enabled"
5943      [(if_then_else (match_test
5944                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5945                      (const_string "yes" )
5946                      (const_string "no"))
5947       (const_string "yes")])]
5948 )
5949
5950 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5951 ;; and enforce the register dependency without scheduling or register
5952 ;; allocation messing up the order or introducing moves inbetween.
5953 ;;  Mash the two together during combine.
5954
5955 (define_insn "*aarch64_crypto_aese_fused"
5956   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5957         (unspec:V16QI
5958           [(unspec:V16QI
5959             [(match_operand:V16QI 1 "register_operand" "0")
5960              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5961           ] UNSPEC_AESMC))]
5962   "TARGET_SIMD && TARGET_AES
5963    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5964   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5965   [(set_attr "type" "crypto_aese")
5966    (set_attr "length" "8")]
5967 )
5968
5969 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5970 ;; and enforce the register dependency without scheduling or register
5971 ;; allocation messing up the order or introducing moves inbetween.
5972 ;;  Mash the two together during combine.
5973
5974 (define_insn "*aarch64_crypto_aesd_fused"
5975   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5976         (unspec:V16QI
5977           [(unspec:V16QI
5978             [(match_operand:V16QI 1 "register_operand" "0")
5979              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5980           ] UNSPEC_AESIMC))]
5981   "TARGET_SIMD && TARGET_AES
5982    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5983   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5984   [(set_attr "type" "crypto_aese")
5985    (set_attr "length" "8")]
5986 )
5987
5988 ;; sha1
5989
5990 (define_insn "aarch64_crypto_sha1hsi"
5991   [(set (match_operand:SI 0 "register_operand" "=w")
5992         (unspec:SI [(match_operand:SI 1
5993                        "register_operand" "w")]
5994          UNSPEC_SHA1H))]
5995   "TARGET_SIMD && TARGET_SHA2"
5996   "sha1h\\t%s0, %s1"
5997   [(set_attr "type" "crypto_sha1_fast")]
5998 )
5999
6000 (define_insn "aarch64_crypto_sha1hv4si"
6001   [(set (match_operand:SI 0 "register_operand" "=w")
6002         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6003                      (parallel [(const_int 0)]))]
6004          UNSPEC_SHA1H))]
6005   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6006   "sha1h\\t%s0, %s1"
6007   [(set_attr "type" "crypto_sha1_fast")]
6008 )
6009
6010 (define_insn "aarch64_be_crypto_sha1hv4si"
6011   [(set (match_operand:SI 0 "register_operand" "=w")
6012         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6013                      (parallel [(const_int 3)]))]
6014          UNSPEC_SHA1H))]
6015   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6016   "sha1h\\t%s0, %s1"
6017   [(set_attr "type" "crypto_sha1_fast")]
6018 )
6019
6020 (define_insn "aarch64_crypto_sha1su1v4si"
6021   [(set (match_operand:V4SI 0 "register_operand" "=w")
6022         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6023                       (match_operand:V4SI 2 "register_operand" "w")]
6024          UNSPEC_SHA1SU1))]
6025   "TARGET_SIMD && TARGET_SHA2"
6026   "sha1su1\\t%0.4s, %2.4s"
6027   [(set_attr "type" "crypto_sha1_fast")]
6028 )
6029
6030 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6031   [(set (match_operand:V4SI 0 "register_operand" "=w")
6032         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6033                       (match_operand:SI 2 "register_operand" "w")
6034                       (match_operand:V4SI 3 "register_operand" "w")]
6035          CRYPTO_SHA1))]
6036   "TARGET_SIMD && TARGET_SHA2"
6037   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6038   [(set_attr "type" "crypto_sha1_slow")]
6039 )
6040
6041 (define_insn "aarch64_crypto_sha1su0v4si"
6042   [(set (match_operand:V4SI 0 "register_operand" "=w")
6043         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6044                       (match_operand:V4SI 2 "register_operand" "w")
6045                       (match_operand:V4SI 3 "register_operand" "w")]
6046          UNSPEC_SHA1SU0))]
6047   "TARGET_SIMD && TARGET_SHA2"
6048   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6049   [(set_attr "type" "crypto_sha1_xor")]
6050 )
6051
6052 ;; sha256
6053
6054 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6055   [(set (match_operand:V4SI 0 "register_operand" "=w")
6056         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6057                       (match_operand:V4SI 2 "register_operand" "w")
6058                       (match_operand:V4SI 3 "register_operand" "w")]
6059          CRYPTO_SHA256))]
6060   "TARGET_SIMD && TARGET_SHA2"
6061   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6062   [(set_attr "type" "crypto_sha256_slow")]
6063 )
6064
6065 (define_insn "aarch64_crypto_sha256su0v4si"
6066   [(set (match_operand:V4SI 0 "register_operand" "=w")
6067         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6068                       (match_operand:V4SI 2 "register_operand" "w")]
6069          UNSPEC_SHA256SU0))]
6070   "TARGET_SIMD && TARGET_SHA2"
6071   "sha256su0\\t%0.4s, %2.4s"
6072   [(set_attr "type" "crypto_sha256_fast")]
6073 )
6074
6075 (define_insn "aarch64_crypto_sha256su1v4si"
6076   [(set (match_operand:V4SI 0 "register_operand" "=w")
6077         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6078                       (match_operand:V4SI 2 "register_operand" "w")
6079                       (match_operand:V4SI 3 "register_operand" "w")]
6080          UNSPEC_SHA256SU1))]
6081   "TARGET_SIMD && TARGET_SHA2"
6082   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6083   [(set_attr "type" "crypto_sha256_slow")]
6084 )
6085
6086 ;; sha512
6087
6088 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6089   [(set (match_operand:V2DI 0 "register_operand" "=w")
6090         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6091                       (match_operand:V2DI 2 "register_operand" "w")
6092                       (match_operand:V2DI 3 "register_operand" "w")]
6093          CRYPTO_SHA512))]
6094   "TARGET_SIMD && TARGET_SHA3"
6095   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6096   [(set_attr "type" "crypto_sha512")]
6097 )
6098
6099 (define_insn "aarch64_crypto_sha512su0qv2di"
6100   [(set (match_operand:V2DI 0 "register_operand" "=w")
6101         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6102                       (match_operand:V2DI 2 "register_operand" "w")]
6103          UNSPEC_SHA512SU0))]
6104   "TARGET_SIMD && TARGET_SHA3"
6105   "sha512su0\\t%0.2d, %2.2d"
6106   [(set_attr "type" "crypto_sha512")]
6107 )
6108
6109 (define_insn "aarch64_crypto_sha512su1qv2di"
6110   [(set (match_operand:V2DI 0 "register_operand" "=w")
6111         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6112                       (match_operand:V2DI 2 "register_operand" "w")
6113                       (match_operand:V2DI 3 "register_operand" "w")]
6114          UNSPEC_SHA512SU1))]
6115   "TARGET_SIMD && TARGET_SHA3"
6116   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6117   [(set_attr "type" "crypto_sha512")]
6118 )
6119
6120 ;; sha3
6121
6122 (define_insn "eor3q<mode>4"
6123   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6124         (xor:VQ_I
6125          (xor:VQ_I
6126           (match_operand:VQ_I 2 "register_operand" "w")
6127           (match_operand:VQ_I 3 "register_operand" "w"))
6128          (match_operand:VQ_I 1 "register_operand" "w")))]
6129   "TARGET_SIMD && TARGET_SHA3"
6130   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6131   [(set_attr "type" "crypto_sha3")]
6132 )
6133
6134 (define_insn "aarch64_rax1qv2di"
6135   [(set (match_operand:V2DI 0 "register_operand" "=w")
6136         (xor:V2DI
6137          (rotate:V2DI
6138           (match_operand:V2DI 2 "register_operand" "w")
6139           (const_int 1))
6140          (match_operand:V2DI 1 "register_operand" "w")))]
6141   "TARGET_SIMD && TARGET_SHA3"
6142   "rax1\\t%0.2d, %1.2d, %2.2d"
6143   [(set_attr "type" "crypto_sha3")]
6144 )
6145
6146 (define_insn "aarch64_xarqv2di"
6147   [(set (match_operand:V2DI 0 "register_operand" "=w")
6148         (rotatert:V2DI
6149          (xor:V2DI
6150           (match_operand:V2DI 1 "register_operand" "%w")
6151           (match_operand:V2DI 2 "register_operand" "w"))
6152          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6153   "TARGET_SIMD && TARGET_SHA3"
6154   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6155   [(set_attr "type" "crypto_sha3")]
6156 )
6157
6158 (define_insn "bcaxq<mode>4"
6159   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6160         (xor:VQ_I
6161          (and:VQ_I
6162           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6163           (match_operand:VQ_I 2 "register_operand" "w"))
6164          (match_operand:VQ_I 1 "register_operand" "w")))]
6165   "TARGET_SIMD && TARGET_SHA3"
6166   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6167   [(set_attr "type" "crypto_sha3")]
6168 )
6169
6170 ;; SM3
6171
6172 (define_insn "aarch64_sm3ss1qv4si"
6173   [(set (match_operand:V4SI 0 "register_operand" "=w")
6174         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6175                       (match_operand:V4SI 2 "register_operand" "w")
6176                       (match_operand:V4SI 3 "register_operand" "w")]
6177          UNSPEC_SM3SS1))]
6178   "TARGET_SIMD && TARGET_SM4"
6179   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6180   [(set_attr "type" "crypto_sm3")]
6181 )
6182
6183
6184 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6185   [(set (match_operand:V4SI 0 "register_operand" "=w")
6186         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187                       (match_operand:V4SI 2 "register_operand" "w")
6188                       (match_operand:V4SI 3 "register_operand" "w")
6189                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6190          CRYPTO_SM3TT))]
6191   "TARGET_SIMD && TARGET_SM4"
6192   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6193   [(set_attr "type" "crypto_sm3")]
6194 )
6195
6196 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6197   [(set (match_operand:V4SI 0 "register_operand" "=w")
6198         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6199                       (match_operand:V4SI 2 "register_operand" "w")
6200                       (match_operand:V4SI 3 "register_operand" "w")]
6201          CRYPTO_SM3PART))]
6202   "TARGET_SIMD && TARGET_SM4"
6203   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6204   [(set_attr "type" "crypto_sm3")]
6205 )
6206
6207 ;; SM4
6208
6209 (define_insn "aarch64_sm4eqv4si"
6210   [(set (match_operand:V4SI 0 "register_operand" "=w")
6211         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6212                       (match_operand:V4SI 2 "register_operand" "w")]
6213          UNSPEC_SM4E))]
6214   "TARGET_SIMD && TARGET_SM4"
6215   "sm4e\\t%0.4s, %2.4s"
6216   [(set_attr "type" "crypto_sm4")]
6217 )
6218
6219 (define_insn "aarch64_sm4ekeyqv4si"
6220   [(set (match_operand:V4SI 0 "register_operand" "=w")
6221         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6222                       (match_operand:V4SI 2 "register_operand" "w")]
6223          UNSPEC_SM4EKEY))]
6224   "TARGET_SIMD && TARGET_SM4"
6225   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6226   [(set_attr "type" "crypto_sm4")]
6227 )
6228
6229 ;; fp16fml
6230
6231 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6232   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6233         (unspec:VDQSF
6234          [(match_operand:VDQSF 1 "register_operand" "0")
6235           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6236           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6237          VFMLA16_LOW))]
6238   "TARGET_F16FML"
6239 {
6240   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6241                                             <nunits> * 2, false);
6242   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6243                                             <nunits> * 2, false);
6244
6245   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6246                                                                 operands[1],
6247                                                                 operands[2],
6248                                                                 operands[3],
6249                                                                 p1, p2));
6250   DONE;
6251
6252 })
6253
6254 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6255   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6256         (unspec:VDQSF
6257          [(match_operand:VDQSF 1 "register_operand" "0")
6258           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6259           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6260          VFMLA16_HIGH))]
6261   "TARGET_F16FML"
6262 {
6263   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6264   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6265
6266   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6267                                                                  operands[1],
6268                                                                  operands[2],
6269                                                                  operands[3],
6270                                                                  p1, p2));
6271   DONE;
6272 })
6273
6274 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6275   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6276         (fma:VDQSF
6277          (float_extend:VDQSF
6278           (vec_select:<VFMLA_SEL_W>
6279            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6280            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6281          (float_extend:VDQSF
6282           (vec_select:<VFMLA_SEL_W>
6283            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6284            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6285          (match_operand:VDQSF 1 "register_operand" "0")))]
6286   "TARGET_F16FML"
6287   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6288   [(set_attr "type" "neon_fp_mul_s")]
6289 )
6290
6291 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6292   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6293         (fma:VDQSF
6294          (float_extend:VDQSF
6295           (neg:<VFMLA_SEL_W>
6296            (vec_select:<VFMLA_SEL_W>
6297             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6298             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6299          (float_extend:VDQSF
6300           (vec_select:<VFMLA_SEL_W>
6301            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6302            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6303          (match_operand:VDQSF 1 "register_operand" "0")))]
6304   "TARGET_F16FML"
6305   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6306   [(set_attr "type" "neon_fp_mul_s")]
6307 )
6308
6309 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6310   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6311         (fma:VDQSF
6312          (float_extend:VDQSF
6313           (vec_select:<VFMLA_SEL_W>
6314            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6315            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6316          (float_extend:VDQSF
6317           (vec_select:<VFMLA_SEL_W>
6318            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6319            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6320          (match_operand:VDQSF 1 "register_operand" "0")))]
6321   "TARGET_F16FML"
6322   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6323   [(set_attr "type" "neon_fp_mul_s")]
6324 )
6325
6326 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6327   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6328         (fma:VDQSF
6329          (float_extend:VDQSF
6330           (neg:<VFMLA_SEL_W>
6331            (vec_select:<VFMLA_SEL_W>
6332             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6333             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6334          (float_extend:VDQSF
6335           (vec_select:<VFMLA_SEL_W>
6336            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6337            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6338          (match_operand:VDQSF 1 "register_operand" "0")))]
6339   "TARGET_F16FML"
6340   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6341   [(set_attr "type" "neon_fp_mul_s")]
6342 )
6343
6344 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6345   [(set (match_operand:V2SF 0 "register_operand" "")
6346         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6347                            (match_operand:V4HF 2 "register_operand" "")
6348                            (match_operand:V4HF 3 "register_operand" "")
6349                            (match_operand:SI 4 "aarch64_imm2" "")]
6350          VFMLA16_LOW))]
6351   "TARGET_F16FML"
6352 {
6353     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6354     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6355
6356     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6357                                                             operands[1],
6358                                                             operands[2],
6359                                                             operands[3],
6360                                                             p1, lane));
6361     DONE;
6362 }
6363 )
6364
6365 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6366   [(set (match_operand:V2SF 0 "register_operand" "")
6367         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6368                            (match_operand:V4HF 2 "register_operand" "")
6369                            (match_operand:V4HF 3 "register_operand" "")
6370                            (match_operand:SI 4 "aarch64_imm2" "")]
6371          VFMLA16_HIGH))]
6372   "TARGET_F16FML"
6373 {
6374     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6375     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6376
6377     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6378                                                              operands[1],
6379                                                              operands[2],
6380                                                              operands[3],
6381                                                              p1, lane));
6382     DONE;
6383 })
6384
6385 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6386   [(set (match_operand:V2SF 0 "register_operand" "=w")
6387         (fma:V2SF
6388          (float_extend:V2SF
6389            (vec_select:V2HF
6390             (match_operand:V4HF 2 "register_operand" "w")
6391             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6392          (float_extend:V2SF
6393            (vec_duplicate:V2HF
6394             (vec_select:HF
6395              (match_operand:V4HF 3 "register_operand" "x")
6396              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6397          (match_operand:V2SF 1 "register_operand" "0")))]
6398   "TARGET_F16FML"
6399   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6400   [(set_attr "type" "neon_fp_mul_s")]
6401 )
6402
6403 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6404   [(set (match_operand:V2SF 0 "register_operand" "=w")
6405         (fma:V2SF
6406          (float_extend:V2SF
6407           (neg:V2HF
6408            (vec_select:V2HF
6409             (match_operand:V4HF 2 "register_operand" "w")
6410             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6411          (float_extend:V2SF
6412           (vec_duplicate:V2HF
6413            (vec_select:HF
6414             (match_operand:V4HF 3 "register_operand" "x")
6415             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6416          (match_operand:V2SF 1 "register_operand" "0")))]
6417   "TARGET_F16FML"
6418   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6419   [(set_attr "type" "neon_fp_mul_s")]
6420 )
6421
6422 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6423   [(set (match_operand:V2SF 0 "register_operand" "=w")
6424         (fma:V2SF
6425          (float_extend:V2SF
6426            (vec_select:V2HF
6427             (match_operand:V4HF 2 "register_operand" "w")
6428             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6429          (float_extend:V2SF
6430            (vec_duplicate:V2HF
6431             (vec_select:HF
6432              (match_operand:V4HF 3 "register_operand" "x")
6433              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6434          (match_operand:V2SF 1 "register_operand" "0")))]
6435   "TARGET_F16FML"
6436   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6437   [(set_attr "type" "neon_fp_mul_s")]
6438 )
6439
6440 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6441   [(set (match_operand:V2SF 0 "register_operand" "=w")
6442         (fma:V2SF
6443          (float_extend:V2SF
6444            (neg:V2HF
6445             (vec_select:V2HF
6446              (match_operand:V4HF 2 "register_operand" "w")
6447              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6448          (float_extend:V2SF
6449            (vec_duplicate:V2HF
6450             (vec_select:HF
6451              (match_operand:V4HF 3 "register_operand" "x")
6452              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6453          (match_operand:V2SF 1 "register_operand" "0")))]
6454   "TARGET_F16FML"
6455   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6456   [(set_attr "type" "neon_fp_mul_s")]
6457 )
6458
6459 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6460   [(set (match_operand:V4SF 0 "register_operand" "")
6461         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6462                            (match_operand:V8HF 2 "register_operand" "")
6463                            (match_operand:V8HF 3 "register_operand" "")
6464                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6465          VFMLA16_LOW))]
6466   "TARGET_F16FML"
6467 {
6468     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6469     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6470
6471     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6472                                                               operands[1],
6473                                                               operands[2],
6474                                                               operands[3],
6475                                                               p1, lane));
6476     DONE;
6477 })
6478
6479 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6480   [(set (match_operand:V4SF 0 "register_operand" "")
6481         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6482                            (match_operand:V8HF 2 "register_operand" "")
6483                            (match_operand:V8HF 3 "register_operand" "")
6484                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6485          VFMLA16_HIGH))]
6486   "TARGET_F16FML"
6487 {
6488     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6489     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6490
6491     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6492                                                                operands[1],
6493                                                                operands[2],
6494                                                                operands[3],
6495                                                                p1, lane));
6496     DONE;
6497 })
6498
6499 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6500   [(set (match_operand:V4SF 0 "register_operand" "=w")
6501         (fma:V4SF
6502          (float_extend:V4SF
6503           (vec_select:V4HF
6504             (match_operand:V8HF 2 "register_operand" "w")
6505             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6506          (float_extend:V4SF
6507           (vec_duplicate:V4HF
6508            (vec_select:HF
6509             (match_operand:V8HF 3 "register_operand" "x")
6510             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6511          (match_operand:V4SF 1 "register_operand" "0")))]
6512   "TARGET_F16FML"
6513   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6514   [(set_attr "type" "neon_fp_mul_s")]
6515 )
6516
6517 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6518   [(set (match_operand:V4SF 0 "register_operand" "=w")
6519         (fma:V4SF
6520           (float_extend:V4SF
6521            (neg:V4HF
6522             (vec_select:V4HF
6523              (match_operand:V8HF 2 "register_operand" "w")
6524              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6525          (float_extend:V4SF
6526           (vec_duplicate:V4HF
6527            (vec_select:HF
6528             (match_operand:V8HF 3 "register_operand" "x")
6529             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6530          (match_operand:V4SF 1 "register_operand" "0")))]
6531   "TARGET_F16FML"
6532   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6533   [(set_attr "type" "neon_fp_mul_s")]
6534 )
6535
6536 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6537   [(set (match_operand:V4SF 0 "register_operand" "=w")
6538         (fma:V4SF
6539          (float_extend:V4SF
6540           (vec_select:V4HF
6541             (match_operand:V8HF 2 "register_operand" "w")
6542             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6543          (float_extend:V4SF
6544           (vec_duplicate:V4HF
6545            (vec_select:HF
6546             (match_operand:V8HF 3 "register_operand" "x")
6547             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6548          (match_operand:V4SF 1 "register_operand" "0")))]
6549   "TARGET_F16FML"
6550   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6551   [(set_attr "type" "neon_fp_mul_s")]
6552 )
6553
6554 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6555   [(set (match_operand:V4SF 0 "register_operand" "=w")
6556         (fma:V4SF
6557          (float_extend:V4SF
6558           (neg:V4HF
6559            (vec_select:V4HF
6560             (match_operand:V8HF 2 "register_operand" "w")
6561             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6562          (float_extend:V4SF
6563           (vec_duplicate:V4HF
6564            (vec_select:HF
6565             (match_operand:V8HF 3 "register_operand" "x")
6566             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6567          (match_operand:V4SF 1 "register_operand" "0")))]
6568   "TARGET_F16FML"
6569   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6570   [(set_attr "type" "neon_fp_mul_s")]
6571 )
6572
6573 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6574   [(set (match_operand:V2SF 0 "register_operand" "")
6575         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6576                       (match_operand:V4HF 2 "register_operand" "")
6577                       (match_operand:V8HF 3 "register_operand" "")
6578                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6579          VFMLA16_LOW))]
6580   "TARGET_F16FML"
6581 {
6582     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6583     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6584
6585     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6586                                                              operands[1],
6587                                                              operands[2],
6588                                                              operands[3],
6589                                                              p1, lane));
6590     DONE;
6591
6592 })
6593
6594 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6595   [(set (match_operand:V2SF 0 "register_operand" "")
6596         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6597                       (match_operand:V4HF 2 "register_operand" "")
6598                       (match_operand:V8HF 3 "register_operand" "")
6599                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6600          VFMLA16_HIGH))]
6601   "TARGET_F16FML"
6602 {
6603     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6604     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6605
6606     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6607                                                               operands[1],
6608                                                               operands[2],
6609                                                               operands[3],
6610                                                               p1, lane));
6611     DONE;
6612
6613 })
6614
6615 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6616   [(set (match_operand:V2SF 0 "register_operand" "=w")
6617         (fma:V2SF
6618          (float_extend:V2SF
6619            (vec_select:V2HF
6620             (match_operand:V4HF 2 "register_operand" "w")
6621             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6622          (float_extend:V2SF
6623           (vec_duplicate:V2HF
6624            (vec_select:HF
6625             (match_operand:V8HF 3 "register_operand" "x")
6626             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6627          (match_operand:V2SF 1 "register_operand" "0")))]
6628   "TARGET_F16FML"
6629   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6630   [(set_attr "type" "neon_fp_mul_s")]
6631 )
6632
6633 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6634   [(set (match_operand:V2SF 0 "register_operand" "=w")
6635         (fma:V2SF
6636          (float_extend:V2SF
6637           (neg:V2HF
6638            (vec_select:V2HF
6639             (match_operand:V4HF 2 "register_operand" "w")
6640             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6641          (float_extend:V2SF
6642           (vec_duplicate:V2HF
6643            (vec_select:HF
6644             (match_operand:V8HF 3 "register_operand" "x")
6645             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6646          (match_operand:V2SF 1 "register_operand" "0")))]
6647   "TARGET_F16FML"
6648   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6649   [(set_attr "type" "neon_fp_mul_s")]
6650 )
6651
6652 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6653   [(set (match_operand:V2SF 0 "register_operand" "=w")
6654         (fma:V2SF
6655          (float_extend:V2SF
6656            (vec_select:V2HF
6657             (match_operand:V4HF 2 "register_operand" "w")
6658             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6659          (float_extend:V2SF
6660           (vec_duplicate:V2HF
6661            (vec_select:HF
6662             (match_operand:V8HF 3 "register_operand" "x")
6663             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664          (match_operand:V2SF 1 "register_operand" "0")))]
6665   "TARGET_F16FML"
6666   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6667   [(set_attr "type" "neon_fp_mul_s")]
6668 )
6669
6670 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6671   [(set (match_operand:V2SF 0 "register_operand" "=w")
6672         (fma:V2SF
6673          (float_extend:V2SF
6674           (neg:V2HF
6675            (vec_select:V2HF
6676             (match_operand:V4HF 2 "register_operand" "w")
6677             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6678          (float_extend:V2SF
6679           (vec_duplicate:V2HF
6680            (vec_select:HF
6681             (match_operand:V8HF 3 "register_operand" "x")
6682             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683          (match_operand:V2SF 1 "register_operand" "0")))]
6684   "TARGET_F16FML"
6685   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6686   [(set_attr "type" "neon_fp_mul_s")]
6687 )
6688
6689 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6690   [(set (match_operand:V4SF 0 "register_operand" "")
6691         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6692                       (match_operand:V8HF 2 "register_operand" "")
6693                       (match_operand:V4HF 3 "register_operand" "")
6694                       (match_operand:SI 4 "aarch64_imm2" "")]
6695          VFMLA16_LOW))]
6696   "TARGET_F16FML"
6697 {
6698     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6699     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6700
6701     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6702                                                              operands[1],
6703                                                              operands[2],
6704                                                              operands[3],
6705                                                              p1, lane));
6706     DONE;
6707 })
6708
6709 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6710   [(set (match_operand:V4SF 0 "register_operand" "")
6711         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6712                       (match_operand:V8HF 2 "register_operand" "")
6713                       (match_operand:V4HF 3 "register_operand" "")
6714                       (match_operand:SI 4 "aarch64_imm2" "")]
6715          VFMLA16_HIGH))]
6716   "TARGET_F16FML"
6717 {
6718     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6719     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6720
6721     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6722                                                               operands[1],
6723                                                               operands[2],
6724                                                               operands[3],
6725                                                               p1, lane));
6726     DONE;
6727 })
6728
6729 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6730   [(set (match_operand:V4SF 0 "register_operand" "=w")
6731         (fma:V4SF
6732          (float_extend:V4SF
6733           (vec_select:V4HF
6734            (match_operand:V8HF 2 "register_operand" "w")
6735            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6736          (float_extend:V4SF
6737           (vec_duplicate:V4HF
6738            (vec_select:HF
6739             (match_operand:V4HF 3 "register_operand" "x")
6740             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741          (match_operand:V4SF 1 "register_operand" "0")))]
6742   "TARGET_F16FML"
6743   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6744   [(set_attr "type" "neon_fp_mul_s")]
6745 )
6746
6747 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6748   [(set (match_operand:V4SF 0 "register_operand" "=w")
6749         (fma:V4SF
6750          (float_extend:V4SF
6751           (neg:V4HF
6752            (vec_select:V4HF
6753             (match_operand:V8HF 2 "register_operand" "w")
6754             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6755          (float_extend:V4SF
6756           (vec_duplicate:V4HF
6757            (vec_select:HF
6758             (match_operand:V4HF 3 "register_operand" "x")
6759             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6760          (match_operand:V4SF 1 "register_operand" "0")))]
6761   "TARGET_F16FML"
6762   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6763   [(set_attr "type" "neon_fp_mul_s")]
6764 )
6765
6766 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6767   [(set (match_operand:V4SF 0 "register_operand" "=w")
6768         (fma:V4SF
6769          (float_extend:V4SF
6770           (vec_select:V4HF
6771            (match_operand:V8HF 2 "register_operand" "w")
6772            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6773          (float_extend:V4SF
6774           (vec_duplicate:V4HF
6775            (vec_select:HF
6776             (match_operand:V4HF 3 "register_operand" "x")
6777             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778          (match_operand:V4SF 1 "register_operand" "0")))]
6779   "TARGET_F16FML"
6780   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6781   [(set_attr "type" "neon_fp_mul_s")]
6782 )
6783
6784 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6785   [(set (match_operand:V4SF 0 "register_operand" "=w")
6786         (fma:V4SF
6787          (float_extend:V4SF
6788           (neg:V4HF
6789            (vec_select:V4HF
6790             (match_operand:V8HF 2 "register_operand" "w")
6791             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6792          (float_extend:V4SF
6793           (vec_duplicate:V4HF
6794            (vec_select:HF
6795             (match_operand:V4HF 3 "register_operand" "x")
6796             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6797          (match_operand:V4SF 1 "register_operand" "0")))]
6798   "TARGET_F16FML"
6799   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6800   [(set_attr "type" "neon_fp_mul_s")]
6801 )
6802
6803 ;; pmull
6804
6805 (define_insn "aarch64_crypto_pmulldi"
6806   [(set (match_operand:TI 0 "register_operand" "=w")
6807         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6808                      (match_operand:DI 2 "register_operand" "w")]
6809                     UNSPEC_PMULL))]
6810  "TARGET_SIMD && TARGET_AES"
6811  "pmull\\t%0.1q, %1.1d, %2.1d"
6812   [(set_attr "type" "crypto_pmull")]
6813 )
6814
6815 (define_insn "aarch64_crypto_pmullv2di"
6816  [(set (match_operand:TI 0 "register_operand" "=w")
6817        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6818                    (match_operand:V2DI 2 "register_operand" "w")]
6819                   UNSPEC_PMULL2))]
6820   "TARGET_SIMD && TARGET_AES"
6821   "pmull2\\t%0.1q, %1.2d, %2.2d"
6822   [(set_attr "type" "crypto_pmull")]
6823 )