gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umn,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
 209   [(set (match_operand:VQ 0 "register_operand" "=w")
 210         (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
 211    (set (match_operand:VQ2 2 "register_operand" "=w")
 212         (match_operand:VQ2 3 "memory_operand" "m"))]
 213   "TARGET_SIMD
 214     && rtx_equal_p (XEXP (operands[3], 0),
 215                     plus_constant (Pmode,
 216                                XEXP (operands[1], 0),
 217                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 218   "ldp\\t%q0, %q2, %1"
 219   [(set_attr "type" "neon_ldp_q")]
 220 )
 221
 222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
 223   [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
 224         (match_operand:VQ 1 "register_operand" "w"))
 225    (set (match_operand:VQ2 2 "memory_operand" "=m")
 226         (match_operand:VQ2 3 "register_operand" "w"))]
 227   "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
 228                 plus_constant (Pmode,
 229                                XEXP (operands[0], 0),
 230                                GET_MODE_SIZE (<VQ:MODE>mode)))"
 231   "stp\\t%q1, %q3, %0"
 232   [(set_attr "type" "neon_stp_q")]
 233 )
 234
 235
 236 (define_split
 237   [(set (match_operand:VQ 0 "register_operand" "")
 238       (match_operand:VQ 1 "register_operand" ""))]
 239   "TARGET_SIMD && reload_completed
 240    && GP_REGNUM_P (REGNO (operands[0]))
 241    && GP_REGNUM_P (REGNO (operands[1]))"
 242   [(const_int 0)]
 243 {
 244   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 245   DONE;
 246 })
 247
 248 (define_split
 249   [(set (match_operand:VQ 0 "register_operand" "")
 250         (match_operand:VQ 1 "register_operand" ""))]
 251   "TARGET_SIMD && reload_completed
 252    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 253        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 254   [(const_int 0)]
 255 {
 256   aarch64_split_simd_move (operands[0], operands[1]);
 257   DONE;
 258 })
 259
 260 (define_expand "aarch64_split_simd_mov<mode>"
 261   [(set (match_operand:VQ 0)
 262         (match_operand:VQ 1))]
 263   "TARGET_SIMD"
 264   {
 265     rtx dst = operands[0];
 266     rtx src = operands[1];
 267
 268     if (GP_REGNUM_P (REGNO (src)))
 269       {
 270         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 271         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 272
 273         emit_insn
 274           (gen_move_lo_quad_<mode> (dst, src_low_part));
 275         emit_insn
 276           (gen_move_hi_quad_<mode> (dst, src_high_part));
 277       }
 278
 279     else
 280       {
 281         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 282         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 283         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 284         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 285
 286         emit_insn
 287           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 288         emit_insn
 289           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 290       }
 291     DONE;
 292   }
 293 )
 294
 295 (define_insn "aarch64_simd_mov_from_<mode>low"
 296   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 297         (vec_select:<VHALF>
 298           (match_operand:VQ 1 "register_operand" "w")
 299           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 300   "TARGET_SIMD && reload_completed"
 301   "umov\t%0, %1.d[0]"
 302   [(set_attr "type" "neon_to_gp<q>")
 303    (set_attr "length" "4")
 304   ])
 305
 306 (define_insn "aarch64_simd_mov_from_<mode>high"
 307   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 308         (vec_select:<VHALF>
 309           (match_operand:VQ 1 "register_operand" "w")
 310           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 311   "TARGET_SIMD && reload_completed"
 312   "umov\t%0, %1.d[1]"
 313   [(set_attr "type" "neon_to_gp<q>")
 314    (set_attr "length" "4")
 315   ])
 316
 317 (define_insn "orn<mode>3"
 318  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 320                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 321  "TARGET_SIMD"
 322  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 323   [(set_attr "type" "neon_logic<q>")]
 324 )
 325
 326 (define_insn "bic<mode>3"
 327  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 328        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 329                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 330  "TARGET_SIMD"
 331  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 332   [(set_attr "type" "neon_logic<q>")]
 333 )
 334
 335 (define_insn "add<mode>3"
 336   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 337         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 338                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 339   "TARGET_SIMD"
 340   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 341   [(set_attr "type" "neon_add<q>")]
 342 )
 343
 344 (define_insn "sub<mode>3"
 345   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 346         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 347                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 348   "TARGET_SIMD"
 349   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 350   [(set_attr "type" "neon_sub<q>")]
 351 )
 352
 353 (define_insn "mul<mode>3"
 354   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 355         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 356                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 357   "TARGET_SIMD"
 358   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 359   [(set_attr "type" "neon_mul_<Vetype><q>")]
 360 )
 361
 362 (define_insn "bswap<mode>2"
 363   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 364         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 365   "TARGET_SIMD"
 366   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 367   [(set_attr "type" "neon_rev<q>")]
 368 )
 369
 370 (define_insn "aarch64_rbit<mode>"
 371   [(set (match_operand:VB 0 "register_operand" "=w")
 372         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 373                    UNSPEC_RBIT))]
 374   "TARGET_SIMD"
 375   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 376   [(set_attr "type" "neon_rbit")]
 377 )
 378
 379 (define_expand "ctz<mode>2"
 380   [(set (match_operand:VS 0 "register_operand")
 381         (ctz:VS (match_operand:VS 1 "register_operand")))]
 382   "TARGET_SIMD"
 383   {
 384      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 385      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 386                                              <MODE>mode, 0);
 387      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 388      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 389      DONE;
 390   }
 391 )
 392
 393 (define_expand "xorsign<mode>3"
 394   [(match_operand:VHSDF 0 "register_operand")
 395    (match_operand:VHSDF 1 "register_operand")
 396    (match_operand:VHSDF 2 "register_operand")]
 397   "TARGET_SIMD"
 398 {
 399
 400   machine_mode imode = <V_INT_EQUIV>mode;
 401   rtx v_bitmask = gen_reg_rtx (imode);
 402   rtx op1x = gen_reg_rtx (imode);
 403   rtx op2x = gen_reg_rtx (imode);
 404
 405   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 406   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 407
 408   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 409
 410   emit_move_insn (v_bitmask,
 411                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 412                                                      HOST_WIDE_INT_M1U << bits));
 413
 414   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 415   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 416   emit_move_insn (operands[0],
 417                   lowpart_subreg (<MODE>mode, op1x, imode));
 418   DONE;
 419 }
 420 )
 421
 422 ;; These instructions map to the __builtins for the Dot Product operations.
 423 (define_insn "aarch64_<sur>dot<vsi2qi>"
 424   [(set (match_operand:VS 0 "register_operand" "=w")
 425         (plus:VS (match_operand:VS 1 "register_operand" "0")
 426                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 427                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 428                 DOTPROD)))]
 429   "TARGET_DOTPROD"
 430   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 431   [(set_attr "type" "neon_dot")]
 432 )
 433
 434 ;; These expands map to the Dot Product optab the vectorizer checks for.
 435 ;; The auto-vectorizer expects a dot product builtin that also does an
 436 ;; accumulation into the provided register.
 437 ;; Given the following pattern
 438 ;;
 439 ;; for (i=0; i<len; i++) {
 440 ;;     c = a[i] * b[i];
 441 ;;     r += c;
 442 ;; }
 443 ;; return result;
 444 ;;
 445 ;; This can be auto-vectorized to
 446 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 447 ;;
 448 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 451 ;; ...
 452 ;;
 453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 454 (define_expand "<sur>dot_prod<vsi2qi>"
 455   [(set (match_operand:VS 0 "register_operand")
 456         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 457                             (match_operand:<VSI2QI> 2 "register_operand")]
 458                  DOTPROD)
 459                 (match_operand:VS 3 "register_operand")))]
 460   "TARGET_DOTPROD"
 461 {
 462   emit_insn (
 463     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 464                                     operands[2]));
 465   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 466   DONE;
 467 })
 468
 469 ;; These instructions map to the __builtins for the Dot Product
 470 ;; indexed operations.
 471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 472   [(set (match_operand:VS 0 "register_operand" "=w")
 473         (plus:VS (match_operand:VS 1 "register_operand" "0")
 474                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 475                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 476                             (match_operand:SI 4 "immediate_operand" "i")]
 477                 DOTPROD)))]
 478   "TARGET_DOTPROD"
 479   {
 480     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 481     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 482   }
 483   [(set_attr "type" "neon_dot")]
 484 )
 485
 486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 487   [(set (match_operand:VS 0 "register_operand" "=w")
 488         (plus:VS (match_operand:VS 1 "register_operand" "0")
 489                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 490                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 491                             (match_operand:SI 4 "immediate_operand" "i")]
 492                 DOTPROD)))]
 493   "TARGET_DOTPROD"
 494   {
 495     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 496     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 497   }
 498   [(set_attr "type" "neon_dot")]
 499 )
 500
 501 (define_expand "copysign<mode>3"
 502   [(match_operand:VHSDF 0 "register_operand")
 503    (match_operand:VHSDF 1 "register_operand")
 504    (match_operand:VHSDF 2 "register_operand")]
 505   "TARGET_FLOAT && TARGET_SIMD"
 506 {
 507   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 508   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 509
 510   emit_move_insn (v_bitmask,
 511                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 512                                                      HOST_WIDE_INT_M1U << bits));
 513   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 514                                          operands[2], operands[1]));
 515   DONE;
 516 }
 517 )
 518
 519 (define_insn "*aarch64_mul3_elt<mode>"
 520  [(set (match_operand:VMUL 0 "register_operand" "=w")
 521     (mult:VMUL
 522       (vec_duplicate:VMUL
 523           (vec_select:<VEL>
 524             (match_operand:VMUL 1 "register_operand" "<h_con>")
 525             (parallel [(match_operand:SI 2 "immediate_operand")])))
 526       (match_operand:VMUL 3 "register_operand" "w")))]
 527   "TARGET_SIMD"
 528   {
 529     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 530     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 531   }
 532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 533 )
 534
 535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 536   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 537      (mult:VMUL_CHANGE_NLANES
 538        (vec_duplicate:VMUL_CHANGE_NLANES
 539           (vec_select:<VEL>
 540             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 541             (parallel [(match_operand:SI 2 "immediate_operand")])))
 542       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 543   "TARGET_SIMD"
 544   {
 545     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 546     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 547   }
 548   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 549 )
 550
 551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 552  [(set (match_operand:VMUL 0 "register_operand" "=w")
 553     (mult:VMUL
 554       (vec_duplicate:VMUL
 555             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 556       (match_operand:VMUL 2 "register_operand" "w")))]
 557   "TARGET_SIMD"
 558   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 559   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 560 )
 561
 562 (define_insn "aarch64_rsqrte<mode>"
 563   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 564         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 565                      UNSPEC_RSQRTE))]
 566   "TARGET_SIMD"
 567   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 568   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 569
 570 (define_insn "aarch64_rsqrts<mode>"
 571   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 572         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 573                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 574          UNSPEC_RSQRTS))]
 575   "TARGET_SIMD"
 576   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 577   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 578
 579 (define_expand "rsqrt<mode>2"
 580   [(set (match_operand:VALLF 0 "register_operand" "=w")
 581         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 582                      UNSPEC_RSQRT))]
 583   "TARGET_SIMD"
 584 {
 585   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 586   DONE;
 587 })
 588
 589 (define_insn "*aarch64_mul3_elt_to_64v2df"
 590   [(set (match_operand:DF 0 "register_operand" "=w")
 591      (mult:DF
 592        (vec_select:DF
 593          (match_operand:V2DF 1 "register_operand" "w")
 594          (parallel [(match_operand:SI 2 "immediate_operand")]))
 595        (match_operand:DF 3 "register_operand" "w")))]
 596   "TARGET_SIMD"
 597   {
 598     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 599     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 600   }
 601   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 602 )
 603
 604 (define_insn "neg<mode>2"
 605   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 606         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 607   "TARGET_SIMD"
 608   "neg\t%0.<Vtype>, %1.<Vtype>"
 609   [(set_attr "type" "neon_neg<q>")]
 610 )
 611
 612 (define_insn "abs<mode>2"
 613   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 614         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 615   "TARGET_SIMD"
 616   "abs\t%0.<Vtype>, %1.<Vtype>"
 617   [(set_attr "type" "neon_abs<q>")]
 618 )
 619
 620 ;; The intrinsic version of integer ABS must not be allowed to
 621 ;; combine with any operation with an integerated ABS step, such
 622 ;; as SABD.
 623 (define_insn "aarch64_abs<mode>"
 624   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 625           (unspec:VSDQ_I_DI
 626             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 627            UNSPEC_ABS))]
 628   "TARGET_SIMD"
 629   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 630   [(set_attr "type" "neon_abs<q>")]
 631 )
 632
 633 (define_insn "abd<mode>_3"
 634   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 635         (abs:VDQ_BHSI (minus:VDQ_BHSI
 636                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 637                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 638   "TARGET_SIMD"
 639   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 640   [(set_attr "type" "neon_abd<q>")]
 641 )
 642
 643 (define_insn "aarch64_<sur>abdl2<mode>_3"
 644   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 645         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 646                           (match_operand:VDQV_S 2 "register_operand" "w")]
 647         ABDL2))]
 648   "TARGET_SIMD"
 649   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 650   [(set_attr "type" "neon_abd<q>")]
 651 )
 652
 653 (define_insn "aarch64_<sur>abal<mode>_4"
 654   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 655         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 656                           (match_operand:VDQV_S 2 "register_operand" "w")
 657                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 658         ABAL))]
 659   "TARGET_SIMD"
 660   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 661   [(set_attr "type" "neon_arith_acc<q>")]
 662 )
 663
 664 (define_insn "aarch64_<sur>adalp<mode>_3"
 665   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 666         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 667                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 668         ADALP))]
 669   "TARGET_SIMD"
 670   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 671   [(set_attr "type" "neon_reduc_add<q>")]
 672 )
 673
 674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 675 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 676 ;; reduction of the difference into a V4SI vector and accumulate that into
 677 ;; operand 3 before copying that into the result operand 0.
 678 ;; Perform that with a sequence of:
 679 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 680 ;; UABAL        tmp.8h, op1.16b, op2.16b
 681 ;; UADALP       op3.4s, tmp.8h
 682 ;; MOV          op0, op3 // should be eliminated in later passes.
 683 ;; The signed version just uses the signed variants of the above instructions.
 684
 685 (define_expand "<sur>sadv16qi"
 686   [(use (match_operand:V4SI 0 "register_operand"))
 687    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 688                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 689    (use (match_operand:V4SI 3 "register_operand"))]
 690   "TARGET_SIMD"
 691   {
 692     rtx reduc = gen_reg_rtx (V8HImode);
 693     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 694                                                operands[2]));
 695     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 696                                               operands[2], reduc));
 697     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 698                                               operands[3]));
 699     emit_move_insn (operands[0], operands[3]);
 700     DONE;
 701   }
 702 )
 703
 704 (define_insn "aba<mode>_3"
 705   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 706         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 707                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 708                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 709                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 710   "TARGET_SIMD"
 711   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 712   [(set_attr "type" "neon_arith_acc<q>")]
 713 )
 714
 715 (define_insn "fabd<mode>3"
 716   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 717         (abs:VHSDF_HSDF
 718           (minus:VHSDF_HSDF
 719             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 720             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 721   "TARGET_SIMD"
 722   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 723   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 724 )
 725
 726 ;; For AND (vector, register) and BIC (vector, immediate)
 727 (define_insn "and<mode>3"
 728   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 729         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 730                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 731   "TARGET_SIMD"
 732   {
 733     switch (which_alternative)
 734       {
 735       case 0:
 736         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 737       case 1:
 738         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 739                                                   AARCH64_CHECK_BIC);
 740       default:
 741         gcc_unreachable ();
 742       }
 743   }
 744   [(set_attr "type" "neon_logic<q>")]
 745 )
 746
 747 ;; For ORR (vector, register) and ORR (vector, immediate)
 748 (define_insn "ior<mode>3"
 749   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 750         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 751                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 752   "TARGET_SIMD"
 753   {
 754     switch (which_alternative)
 755       {
 756       case 0:
 757         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 758       case 1:
 759         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 760                                                   AARCH64_CHECK_ORR);
 761       default:
 762         gcc_unreachable ();
 763       }
 764   }
 765   [(set_attr "type" "neon_logic<q>")]
 766 )
 767
 768 (define_insn "xor<mode>3"
 769   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 770         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 771                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 772   "TARGET_SIMD"
 773   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 774   [(set_attr "type" "neon_logic<q>")]
 775 )
 776
 777 (define_insn "one_cmpl<mode>2"
 778   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 779         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 780   "TARGET_SIMD"
 781   "not\t%0.<Vbtype>, %1.<Vbtype>"
 782   [(set_attr "type" "neon_logic<q>")]
 783 )
 784
 785 (define_insn "aarch64_simd_vec_set<mode>"
 786   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 787         (vec_merge:VALL_F16
 788             (vec_duplicate:VALL_F16
 789                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 790             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 791             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 792   "TARGET_SIMD"
 793   {
 794    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 795    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 796    switch (which_alternative)
 797      {
 798      case 0:
 799         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 800      case 1:
 801         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 802      case 2:
 803         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 804      default:
 805         gcc_unreachable ();
 806      }
 807   }
 808   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 809 )
 810
 811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 812   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 813         (vec_merge:VALL_F16
 814             (vec_duplicate:VALL_F16
 815               (vec_select:<VEL>
 816                 (match_operand:VALL_F16 3 "register_operand" "w")
 817                 (parallel
 818                   [(match_operand:SI 4 "immediate_operand" "i")])))
 819             (match_operand:VALL_F16 1 "register_operand" "0")
 820             (match_operand:SI 2 "immediate_operand" "i")))]
 821   "TARGET_SIMD"
 822   {
 823     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 824     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 825     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 826
 827     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 828   }
 829   [(set_attr "type" "neon_ins<q>")]
 830 )
 831
 832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 833   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 834         (vec_merge:VALL_F16_NO_V2Q
 835             (vec_duplicate:VALL_F16_NO_V2Q
 836               (vec_select:<VEL>
 837                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 838                 (parallel
 839                   [(match_operand:SI 4 "immediate_operand" "i")])))
 840             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 841             (match_operand:SI 2 "immediate_operand" "i")))]
 842   "TARGET_SIMD"
 843   {
 844     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 845     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 846     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 847                                            INTVAL (operands[4]));
 848
 849     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 850   }
 851   [(set_attr "type" "neon_ins<q>")]
 852 )
 853
 854 (define_insn "aarch64_simd_lshr<mode>"
 855  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 856        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 857                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 858  "TARGET_SIMD"
 859  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 860   [(set_attr "type" "neon_shift_imm<q>")]
 861 )
 862
 863 (define_insn "aarch64_simd_ashr<mode>"
 864  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 865        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 866                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 867  "TARGET_SIMD"
 868  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 869   [(set_attr "type" "neon_shift_imm<q>")]
 870 )
 871
 872 (define_insn "aarch64_simd_imm_shl<mode>"
 873  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 874        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 875                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 876  "TARGET_SIMD"
 877   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 878   [(set_attr "type" "neon_shift_imm<q>")]
 879 )
 880
 881 (define_insn "aarch64_simd_reg_sshl<mode>"
 882  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 883        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 884                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 885  "TARGET_SIMD"
 886  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 887   [(set_attr "type" "neon_shift_reg<q>")]
 888 )
 889
 890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 891  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 892        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 893                     (match_operand:VDQ_I 2 "register_operand" "w")]
 894                    UNSPEC_ASHIFT_UNSIGNED))]
 895  "TARGET_SIMD"
 896  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 897   [(set_attr "type" "neon_shift_reg<q>")]
 898 )
 899
 900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 901  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 902        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 903                     (match_operand:VDQ_I 2 "register_operand" "w")]
 904                    UNSPEC_ASHIFT_SIGNED))]
 905  "TARGET_SIMD"
 906  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 907   [(set_attr "type" "neon_shift_reg<q>")]
 908 )
 909
 910 (define_expand "ashl<mode>3"
 911   [(match_operand:VDQ_I 0 "register_operand" "")
 912    (match_operand:VDQ_I 1 "register_operand" "")
 913    (match_operand:SI  2 "general_operand" "")]
 914  "TARGET_SIMD"
 915 {
 916   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 917   int shift_amount;
 918
 919   if (CONST_INT_P (operands[2]))
 920     {
 921       shift_amount = INTVAL (operands[2]);
 922       if (shift_amount >= 0 && shift_amount < bit_width)
 923         {
 924           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 925                                                        shift_amount);
 926           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 927                                                      operands[1],
 928                                                      tmp));
 929           DONE;
 930         }
 931       else
 932         {
 933           operands[2] = force_reg (SImode, operands[2]);
 934         }
 935     }
 936   else if (MEM_P (operands[2]))
 937     {
 938       operands[2] = force_reg (SImode, operands[2]);
 939     }
 940
 941   if (REG_P (operands[2]))
 942     {
 943       rtx tmp = gen_reg_rtx (<MODE>mode);
 944       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 945                                              convert_to_mode (<VEL>mode,
 946                                                               operands[2],
 947                                                               0)));
 948       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 949                                                   tmp));
 950       DONE;
 951     }
 952   else
 953     FAIL;
 954 }
 955 )
 956
 957 (define_expand "lshr<mode>3"
 958   [(match_operand:VDQ_I 0 "register_operand" "")
 959    (match_operand:VDQ_I 1 "register_operand" "")
 960    (match_operand:SI  2 "general_operand" "")]
 961  "TARGET_SIMD"
 962 {
 963   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 964   int shift_amount;
 965
 966   if (CONST_INT_P (operands[2]))
 967     {
 968       shift_amount = INTVAL (operands[2]);
 969       if (shift_amount > 0 && shift_amount <= bit_width)
 970         {
 971           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 972                                                        shift_amount);
 973           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 974                                                   operands[1],
 975                                                   tmp));
 976           DONE;
 977         }
 978       else
 979         operands[2] = force_reg (SImode, operands[2]);
 980     }
 981   else if (MEM_P (operands[2]))
 982     {
 983       operands[2] = force_reg (SImode, operands[2]);
 984     }
 985
 986   if (REG_P (operands[2]))
 987     {
 988       rtx tmp = gen_reg_rtx (SImode);
 989       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 990       emit_insn (gen_negsi2 (tmp, operands[2]));
 991       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 992                                              convert_to_mode (<VEL>mode,
 993                                                               tmp, 0)));
 994       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 995                                                           operands[1],
 996                                                           tmp1));
 997       DONE;
 998     }
 999   else
1000     FAIL;
1001 }
1002 )
1003
1004 (define_expand "ashr<mode>3"
1005   [(match_operand:VDQ_I 0 "register_operand" "")
1006    (match_operand:VDQ_I 1 "register_operand" "")
1007    (match_operand:SI  2 "general_operand" "")]
1008  "TARGET_SIMD"
1009 {
1010   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1011   int shift_amount;
1012
1013   if (CONST_INT_P (operands[2]))
1014     {
1015       shift_amount = INTVAL (operands[2]);
1016       if (shift_amount > 0 && shift_amount <= bit_width)
1017         {
1018           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1019                                                        shift_amount);
1020           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1021                                                   operands[1],
1022                                                   tmp));
1023           DONE;
1024         }
1025       else
1026         operands[2] = force_reg (SImode, operands[2]);
1027     }
1028   else if (MEM_P (operands[2]))
1029     {
1030       operands[2] = force_reg (SImode, operands[2]);
1031     }
1032
1033   if (REG_P (operands[2]))
1034     {
1035       rtx tmp = gen_reg_rtx (SImode);
1036       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037       emit_insn (gen_negsi2 (tmp, operands[2]));
1038       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039                                              convert_to_mode (<VEL>mode,
1040                                                               tmp, 0)));
1041       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1042                                                         operands[1],
1043                                                         tmp1));
1044       DONE;
1045     }
1046   else
1047     FAIL;
1048 }
1049 )
1050
1051 (define_expand "vashl<mode>3"
1052  [(match_operand:VDQ_I 0 "register_operand" "")
1053   (match_operand:VDQ_I 1 "register_operand" "")
1054   (match_operand:VDQ_I 2 "register_operand" "")]
1055  "TARGET_SIMD"
1056 {
1057   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1058                                               operands[2]));
1059   DONE;
1060 })
1061
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067   (match_operand:VDQ_BHSI 1 "register_operand" "")
1068   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1069  "TARGET_SIMD"
1070 {
1071   rtx neg = gen_reg_rtx (<MODE>mode);
1072   emit (gen_neg<mode>2 (neg, operands[2]));
1073   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1074                                                     neg));
1075   DONE;
1076 })
1077
1078 ;; DI vector shift
1079 (define_expand "aarch64_ashr_simddi"
1080   [(match_operand:DI 0 "register_operand" "=w")
1081    (match_operand:DI 1 "register_operand" "w")
1082    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1083   "TARGET_SIMD"
1084   {
1085     /* An arithmetic shift right by 64 fills the result with copies of the sign
1086        bit, just like asr by 63 - however the standard pattern does not handle
1087        a shift by 64.  */
1088     if (INTVAL (operands[2]) == 64)
1089       operands[2] = GEN_INT (63);
1090     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1091     DONE;
1092   }
1093 )
1094
1095 (define_expand "vlshr<mode>3"
1096  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097   (match_operand:VDQ_BHSI 1 "register_operand" "")
1098   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1099  "TARGET_SIMD"
1100 {
1101   rtx neg = gen_reg_rtx (<MODE>mode);
1102   emit (gen_neg<mode>2 (neg, operands[2]));
1103   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1104                                                       neg));
1105   DONE;
1106 })
1107
1108 (define_expand "aarch64_lshr_simddi"
1109   [(match_operand:DI 0 "register_operand" "=w")
1110    (match_operand:DI 1 "register_operand" "w")
1111    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1112   "TARGET_SIMD"
1113   {
1114     if (INTVAL (operands[2]) == 64)
1115       emit_move_insn (operands[0], const0_rtx);
1116     else
1117       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1118     DONE;
1119   }
1120 )
1121
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1123 (define_insn "vec_shr_<mode>"
1124   [(set (match_operand:VD 0 "register_operand" "=w")
1125         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126                     (match_operand:SI 2 "immediate_operand" "i")]
1127                    UNSPEC_VEC_SHR))]
1128   "TARGET_SIMD"
1129   {
1130     if (BYTES_BIG_ENDIAN)
1131       return "shl %d0, %d1, %2";
1132     else
1133       return "ushr %d0, %d1, %2";
1134   }
1135   [(set_attr "type" "neon_shift_imm")]
1136 )
1137
1138 (define_expand "vec_set<mode>"
1139   [(match_operand:VALL_F16 0 "register_operand" "+w")
1140    (match_operand:<VEL> 1 "register_operand" "w")
1141    (match_operand:SI 2 "immediate_operand" "")]
1142   "TARGET_SIMD"
1143   {
1144     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146                                           GEN_INT (elem), operands[0]));
1147     DONE;
1148   }
1149 )
1150
1151
1152 (define_insn "aarch64_mla<mode>"
1153  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154        (plus:VDQ_BHSI (mult:VDQ_BHSI
1155                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1158  "TARGET_SIMD"
1159  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160   [(set_attr "type" "neon_mla_<Vetype><q>")]
1161 )
1162
1163 (define_insn "*aarch64_mla_elt<mode>"
1164  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1165        (plus:VDQHS
1166          (mult:VDQHS
1167            (vec_duplicate:VDQHS
1168               (vec_select:<VEL>
1169                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1171            (match_operand:VDQHS 3 "register_operand" "w"))
1172          (match_operand:VDQHS 4 "register_operand" "0")))]
1173  "TARGET_SIMD"
1174   {
1175     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1177   }
1178   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1179 )
1180
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1183        (plus:VDQHS
1184          (mult:VDQHS
1185            (vec_duplicate:VDQHS
1186               (vec_select:<VEL>
1187                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1189            (match_operand:VDQHS 3 "register_operand" "w"))
1190          (match_operand:VDQHS 4 "register_operand" "0")))]
1191  "TARGET_SIMD"
1192   {
1193     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1195   }
1196   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1197 )
1198
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1201         (plus:VDQHS
1202           (mult:VDQHS (vec_duplicate:VDQHS
1203                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204                 (match_operand:VDQHS 2 "register_operand" "w"))
1205           (match_operand:VDQHS 3 "register_operand" "0")))]
1206  "TARGET_SIMD"
1207  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "aarch64_mls<mode>"
1212  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1216  "TARGET_SIMD"
1217  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218   [(set_attr "type" "neon_mla_<Vetype><q>")]
1219 )
1220
1221 (define_insn "*aarch64_mls_elt<mode>"
1222  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1223        (minus:VDQHS
1224          (match_operand:VDQHS 4 "register_operand" "0")
1225          (mult:VDQHS
1226            (vec_duplicate:VDQHS
1227               (vec_select:<VEL>
1228                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1230            (match_operand:VDQHS 3 "register_operand" "w"))))]
1231  "TARGET_SIMD"
1232   {
1233     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1235   }
1236   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1237 )
1238
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1241        (minus:VDQHS
1242          (match_operand:VDQHS 4 "register_operand" "0")
1243          (mult:VDQHS
1244            (vec_duplicate:VDQHS
1245               (vec_select:<VEL>
1246                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1248            (match_operand:VDQHS 3 "register_operand" "w"))))]
1249  "TARGET_SIMD"
1250   {
1251     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1253   }
1254   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1255 )
1256
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1259         (minus:VDQHS
1260           (match_operand:VDQHS 1 "register_operand" "0")
1261           (mult:VDQHS (vec_duplicate:VDQHS
1262                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1264   "TARGET_SIMD"
1265   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1267 )
1268
1269 ;; Max/Min operations.
1270 (define_insn "<su><maxmin><mode>3"
1271  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1274  "TARGET_SIMD"
1275  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276   [(set_attr "type" "neon_minmax<q>")]
1277 )
1278
1279 (define_expand "<su><maxmin>v2di3"
1280  [(set (match_operand:V2DI 0 "register_operand" "")
1281        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282                     (match_operand:V2DI 2 "register_operand" "")))]
1283  "TARGET_SIMD"
1284 {
1285   enum rtx_code cmp_operator;
1286   rtx cmp_fmt;
1287
1288   switch (<CODE>)
1289     {
1290     case UMIN:
1291       cmp_operator = LTU;
1292       break;
1293     case SMIN:
1294       cmp_operator = LT;
1295       break;
1296     case UMAX:
1297       cmp_operator = GTU;
1298       break;
1299     case SMAX:
1300       cmp_operator = GT;
1301       break;
1302     default:
1303       gcc_unreachable ();
1304     }
1305
1306   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308               operands[2], cmp_fmt, operands[1], operands[2]));
1309   DONE;
1310 })
1311
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1317                         MAXMINV))]
1318  "TARGET_SIMD"
1319  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320   [(set_attr "type" "neon_minmax<q>")]
1321 )
1322
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327                       (match_operand:VHSDF 2 "register_operand" "w")]
1328                       FMAXMINV))]
1329  "TARGET_SIMD"
1330  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331   [(set_attr "type" "neon_minmax<q>")]
1332 )
1333
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1339
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
1343
1344 (define_insn "move_lo_quad_internal_<mode>"
1345   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1346         (vec_concat:VQ_NO2E
1347           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348           (vec_duplicate:<VHALF> (const_int 0))))]
1349   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1350   "@
1351    dup\\t%d0, %1.d[0]
1352    fmov\\t%d0, %1
1353    dup\\t%d0, %1"
1354   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355    (set_attr "simd" "yes,*,yes")
1356    (set_attr "fp" "*,yes,*")
1357    (set_attr "length" "4")]
1358 )
1359
1360 (define_insn "move_lo_quad_internal_<mode>"
1361   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1362         (vec_concat:VQ_2E
1363           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1364           (const_int 0)))]
1365   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1366   "@
1367    dup\\t%d0, %1.d[0]
1368    fmov\\t%d0, %1
1369    dup\\t%d0, %1"
1370   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1371    (set_attr "simd" "yes,*,yes")
1372    (set_attr "fp" "*,yes,*")
1373    (set_attr "length" "4")]
1374 )
1375
1376 (define_insn "move_lo_quad_internal_be_<mode>"
1377   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1378         (vec_concat:VQ_NO2E
1379           (vec_duplicate:<VHALF> (const_int 0))
1380           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1381   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1382   "@
1383    dup\\t%d0, %1.d[0]
1384    fmov\\t%d0, %1
1385    dup\\t%d0, %1"
1386   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1387    (set_attr "simd" "yes,*,yes")
1388    (set_attr "fp" "*,yes,*")
1389    (set_attr "length" "4")]
1390 )
1391
1392 (define_insn "move_lo_quad_internal_be_<mode>"
1393   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1394         (vec_concat:VQ_2E
1395           (const_int 0)
1396           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1397   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1398   "@
1399    dup\\t%d0, %1.d[0]
1400    fmov\\t%d0, %1
1401    dup\\t%d0, %1"
1402   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1403    (set_attr "simd" "yes,*,yes")
1404    (set_attr "fp" "*,yes,*")
1405    (set_attr "length" "4")]
1406 )
1407
1408 (define_expand "move_lo_quad_<mode>"
1409   [(match_operand:VQ 0 "register_operand")
1410    (match_operand:VQ 1 "register_operand")]
1411   "TARGET_SIMD"
1412 {
1413   if (BYTES_BIG_ENDIAN)
1414     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1415   else
1416     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1417   DONE;
1418 }
1419 )
1420
1421 ;; Move operand1 to the high architectural bits of the register, keeping
1422 ;; the low architectural bits of operand2.
1423 ;; For little-endian this is { operand2, operand1 }
1424 ;; For big-endian this is { operand1, operand2 }
1425
1426 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1427   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1428         (vec_concat:VQ
1429           (vec_select:<VHALF>
1430                 (match_dup 0)
1431                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1432           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1433   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1434   "@
1435    ins\\t%0.d[1], %1.d[0]
1436    ins\\t%0.d[1], %1"
1437   [(set_attr "type" "neon_ins")]
1438 )
1439
1440 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1441   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1442         (vec_concat:VQ
1443           (match_operand:<VHALF> 1 "register_operand" "w,r")
1444           (vec_select:<VHALF>
1445                 (match_dup 0)
1446                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1447   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1448   "@
1449    ins\\t%0.d[1], %1.d[0]
1450    ins\\t%0.d[1], %1"
1451   [(set_attr "type" "neon_ins")]
1452 )
1453
1454 (define_expand "move_hi_quad_<mode>"
1455  [(match_operand:VQ 0 "register_operand" "")
1456   (match_operand:<VHALF> 1 "register_operand" "")]
1457  "TARGET_SIMD"
1458 {
1459   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1460   if (BYTES_BIG_ENDIAN)
1461     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1462                     operands[1], p));
1463   else
1464     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1465                     operands[1], p));
1466   DONE;
1467 })
1468
1469 ;; Narrowing operations.
1470
1471 ;; For doubles.
1472 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1473  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1474        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1475  "TARGET_SIMD"
1476  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1477   [(set_attr "type" "neon_shift_imm_narrow_q")]
1478 )
1479
1480 (define_expand "vec_pack_trunc_<mode>"
1481  [(match_operand:<VNARROWD> 0 "register_operand" "")
1482   (match_operand:VDN 1 "register_operand" "")
1483   (match_operand:VDN 2 "register_operand" "")]
1484  "TARGET_SIMD"
1485 {
1486   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1487   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1488   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1489
1490   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1491   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1492   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1493   DONE;
1494 })
1495
1496 ;; For quads.
1497
1498 (define_insn "vec_pack_trunc_<mode>"
1499  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1500        (vec_concat:<VNARROWQ2>
1501          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1502          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1503  "TARGET_SIMD"
1504  {
1505    if (BYTES_BIG_ENDIAN)
1506      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1507    else
1508      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1509  }
1510   [(set_attr "type" "multiple")
1511    (set_attr "length" "8")]
1512 )
1513
1514 ;; Widening operations.
1515
1516 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1517   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1519                                (match_operand:VQW 1 "register_operand" "w")
1520                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1521                             )))]
1522   "TARGET_SIMD"
1523   "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1524   [(set_attr "type" "neon_shift_imm_long")]
1525 )
1526
1527 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1528   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1529         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1530                                (match_operand:VQW 1 "register_operand" "w")
1531                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1532                             )))]
1533   "TARGET_SIMD"
1534   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1535   [(set_attr "type" "neon_shift_imm_long")]
1536 )
1537
1538 (define_expand "vec_unpack<su>_hi_<mode>"
1539   [(match_operand:<VWIDE> 0 "register_operand" "")
1540    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1541   "TARGET_SIMD"
1542   {
1543     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1544     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1545                                                           operands[1], p));
1546     DONE;
1547   }
1548 )
1549
1550 (define_expand "vec_unpack<su>_lo_<mode>"
1551   [(match_operand:<VWIDE> 0 "register_operand" "")
1552    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1553   "TARGET_SIMD"
1554   {
1555     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1556     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1557                                                           operands[1], p));
1558     DONE;
1559   }
1560 )
1561
1562 ;; Widening arithmetic.
1563
1564 (define_insn "*aarch64_<su>mlal_lo<mode>"
1565   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1566         (plus:<VWIDE>
1567           (mult:<VWIDE>
1568               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569                  (match_operand:VQW 2 "register_operand" "w")
1570                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1571               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1572                  (match_operand:VQW 4 "register_operand" "w")
1573                  (match_dup 3))))
1574           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1575   "TARGET_SIMD"
1576   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1577   [(set_attr "type" "neon_mla_<Vetype>_long")]
1578 )
1579
1580 (define_insn "*aarch64_<su>mlal_hi<mode>"
1581   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1582         (plus:<VWIDE>
1583           (mult:<VWIDE>
1584               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1585                  (match_operand:VQW 2 "register_operand" "w")
1586                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1587               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1588                  (match_operand:VQW 4 "register_operand" "w")
1589                  (match_dup 3))))
1590           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1591   "TARGET_SIMD"
1592   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1593   [(set_attr "type" "neon_mla_<Vetype>_long")]
1594 )
1595
1596 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1597   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598         (minus:<VWIDE>
1599           (match_operand:<VWIDE> 1 "register_operand" "0")
1600           (mult:<VWIDE>
1601               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602                  (match_operand:VQW 2 "register_operand" "w")
1603                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1604               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1605                  (match_operand:VQW 4 "register_operand" "w")
1606                  (match_dup 3))))))]
1607   "TARGET_SIMD"
1608   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1609   [(set_attr "type" "neon_mla_<Vetype>_long")]
1610 )
1611
1612 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1613   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1614         (minus:<VWIDE>
1615           (match_operand:<VWIDE> 1 "register_operand" "0")
1616           (mult:<VWIDE>
1617               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1618                  (match_operand:VQW 2 "register_operand" "w")
1619                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1620               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1621                  (match_operand:VQW 4 "register_operand" "w")
1622                  (match_dup 3))))))]
1623   "TARGET_SIMD"
1624   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1625   [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
1628 (define_insn "*aarch64_<su>mlal<mode>"
1629   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630         (plus:<VWIDE>
1631           (mult:<VWIDE>
1632             (ANY_EXTEND:<VWIDE>
1633               (match_operand:VD_BHSI 1 "register_operand" "w"))
1634             (ANY_EXTEND:<VWIDE>
1635               (match_operand:VD_BHSI 2 "register_operand" "w")))
1636           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1637   "TARGET_SIMD"
1638   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1639   [(set_attr "type" "neon_mla_<Vetype>_long")]
1640 )
1641
1642 (define_insn "*aarch64_<su>mlsl<mode>"
1643   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1644         (minus:<VWIDE>
1645           (match_operand:<VWIDE> 1 "register_operand" "0")
1646           (mult:<VWIDE>
1647             (ANY_EXTEND:<VWIDE>
1648               (match_operand:VD_BHSI 2 "register_operand" "w"))
1649             (ANY_EXTEND:<VWIDE>
1650               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1651   "TARGET_SIMD"
1652   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1653   [(set_attr "type" "neon_mla_<Vetype>_long")]
1654 )
1655
1656 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1657  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1658        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1659                            (match_operand:VQW 1 "register_operand" "w")
1660                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1661                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1662                            (match_operand:VQW 2 "register_operand" "w")
1663                            (match_dup 3)))))]
1664   "TARGET_SIMD"
1665   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1666   [(set_attr "type" "neon_mul_<Vetype>_long")]
1667 )
1668
1669 (define_expand "vec_widen_<su>mult_lo_<mode>"
1670   [(match_operand:<VWIDE> 0 "register_operand" "")
1671    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1672    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1673  "TARGET_SIMD"
1674  {
1675    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1676    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1677                                                        operands[1],
1678                                                        operands[2], p));
1679    DONE;
1680  }
1681 )
1682
1683 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1684  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1685       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686                             (match_operand:VQW 1 "register_operand" "w")
1687                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1688                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689                             (match_operand:VQW 2 "register_operand" "w")
1690                             (match_dup 3)))))]
1691   "TARGET_SIMD"
1692   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1693   [(set_attr "type" "neon_mul_<Vetype>_long")]
1694 )
1695
1696 (define_expand "vec_widen_<su>mult_hi_<mode>"
1697   [(match_operand:<VWIDE> 0 "register_operand" "")
1698    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1699    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1700  "TARGET_SIMD"
1701  {
1702    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1703    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1704                                                        operands[1],
1705                                                        operands[2], p));
1706    DONE;
1707
1708  }
1709 )
1710
1711 ;; FP vector operations.
1712 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1713 ;; double-precision (64-bit) floating-point data types and arithmetic as
1714 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1715 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1716 ;;
1717 ;; Floating-point operations can raise an exception.  Vectorizing such
1718 ;; operations are safe because of reasons explained below.
1719 ;;
1720 ;; ARMv8 permits an extension to enable trapped floating-point
1721 ;; exception handling, however this is an optional feature.  In the
1722 ;; event of a floating-point exception being raised by vectorised
1723 ;; code then:
1724 ;; 1.  If trapped floating-point exceptions are available, then a trap
1725 ;;     will be taken when any lane raises an enabled exception.  A trap
1726 ;;     handler may determine which lane raised the exception.
1727 ;; 2.  Alternatively a sticky exception flag is set in the
1728 ;;     floating-point status register (FPSR).  Software may explicitly
1729 ;;     test the exception flags, in which case the tests will either
1730 ;;     prevent vectorisation, allowing precise identification of the
1731 ;;     failing operation, or if tested outside of vectorisable regions
1732 ;;     then the specific operation and lane are not of interest.
1733
1734 ;; FP arithmetic operations.
1735
1736 (define_insn "add<mode>3"
1737  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1738        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1739                    (match_operand:VHSDF 2 "register_operand" "w")))]
1740  "TARGET_SIMD"
1741  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1743 )
1744
1745 (define_insn "sub<mode>3"
1746  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1747        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1748                     (match_operand:VHSDF 2 "register_operand" "w")))]
1749  "TARGET_SIMD"
1750  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1751   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1752 )
1753
1754 (define_insn "mul<mode>3"
1755  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757                    (match_operand:VHSDF 2 "register_operand" "w")))]
1758  "TARGET_SIMD"
1759  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1761 )
1762
1763 (define_expand "div<mode>3"
1764  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1766                   (match_operand:VHSDF 2 "register_operand" "w")))]
1767  "TARGET_SIMD"
1768 {
1769   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1770     DONE;
1771
1772   operands[1] = force_reg (<MODE>mode, operands[1]);
1773 })
1774
1775 (define_insn "*div<mode>3"
1776  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1777        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1778                  (match_operand:VHSDF 2 "register_operand" "w")))]
1779  "TARGET_SIMD"
1780  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1781   [(set_attr "type" "neon_fp_div_<stype><q>")]
1782 )
1783
1784 (define_insn "neg<mode>2"
1785  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1786        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1787  "TARGET_SIMD"
1788  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1789   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1790 )
1791
1792 (define_insn "abs<mode>2"
1793  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1794        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1795  "TARGET_SIMD"
1796  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1797   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1798 )
1799
1800 (define_insn "fma<mode>4"
1801   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1802        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1803                   (match_operand:VHSDF 2 "register_operand" "w")
1804                   (match_operand:VHSDF 3 "register_operand" "0")))]
1805   "TARGET_SIMD"
1806  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1807   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1808 )
1809
1810 (define_insn "*aarch64_fma4_elt<mode>"
1811   [(set (match_operand:VDQF 0 "register_operand" "=w")
1812     (fma:VDQF
1813       (vec_duplicate:VDQF
1814         (vec_select:<VEL>
1815           (match_operand:VDQF 1 "register_operand" "<h_con>")
1816           (parallel [(match_operand:SI 2 "immediate_operand")])))
1817       (match_operand:VDQF 3 "register_operand" "w")
1818       (match_operand:VDQF 4 "register_operand" "0")))]
1819   "TARGET_SIMD"
1820   {
1821     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1822     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1823   }
1824   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1825 )
1826
1827 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1828   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1829     (fma:VDQSF
1830       (vec_duplicate:VDQSF
1831         (vec_select:<VEL>
1832           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1833           (parallel [(match_operand:SI 2 "immediate_operand")])))
1834       (match_operand:VDQSF 3 "register_operand" "w")
1835       (match_operand:VDQSF 4 "register_operand" "0")))]
1836   "TARGET_SIMD"
1837   {
1838     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1839     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1840   }
1841   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1842 )
1843
1844 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1845   [(set (match_operand:VMUL 0 "register_operand" "=w")
1846     (fma:VMUL
1847       (vec_duplicate:VMUL
1848           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1849       (match_operand:VMUL 2 "register_operand" "w")
1850       (match_operand:VMUL 3 "register_operand" "0")))]
1851   "TARGET_SIMD"
1852   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1853   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1854 )
1855
1856 (define_insn "*aarch64_fma4_elt_to_64v2df"
1857   [(set (match_operand:DF 0 "register_operand" "=w")
1858     (fma:DF
1859         (vec_select:DF
1860           (match_operand:V2DF 1 "register_operand" "w")
1861           (parallel [(match_operand:SI 2 "immediate_operand")]))
1862       (match_operand:DF 3 "register_operand" "w")
1863       (match_operand:DF 4 "register_operand" "0")))]
1864   "TARGET_SIMD"
1865   {
1866     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1867     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1868   }
1869   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1870 )
1871
1872 (define_insn "fnma<mode>4"
1873   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1874         (fma:VHSDF
1875           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1876           (match_operand:VHSDF 2 "register_operand" "w")
1877           (match_operand:VHSDF 3 "register_operand" "0")))]
1878   "TARGET_SIMD"
1879   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1880   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1881 )
1882
1883 (define_insn "*aarch64_fnma4_elt<mode>"
1884   [(set (match_operand:VDQF 0 "register_operand" "=w")
1885     (fma:VDQF
1886       (neg:VDQF
1887         (match_operand:VDQF 3 "register_operand" "w"))
1888       (vec_duplicate:VDQF
1889         (vec_select:<VEL>
1890           (match_operand:VDQF 1 "register_operand" "<h_con>")
1891           (parallel [(match_operand:SI 2 "immediate_operand")])))
1892       (match_operand:VDQF 4 "register_operand" "0")))]
1893   "TARGET_SIMD"
1894   {
1895     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1896     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1897   }
1898   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1899 )
1900
1901 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1902   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1903     (fma:VDQSF
1904       (neg:VDQSF
1905         (match_operand:VDQSF 3 "register_operand" "w"))
1906       (vec_duplicate:VDQSF
1907         (vec_select:<VEL>
1908           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1909           (parallel [(match_operand:SI 2 "immediate_operand")])))
1910       (match_operand:VDQSF 4 "register_operand" "0")))]
1911   "TARGET_SIMD"
1912   {
1913     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1914     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1915   }
1916   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1917 )
1918
1919 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1920   [(set (match_operand:VMUL 0 "register_operand" "=w")
1921     (fma:VMUL
1922       (neg:VMUL
1923         (match_operand:VMUL 2 "register_operand" "w"))
1924       (vec_duplicate:VMUL
1925         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1926       (match_operand:VMUL 3 "register_operand" "0")))]
1927   "TARGET_SIMD"
1928   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1929   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1930 )
1931
1932 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1933   [(set (match_operand:DF 0 "register_operand" "=w")
1934     (fma:DF
1935       (vec_select:DF
1936         (match_operand:V2DF 1 "register_operand" "w")
1937         (parallel [(match_operand:SI 2 "immediate_operand")]))
1938       (neg:DF
1939         (match_operand:DF 3 "register_operand" "w"))
1940       (match_operand:DF 4 "register_operand" "0")))]
1941   "TARGET_SIMD"
1942   {
1943     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1944     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1945   }
1946   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1947 )
1948
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1951 (define_insn "<frint_pattern><mode>2"
1952   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1953         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1954                        FRINT))]
1955   "TARGET_SIMD"
1956   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1957   [(set_attr "type" "neon_fp_round_<stype><q>")]
1958 )
1959
1960 ;; Vector versions of the fcvt standard patterns.
1961 ;; Expands to lbtrunc, lround, lceil, lfloor
1962 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1963   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1964         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1965                                [(match_operand:VHSDF 1 "register_operand" "w")]
1966                                FCVT)))]
1967   "TARGET_SIMD"
1968   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1969   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1970 )
1971
1972 ;; HF Scalar variants of related SIMD instructions.
1973 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1974   [(set (match_operand:HI 0 "register_operand" "=w")
1975         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1976                       FCVT)))]
1977   "TARGET_SIMD_F16INST"
1978   "fcvt<frint_suffix><su>\t%h0, %h1"
1979   [(set_attr "type" "neon_fp_to_int_s")]
1980 )
1981
1982 (define_insn "<optab>_trunchfhi2"
1983   [(set (match_operand:HI 0 "register_operand" "=w")
1984         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1985   "TARGET_SIMD_F16INST"
1986   "fcvtz<su>\t%h0, %h1"
1987   [(set_attr "type" "neon_fp_to_int_s")]
1988 )
1989
1990 (define_insn "<optab>hihf2"
1991   [(set (match_operand:HF 0 "register_operand" "=w")
1992         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1993   "TARGET_SIMD_F16INST"
1994   "<su_optab>cvtf\t%h0, %h1"
1995   [(set_attr "type" "neon_int_to_fp_s")]
1996 )
1997
1998 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1999   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2000         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001                                [(mult:VDQF
2002          (match_operand:VDQF 1 "register_operand" "w")
2003          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2004                                UNSPEC_FRINTZ)))]
2005   "TARGET_SIMD
2006    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2007                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2008   {
2009     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2010     char buf[64];
2011     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2012     output_asm_insn (buf, operands);
2013     return "";
2014   }
2015   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2016 )
2017
2018 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2019   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2020         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2021                                [(match_operand:VHSDF 1 "register_operand")]
2022                                 UNSPEC_FRINTZ)))]
2023   "TARGET_SIMD"
2024   {})
2025
2026 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2027   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2028         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2029                                [(match_operand:VHSDF 1 "register_operand")]
2030                                 UNSPEC_FRINTZ)))]
2031   "TARGET_SIMD"
2032   {})
2033
2034 (define_expand "ftrunc<VHSDF:mode>2"
2035   [(set (match_operand:VHSDF 0 "register_operand")
2036         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2037                        UNSPEC_FRINTZ))]
2038   "TARGET_SIMD"
2039   {})
2040
2041 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2042   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043         (FLOATUORS:VHSDF
2044           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2045   "TARGET_SIMD"
2046   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2047   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2048 )
2049
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2053
2054 ;; Float widening operations.
2055
2056 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2057   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2058         (float_extend:<VWIDE> (vec_select:<VHALF>
2059                                (match_operand:VQ_HSF 1 "register_operand" "w")
2060                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2061                             )))]
2062   "TARGET_SIMD"
2063   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2064   [(set_attr "type" "neon_fp_cvt_widen_s")]
2065 )
2066
2067 ;; Convert between fixed-point and floating-point (vector modes)
2068
2069 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2070   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2071         (unspec:<VHSDF:FCVT_TARGET>
2072           [(match_operand:VHSDF 1 "register_operand" "w")
2073            (match_operand:SI 2 "immediate_operand" "i")]
2074          FCVT_F2FIXED))]
2075   "TARGET_SIMD"
2076   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2077   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2078 )
2079
2080 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2081   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2082         (unspec:<VDQ_HSDI:FCVT_TARGET>
2083           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2084            (match_operand:SI 2 "immediate_operand" "i")]
2085          FCVT_FIXED2F))]
2086   "TARGET_SIMD"
2087   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2088   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2089 )
2090
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
2099
2100 (define_expand "vec_unpacks_lo_<mode>"
2101   [(match_operand:<VWIDE> 0 "register_operand" "")
2102    (match_operand:VQ_HSF 1 "register_operand" "")]
2103   "TARGET_SIMD"
2104   {
2105     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2106     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2107                                                        operands[1], p));
2108     DONE;
2109   }
2110 )
2111
2112 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2113   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114         (float_extend:<VWIDE> (vec_select:<VHALF>
2115                                (match_operand:VQ_HSF 1 "register_operand" "w")
2116                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2117                             )))]
2118   "TARGET_SIMD"
2119   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2120   [(set_attr "type" "neon_fp_cvt_widen_s")]
2121 )
2122
2123 (define_expand "vec_unpacks_hi_<mode>"
2124   [(match_operand:<VWIDE> 0 "register_operand" "")
2125    (match_operand:VQ_HSF 1 "register_operand" "")]
2126   "TARGET_SIMD"
2127   {
2128     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2129     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2130                                                        operands[1], p));
2131     DONE;
2132   }
2133 )
2134 (define_insn "aarch64_float_extend_lo_<Vwide>"
2135   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2136         (float_extend:<VWIDE>
2137           (match_operand:VDF 1 "register_operand" "w")))]
2138   "TARGET_SIMD"
2139   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2140   [(set_attr "type" "neon_fp_cvt_widen_s")]
2141 )
2142
2143 ;; Float narrowing operations.
2144
2145 (define_insn "aarch64_float_truncate_lo_<mode>"
2146   [(set (match_operand:VDF 0 "register_operand" "=w")
2147       (float_truncate:VDF
2148         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2149   "TARGET_SIMD"
2150   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2151   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2152 )
2153
2154 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2155   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2156     (vec_concat:<VDBL>
2157       (match_operand:VDF 1 "register_operand" "0")
2158       (float_truncate:VDF
2159         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2160   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2161   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2162   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2163 )
2164
2165 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2166   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2167     (vec_concat:<VDBL>
2168       (float_truncate:VDF
2169         (match_operand:<VWIDE> 2 "register_operand" "w"))
2170       (match_operand:VDF 1 "register_operand" "0")))]
2171   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2172   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2173   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2174 )
2175
2176 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2177   [(match_operand:<VDBL> 0 "register_operand" "=w")
2178    (match_operand:VDF 1 "register_operand" "0")
2179    (match_operand:<VWIDE> 2 "register_operand" "w")]
2180   "TARGET_SIMD"
2181 {
2182   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2183                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2184                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2185   emit_insn (gen (operands[0], operands[1], operands[2]));
2186   DONE;
2187 }
2188 )
2189
2190 (define_expand "vec_pack_trunc_v2df"
2191   [(set (match_operand:V4SF 0 "register_operand")
2192       (vec_concat:V4SF
2193         (float_truncate:V2SF
2194             (match_operand:V2DF 1 "register_operand"))
2195         (float_truncate:V2SF
2196             (match_operand:V2DF 2 "register_operand"))
2197           ))]
2198   "TARGET_SIMD"
2199   {
2200     rtx tmp = gen_reg_rtx (V2SFmode);
2201     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2205     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2206                                                    tmp, operands[hi]));
2207     DONE;
2208   }
2209 )
2210
2211 (define_expand "vec_pack_trunc_df"
2212   [(set (match_operand:V2SF 0 "register_operand")
2213       (vec_concat:V2SF
2214         (float_truncate:SF
2215             (match_operand:DF 1 "register_operand"))
2216         (float_truncate:SF
2217             (match_operand:DF 2 "register_operand"))
2218           ))]
2219   "TARGET_SIMD"
2220   {
2221     rtx tmp = gen_reg_rtx (V2SFmode);
2222     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2223     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2224
2225     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2226     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2227     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2228     DONE;
2229   }
2230 )
2231
2232 ;; FP Max/Min
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2234 ;; expression like:
2235 ;;      a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2238 ;; -ffast-math.
2239 ;;
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2245 ;; NaNs.
2246
2247 (define_insn "<su><maxmin><mode>3"
2248   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2249         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2250                        (match_operand:VHSDF 2 "register_operand" "w")))]
2251   "TARGET_SIMD"
2252   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2253   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2254 )
2255
2256 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2257 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2258 ;; which implement the IEEE fmax ()/fmin () functions.
2259 (define_insn "<maxmin_uns><mode>3"
2260   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2261        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2262                       (match_operand:VHSDF 2 "register_operand" "w")]
2263                       FMAXMIN_UNS))]
2264   "TARGET_SIMD"
2265   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2266   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2267 )
2268
2269 ;; 'across lanes' add.
2270
2271 (define_expand "reduc_plus_scal_<mode>"
2272   [(match_operand:<VEL> 0 "register_operand" "=w")
2273    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2274                UNSPEC_ADDV)]
2275   "TARGET_SIMD"
2276   {
2277     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2278     rtx scratch = gen_reg_rtx (<MODE>mode);
2279     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2280     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2281     DONE;
2282   }
2283 )
2284
2285 (define_insn "aarch64_faddp<mode>"
2286  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2287        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2288                       (match_operand:VHSDF 2 "register_operand" "w")]
2289         UNSPEC_FADDV))]
2290  "TARGET_SIMD"
2291  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2292   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2293 )
2294
2295 (define_insn "aarch64_reduc_plus_internal<mode>"
2296  [(set (match_operand:VDQV 0 "register_operand" "=w")
2297        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2298                     UNSPEC_ADDV))]
2299  "TARGET_SIMD"
2300  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2301   [(set_attr "type" "neon_reduc_add<q>")]
2302 )
2303
2304 (define_insn "aarch64_reduc_plus_internalv2si"
2305  [(set (match_operand:V2SI 0 "register_operand" "=w")
2306        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2307                     UNSPEC_ADDV))]
2308  "TARGET_SIMD"
2309  "addp\\t%0.2s, %1.2s, %1.2s"
2310   [(set_attr "type" "neon_reduc_add")]
2311 )
2312
2313 (define_insn "reduc_plus_scal_<mode>"
2314  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2315        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2316                    UNSPEC_FADDV))]
2317  "TARGET_SIMD"
2318  "faddp\\t%<Vetype>0, %1.<Vtype>"
2319   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2320 )
2321
2322 (define_expand "reduc_plus_scal_v4sf"
2323  [(set (match_operand:SF 0 "register_operand")
2324        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2325                     UNSPEC_FADDV))]
2326  "TARGET_SIMD"
2327 {
2328   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2329   rtx scratch = gen_reg_rtx (V4SFmode);
2330   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2331   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2332   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2333   DONE;
2334 })
2335
2336 (define_insn "clrsb<mode>2"
2337   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2338         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2339   "TARGET_SIMD"
2340   "cls\\t%0.<Vtype>, %1.<Vtype>"
2341   [(set_attr "type" "neon_cls<q>")]
2342 )
2343
2344 (define_insn "clz<mode>2"
2345  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2346        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2347  "TARGET_SIMD"
2348  "clz\\t%0.<Vtype>, %1.<Vtype>"
2349   [(set_attr "type" "neon_cls<q>")]
2350 )
2351
2352 (define_insn "popcount<mode>2"
2353   [(set (match_operand:VB 0 "register_operand" "=w")
2354         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2355   "TARGET_SIMD"
2356   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2357   [(set_attr "type" "neon_cnt<q>")]
2358 )
2359
2360 ;; 'across lanes' max and min ops.
2361
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2364 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2365   [(match_operand:<VEL> 0 "register_operand")
2366    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2367                   FMAXMINV)]
2368   "TARGET_SIMD"
2369   {
2370     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2371     rtx scratch = gen_reg_rtx (<MODE>mode);
2372     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2373                                                               operands[1]));
2374     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2375     DONE;
2376   }
2377 )
2378
2379 ;; Likewise for integer cases, signed and unsigned.
2380 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2381   [(match_operand:<VEL> 0 "register_operand")
2382    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2383                     MAXMINV)]
2384   "TARGET_SIMD"
2385   {
2386     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2387     rtx scratch = gen_reg_rtx (<MODE>mode);
2388     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2389                                                               operands[1]));
2390     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2391     DONE;
2392   }
2393 )
2394
2395 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2396  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2397        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2398                     MAXMINV))]
2399  "TARGET_SIMD"
2400  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2401   [(set_attr "type" "neon_reduc_minmax<q>")]
2402 )
2403
2404 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2405  [(set (match_operand:V2SI 0 "register_operand" "=w")
2406        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2407                     MAXMINV))]
2408  "TARGET_SIMD"
2409  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2410   [(set_attr "type" "neon_reduc_minmax")]
2411 )
2412
2413 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2414  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2415        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2416                       FMAXMINV))]
2417  "TARGET_SIMD"
2418  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2419   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2420 )
2421
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2423 ;; allocation.
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2425 ;; to select.
2426 ;;
2427 ;; Thus our BSL is of the form:
2428 ;;   op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2430 ;;
2431 ;;   if (op0 = mask)
2432 ;;     bsl mask, op1, op2
2433 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;;     bit op0, op2, mask
2435 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;;     bif op0, op1, mask
2437 ;;
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
2441
2442 (define_insn "aarch64_simd_bsl<mode>_internal"
2443   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2444         (xor:VDQ_I
2445            (and:VDQ_I
2446              (xor:VDQ_I
2447                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2448                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2449              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2450           (match_dup:<V_INT_EQUIV> 3)
2451         ))]
2452   "TARGET_SIMD"
2453   "@
2454   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2455   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2456   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2457   [(set_attr "type" "neon_bsl<q>")]
2458 )
2459
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first.  The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2465
2466 (define_insn "*aarch64_simd_bsl<mode>_alt"
2467   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2468         (xor:VDQ_I
2469            (and:VDQ_I
2470              (xor:VDQ_I
2471                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2472                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2473               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2474           (match_dup:<V_INT_EQUIV> 2)))]
2475   "TARGET_SIMD"
2476   "@
2477   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2478   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2479   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2480   [(set_attr "type" "neon_bsl<q>")]
2481 )
2482
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers.  If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again.  However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2489 ;;
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
2492
2493 (define_insn_and_split "aarch64_simd_bsldi_internal"
2494   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2495         (xor:DI
2496            (and:DI
2497              (xor:DI
2498                (match_operand:DI 3 "register_operand" "w,0,w,r")
2499                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2500              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2501           (match_dup:DI 3)
2502         ))]
2503   "TARGET_SIMD"
2504   "@
2505   bsl\\t%0.8b, %2.8b, %3.8b
2506   bit\\t%0.8b, %2.8b, %1.8b
2507   bif\\t%0.8b, %3.8b, %1.8b
2508   #"
2509   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2510   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2511 {
2512   /* Split back to individual operations.  If we're before reload, and
2513      able to create a temporary register, do so.  If we're after reload,
2514      we've got an early-clobber destination register, so use that.
2515      Otherwise, we can't create pseudos and we can't yet guarantee that
2516      operands[0] is safe to write, so FAIL to split.  */
2517
2518   rtx scratch;
2519   if (reload_completed)
2520     scratch = operands[0];
2521   else if (can_create_pseudo_p ())
2522     scratch = gen_reg_rtx (DImode);
2523   else
2524     FAIL;
2525
2526   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2527   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2528   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2529   DONE;
2530 }
2531   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2532    (set_attr "length" "4,4,4,12")]
2533 )
2534
2535 (define_insn_and_split "aarch64_simd_bsldi_alt"
2536   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2537         (xor:DI
2538            (and:DI
2539              (xor:DI
2540                (match_operand:DI 3 "register_operand" "w,w,0,r")
2541                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2542              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2543           (match_dup:DI 2)
2544         ))]
2545   "TARGET_SIMD"
2546   "@
2547   bsl\\t%0.8b, %3.8b, %2.8b
2548   bit\\t%0.8b, %3.8b, %1.8b
2549   bif\\t%0.8b, %2.8b, %1.8b
2550   #"
2551   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2552   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2553 {
2554   /* Split back to individual operations.  If we're before reload, and
2555      able to create a temporary register, do so.  If we're after reload,
2556      we've got an early-clobber destination register, so use that.
2557      Otherwise, we can't create pseudos and we can't yet guarantee that
2558      operands[0] is safe to write, so FAIL to split.  */
2559
2560   rtx scratch;
2561   if (reload_completed)
2562     scratch = operands[0];
2563   else if (can_create_pseudo_p ())
2564     scratch = gen_reg_rtx (DImode);
2565   else
2566     FAIL;
2567
2568   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2569   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2570   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2571   DONE;
2572 }
2573   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2574    (set_attr "length" "4,4,4,12")]
2575 )
2576
2577 (define_expand "aarch64_simd_bsl<mode>"
2578   [(match_operand:VALLDIF 0 "register_operand")
2579    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2580    (match_operand:VALLDIF 2 "register_operand")
2581    (match_operand:VALLDIF 3 "register_operand")]
2582  "TARGET_SIMD"
2583 {
2584   /* We can't alias operands together if they have different modes.  */
2585   rtx tmp = operands[0];
2586   if (FLOAT_MODE_P (<MODE>mode))
2587     {
2588       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2589       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2590       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2591     }
2592   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2593   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2594                                                          operands[1],
2595                                                          operands[2],
2596                                                          operands[3]));
2597   if (tmp != operands[0])
2598     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2599
2600   DONE;
2601 })
2602
2603 (define_expand "vcond_mask_<mode><v_int_equiv>"
2604   [(match_operand:VALLDI 0 "register_operand")
2605    (match_operand:VALLDI 1 "nonmemory_operand")
2606    (match_operand:VALLDI 2 "nonmemory_operand")
2607    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2608   "TARGET_SIMD"
2609 {
2610   /* If we have (a = (P) ? -1 : 0);
2611      Then we can simply move the generated mask (result must be int).  */
2612   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2613       && operands[2] == CONST0_RTX (<MODE>mode))
2614     emit_move_insn (operands[0], operands[3]);
2615   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2616   else if (operands[1] == CONST0_RTX (<MODE>mode)
2617            && operands[2] == CONSTM1_RTX (<MODE>mode))
2618     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2619   else
2620     {
2621       if (!REG_P (operands[1]))
2622         operands[1] = force_reg (<MODE>mode, operands[1]);
2623       if (!REG_P (operands[2]))
2624         operands[2] = force_reg (<MODE>mode, operands[2]);
2625       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2626                                              operands[1], operands[2]));
2627     }
2628
2629   DONE;
2630 })
2631
2632 ;; Patterns comparing two vectors to produce a mask.
2633
2634 (define_expand "vec_cmp<mode><mode>"
2635   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2636           (match_operator 1 "comparison_operator"
2637             [(match_operand:VSDQ_I_DI 2 "register_operand")
2638              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2639   "TARGET_SIMD"
2640 {
2641   rtx mask = operands[0];
2642   enum rtx_code code = GET_CODE (operands[1]);
2643
2644   switch (code)
2645     {
2646     case NE:
2647     case LE:
2648     case LT:
2649     case GE:
2650     case GT:
2651     case EQ:
2652       if (operands[3] == CONST0_RTX (<MODE>mode))
2653         break;
2654
2655       /* Fall through.  */
2656     default:
2657       if (!REG_P (operands[3]))
2658         operands[3] = force_reg (<MODE>mode, operands[3]);
2659
2660       break;
2661     }
2662
2663   switch (code)
2664     {
2665     case LT:
2666       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2667       break;
2668
2669     case GE:
2670       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2671       break;
2672
2673     case LE:
2674       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2675       break;
2676
2677     case GT:
2678       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2679       break;
2680
2681     case LTU:
2682       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2683       break;
2684
2685     case GEU:
2686       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2687       break;
2688
2689     case LEU:
2690       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2691       break;
2692
2693     case GTU:
2694       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2695       break;
2696
2697     case NE:
2698       /* Handle NE as !EQ.  */
2699       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2700       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2701       break;
2702
2703     case EQ:
2704       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2705       break;
2706
2707     default:
2708       gcc_unreachable ();
2709     }
2710
2711   DONE;
2712 })
2713
2714 (define_expand "vec_cmp<mode><v_int_equiv>"
2715   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2716         (match_operator 1 "comparison_operator"
2717             [(match_operand:VDQF 2 "register_operand")
2718              (match_operand:VDQF 3 "nonmemory_operand")]))]
2719   "TARGET_SIMD"
2720 {
2721   int use_zero_form = 0;
2722   enum rtx_code code = GET_CODE (operands[1]);
2723   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2724
2725   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2726
2727   switch (code)
2728     {
2729     case LE:
2730     case LT:
2731     case GE:
2732     case GT:
2733     case EQ:
2734       if (operands[3] == CONST0_RTX (<MODE>mode))
2735         {
2736           use_zero_form = 1;
2737           break;
2738         }
2739       /* Fall through.  */
2740     default:
2741       if (!REG_P (operands[3]))
2742         operands[3] = force_reg (<MODE>mode, operands[3]);
2743
2744       break;
2745     }
2746
2747   switch (code)
2748     {
2749     case LT:
2750       if (use_zero_form)
2751         {
2752           comparison = gen_aarch64_cmlt<mode>;
2753           break;
2754         }
2755       /* Fall through.  */
2756     case UNLT:
2757       std::swap (operands[2], operands[3]);
2758       /* Fall through.  */
2759     case UNGT:
2760     case GT:
2761       comparison = gen_aarch64_cmgt<mode>;
2762       break;
2763     case LE:
2764       if (use_zero_form)
2765         {
2766           comparison = gen_aarch64_cmle<mode>;
2767           break;
2768         }
2769       /* Fall through.  */
2770     case UNLE:
2771       std::swap (operands[2], operands[3]);
2772       /* Fall through.  */
2773     case UNGE:
2774     case GE:
2775       comparison = gen_aarch64_cmge<mode>;
2776       break;
2777     case NE:
2778     case EQ:
2779       comparison = gen_aarch64_cmeq<mode>;
2780       break;
2781     case UNEQ:
2782     case ORDERED:
2783     case UNORDERED:
2784     case LTGT:
2785       break;
2786     default:
2787       gcc_unreachable ();
2788     }
2789
2790   switch (code)
2791     {
2792     case UNGE:
2793     case UNGT:
2794     case UNLE:
2795     case UNLT:
2796       {
2797         /* All of the above must not raise any FP exceptions.  Thus we first
2798            check each operand for NaNs and force any elements containing NaN to
2799            zero before using them in the compare.
2800            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2801                                      (cm<cc> (isnan (a) ? 0.0 : a,
2802                                               isnan (b) ? 0.0 : b))
2803            We use the following transformations for doing the comparisions:
2804            a UNGE b -> a GE b
2805            a UNGT b -> a GT b
2806            a UNLE b -> b GE a
2807            a UNLT b -> b GT a.  */
2808
2809         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2810         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2811         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2812         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2813         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2814         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2815         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2816                                           lowpart_subreg (<V_INT_EQUIV>mode,
2817                                                           operands[2],
2818                                                           <MODE>mode)));
2819         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2820                                           lowpart_subreg (<V_INT_EQUIV>mode,
2821                                                           operands[3],
2822                                                           <MODE>mode)));
2823         gcc_assert (comparison != NULL);
2824         emit_insn (comparison (operands[0],
2825                                lowpart_subreg (<MODE>mode,
2826                                                tmp0, <V_INT_EQUIV>mode),
2827                                lowpart_subreg (<MODE>mode,
2828                                                tmp1, <V_INT_EQUIV>mode)));
2829         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2830       }
2831       break;
2832
2833     case LT:
2834     case LE:
2835     case GT:
2836     case GE:
2837     case EQ:
2838     case NE:
2839       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2840          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2841          a GE b -> a GE b
2842          a GT b -> a GT b
2843          a LE b -> b GE a
2844          a LT b -> b GT a
2845          a EQ b -> a EQ b
2846          a NE b -> ~(a EQ b)  */
2847       gcc_assert (comparison != NULL);
2848       emit_insn (comparison (operands[0], operands[2], operands[3]));
2849       if (code == NE)
2850         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2851       break;
2852
2853     case LTGT:
2854       /* LTGT is not guranteed to not generate a FP exception.  So let's
2855          go the faster way : ((a > b) || (b > a)).  */
2856       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2857                                          operands[2], operands[3]));
2858       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2859       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2860       break;
2861
2862     case ORDERED:
2863     case UNORDERED:
2864     case UNEQ:
2865       /* cmeq (a, a) & cmeq (b, b).  */
2866       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2867                                          operands[2], operands[2]));
2868       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2869       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2870
2871       if (code == UNORDERED)
2872         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2873       else if (code == UNEQ)
2874         {
2875           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2876           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2877         }
2878       break;
2879
2880     default:
2881       gcc_unreachable ();
2882     }
2883
2884   DONE;
2885 })
2886
2887 (define_expand "vec_cmpu<mode><mode>"
2888   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2889           (match_operator 1 "comparison_operator"
2890             [(match_operand:VSDQ_I_DI 2 "register_operand")
2891              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2892   "TARGET_SIMD"
2893 {
2894   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2895                                       operands[2], operands[3]));
2896   DONE;
2897 })
2898
2899 (define_expand "vcond<mode><mode>"
2900   [(set (match_operand:VALLDI 0 "register_operand")
2901         (if_then_else:VALLDI
2902           (match_operator 3 "comparison_operator"
2903             [(match_operand:VALLDI 4 "register_operand")
2904              (match_operand:VALLDI 5 "nonmemory_operand")])
2905           (match_operand:VALLDI 1 "nonmemory_operand")
2906           (match_operand:VALLDI 2 "nonmemory_operand")))]
2907   "TARGET_SIMD"
2908 {
2909   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2910   enum rtx_code code = GET_CODE (operands[3]);
2911
2912   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2913      it as well as switch operands 1/2 in order to avoid the additional
2914      NOT instruction.  */
2915   if (code == NE)
2916     {
2917       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2918                                     operands[4], operands[5]);
2919       std::swap (operands[1], operands[2]);
2920     }
2921   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2922                                              operands[4], operands[5]));
2923   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2924                                                  operands[2], mask));
2925
2926   DONE;
2927 })
2928
2929 (define_expand "vcond<v_cmp_mixed><mode>"
2930   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2931         (if_then_else:<V_cmp_mixed>
2932           (match_operator 3 "comparison_operator"
2933             [(match_operand:VDQF_COND 4 "register_operand")
2934              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2935           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2936           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2937   "TARGET_SIMD"
2938 {
2939   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2940   enum rtx_code code = GET_CODE (operands[3]);
2941
2942   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2943      it as well as switch operands 1/2 in order to avoid the additional
2944      NOT instruction.  */
2945   if (code == NE)
2946     {
2947       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2948                                     operands[4], operands[5]);
2949       std::swap (operands[1], operands[2]);
2950     }
2951   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2952                                              operands[4], operands[5]));
2953   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2954                                                 operands[0], operands[1],
2955                                                 operands[2], mask));
2956
2957   DONE;
2958 })
2959
2960 (define_expand "vcondu<mode><mode>"
2961   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2962         (if_then_else:VSDQ_I_DI
2963           (match_operator 3 "comparison_operator"
2964             [(match_operand:VSDQ_I_DI 4 "register_operand")
2965              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2966           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2967           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2968   "TARGET_SIMD"
2969 {
2970   rtx mask = gen_reg_rtx (<MODE>mode);
2971   enum rtx_code code = GET_CODE (operands[3]);
2972
2973   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974      it as well as switch operands 1/2 in order to avoid the additional
2975      NOT instruction.  */
2976   if (code == NE)
2977     {
2978       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979                                     operands[4], operands[5]);
2980       std::swap (operands[1], operands[2]);
2981     }
2982   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2983                                       operands[4], operands[5]));
2984   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2985                                                  operands[2], mask));
2986   DONE;
2987 })
2988
2989 (define_expand "vcondu<mode><v_cmp_mixed>"
2990   [(set (match_operand:VDQF 0 "register_operand")
2991         (if_then_else:VDQF
2992           (match_operator 3 "comparison_operator"
2993             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2994              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2995           (match_operand:VDQF 1 "nonmemory_operand")
2996           (match_operand:VDQF 2 "nonmemory_operand")))]
2997   "TARGET_SIMD"
2998 {
2999   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3000   enum rtx_code code = GET_CODE (operands[3]);
3001
3002   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3003      it as well as switch operands 1/2 in order to avoid the additional
3004      NOT instruction.  */
3005   if (code == NE)
3006     {
3007       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3008                                     operands[4], operands[5]);
3009       std::swap (operands[1], operands[2]);
3010     }
3011   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3012                                                   mask, operands[3],
3013                                                   operands[4], operands[5]));
3014   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3015                                                  operands[2], mask));
3016   DONE;
3017 })
3018
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3020
3021 ;; Lane extraction with sign extension to general purpose register.
3022 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3023   [(set (match_operand:GPI 0 "register_operand" "=r")
3024         (sign_extend:GPI
3025           (vec_select:<VEL>
3026             (match_operand:VDQQH 1 "register_operand" "w")
3027             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3028   "TARGET_SIMD"
3029   {
3030     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3031     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3032   }
3033   [(set_attr "type" "neon_to_gp<q>")]
3034 )
3035
3036 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3037   [(set (match_operand:GPI 0 "register_operand" "=r")
3038         (zero_extend:GPI
3039           (vec_select:<VEL>
3040             (match_operand:VDQQH 1 "register_operand" "w")
3041             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3042   "TARGET_SIMD"
3043   {
3044     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3045                                            INTVAL (operands[2]));
3046     return "umov\\t%w0, %1.<Vetype>[%2]";
3047   }
3048   [(set_attr "type" "neon_to_gp<q>")]
3049 )
3050
3051 ;; Lane extraction of a value, neither sign nor zero extension
3052 ;; is guaranteed so upper bits should be considered undefined.
3053 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3054 (define_insn "aarch64_get_lane<mode>"
3055   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3056         (vec_select:<VEL>
3057           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3058           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3059   "TARGET_SIMD"
3060   {
3061     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3062     switch (which_alternative)
3063       {
3064         case 0:
3065           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3066         case 1:
3067           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3068         case 2:
3069           return "st1\\t{%1.<Vetype>}[%2], %0";
3070         default:
3071           gcc_unreachable ();
3072       }
3073   }
3074   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3075 )
3076
3077 (define_insn "load_pair_lanes<mode>"
3078   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3079         (vec_concat:<VDBL>
3080            (match_operand:VDC 1 "memory_operand" "Utq")
3081            (match_operand:VDC 2 "memory_operand" "m")))]
3082   "TARGET_SIMD && !STRICT_ALIGNMENT
3083    && rtx_equal_p (XEXP (operands[2], 0),
3084                    plus_constant (Pmode,
3085                                   XEXP (operands[1], 0),
3086                                   GET_MODE_SIZE (<MODE>mode)))"
3087   "ldr\\t%q0, %1"
3088   [(set_attr "type" "neon_load1_1reg_q")]
3089 )
3090
3091 (define_insn "store_pair_lanes<mode>"
3092   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3093         (vec_concat:<VDBL>
3094            (match_operand:VDC 1 "register_operand" "w, r")
3095            (match_operand:VDC 2 "register_operand" "w, r")))]
3096   "TARGET_SIMD"
3097   "@
3098    stp\\t%d1, %d2, %y0
3099    stp\\t%x1, %x2, %y0"
3100   [(set_attr "type" "neon_stp, store_16")]
3101 )
3102
3103 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3104 ;; dest vector.
3105
3106 (define_insn "*aarch64_combinez<mode>"
3107   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3108         (vec_concat:<VDBL>
3109           (match_operand:VDC 1 "general_operand" "w,?r,m")
3110           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3111   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3112   "@
3113    mov\\t%0.8b, %1.8b
3114    fmov\t%d0, %1
3115    ldr\\t%d0, %1"
3116   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3117    (set_attr "simd" "yes,*,yes")
3118    (set_attr "fp" "*,yes,*")]
3119 )
3120
3121 (define_insn "*aarch64_combinez_be<mode>"
3122   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3123         (vec_concat:<VDBL>
3124           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3125           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3126   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3127   "@
3128    mov\\t%0.8b, %1.8b
3129    fmov\t%d0, %1
3130    ldr\\t%d0, %1"
3131   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3132    (set_attr "simd" "yes,*,yes")
3133    (set_attr "fp" "*,yes,*")]
3134 )
3135
3136 (define_expand "aarch64_combine<mode>"
3137   [(match_operand:<VDBL> 0 "register_operand")
3138    (match_operand:VDC 1 "register_operand")
3139    (match_operand:VDC 2 "register_operand")]
3140   "TARGET_SIMD"
3141 {
3142   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3143
3144   DONE;
3145 }
3146 )
3147
3148 (define_expand "aarch64_simd_combine<mode>"
3149   [(match_operand:<VDBL> 0 "register_operand")
3150    (match_operand:VDC 1 "register_operand")
3151    (match_operand:VDC 2 "register_operand")]
3152   "TARGET_SIMD"
3153   {
3154     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3155     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3156     DONE;
3157   }
3158 [(set_attr "type" "multiple")]
3159 )
3160
3161 ;; <su><addsub>l<q>.
3162
3163 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3164  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3165        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3166                            (match_operand:VQW 1 "register_operand" "w")
3167                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3168                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3169                            (match_operand:VQW 2 "register_operand" "w")
3170                            (match_dup 3)))))]
3171   "TARGET_SIMD"
3172   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3173   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3174 )
3175
3176 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3177  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3178        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3179                            (match_operand:VQW 1 "register_operand" "w")
3180                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3181                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3182                            (match_operand:VQW 2 "register_operand" "w")
3183                            (match_dup 3)))))]
3184   "TARGET_SIMD"
3185   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3186   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3187 )
3188
3189
3190 (define_expand "aarch64_saddl2<mode>"
3191   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3192    (match_operand:VQW 1 "register_operand" "w")
3193    (match_operand:VQW 2 "register_operand" "w")]
3194   "TARGET_SIMD"
3195 {
3196   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3197   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3198                                                   operands[2], p));
3199   DONE;
3200 })
3201
3202 (define_expand "aarch64_uaddl2<mode>"
3203   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204    (match_operand:VQW 1 "register_operand" "w")
3205    (match_operand:VQW 2 "register_operand" "w")]
3206   "TARGET_SIMD"
3207 {
3208   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3209   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3210                                                   operands[2], p));
3211   DONE;
3212 })
3213
3214 (define_expand "aarch64_ssubl2<mode>"
3215   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3216    (match_operand:VQW 1 "register_operand" "w")
3217    (match_operand:VQW 2 "register_operand" "w")]
3218   "TARGET_SIMD"
3219 {
3220   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3221   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3222                                                 operands[2], p));
3223   DONE;
3224 })
3225
3226 (define_expand "aarch64_usubl2<mode>"
3227   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3228    (match_operand:VQW 1 "register_operand" "w")
3229    (match_operand:VQW 2 "register_operand" "w")]
3230   "TARGET_SIMD"
3231 {
3232   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3233   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3234                                                 operands[2], p));
3235   DONE;
3236 })
3237
3238 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3239  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3240        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3241                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3242                        (ANY_EXTEND:<VWIDE>
3243                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3244   "TARGET_SIMD"
3245   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3246   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3247 )
3248
3249 ;; <su><addsub>w<q>.
3250
3251 (define_expand "widen_ssum<mode>3"
3252   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3253         (plus:<VDBLW> (sign_extend:<VDBLW>
3254                         (match_operand:VQW 1 "register_operand" ""))
3255                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3256   "TARGET_SIMD"
3257   {
3258     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3259     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3260
3261     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3262                                                 operands[1], p));
3263     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3264     DONE;
3265   }
3266 )
3267
3268 (define_expand "widen_ssum<mode>3"
3269   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3270         (plus:<VWIDE> (sign_extend:<VWIDE>
3271                         (match_operand:VD_BHSI 1 "register_operand" ""))
3272                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3273   "TARGET_SIMD"
3274 {
3275   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3276   DONE;
3277 })
3278
3279 (define_expand "widen_usum<mode>3"
3280   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3281         (plus:<VDBLW> (zero_extend:<VDBLW>
3282                         (match_operand:VQW 1 "register_operand" ""))
3283                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3284   "TARGET_SIMD"
3285   {
3286     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3287     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3288
3289     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3290                                                  operands[1], p));
3291     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3292     DONE;
3293   }
3294 )
3295
3296 (define_expand "widen_usum<mode>3"
3297   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3298         (plus:<VWIDE> (zero_extend:<VWIDE>
3299                         (match_operand:VD_BHSI 1 "register_operand" ""))
3300                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3301   "TARGET_SIMD"
3302 {
3303   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3304   DONE;
3305 })
3306
3307 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3308   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3309         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3310           (ANY_EXTEND:<VWIDE>
3311             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3312   "TARGET_SIMD"
3313   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3314   [(set_attr "type" "neon_sub_widen")]
3315 )
3316
3317 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3318   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3319         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3320           (ANY_EXTEND:<VWIDE>
3321             (vec_select:<VHALF>
3322               (match_operand:VQW 2 "register_operand" "w")
3323               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3324   "TARGET_SIMD"
3325   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3326   [(set_attr "type" "neon_sub_widen")]
3327 )
3328
3329 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3330   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3331         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3332           (ANY_EXTEND:<VWIDE>
3333             (vec_select:<VHALF>
3334               (match_operand:VQW 2 "register_operand" "w")
3335               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3336   "TARGET_SIMD"
3337   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3338   [(set_attr "type" "neon_sub_widen")]
3339 )
3340
3341 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3342   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3343         (plus:<VWIDE>
3344           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3345           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3346   "TARGET_SIMD"
3347   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3348   [(set_attr "type" "neon_add_widen")]
3349 )
3350
3351 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3352   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3353         (plus:<VWIDE>
3354           (ANY_EXTEND:<VWIDE>
3355             (vec_select:<VHALF>
3356               (match_operand:VQW 2 "register_operand" "w")
3357               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3358           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3359   "TARGET_SIMD"
3360   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3361   [(set_attr "type" "neon_add_widen")]
3362 )
3363
3364 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3365   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3366         (plus:<VWIDE>
3367           (ANY_EXTEND:<VWIDE>
3368             (vec_select:<VHALF>
3369               (match_operand:VQW 2 "register_operand" "w")
3370               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3371           (match_operand:<VWIDE> 1 "register_operand" "w")))]
3372   "TARGET_SIMD"
3373   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3374   [(set_attr "type" "neon_add_widen")]
3375 )
3376
3377 (define_expand "aarch64_saddw2<mode>"
3378   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3379    (match_operand:<VWIDE> 1 "register_operand" "w")
3380    (match_operand:VQW 2 "register_operand" "w")]
3381   "TARGET_SIMD"
3382 {
3383   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3385                                                 operands[2], p));
3386   DONE;
3387 })
3388
3389 (define_expand "aarch64_uaddw2<mode>"
3390   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3391    (match_operand:<VWIDE> 1 "register_operand" "w")
3392    (match_operand:VQW 2 "register_operand" "w")]
3393   "TARGET_SIMD"
3394 {
3395   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3396   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3397                                                 operands[2], p));
3398   DONE;
3399 })
3400
3401
3402 (define_expand "aarch64_ssubw2<mode>"
3403   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3404    (match_operand:<VWIDE> 1 "register_operand" "w")
3405    (match_operand:VQW 2 "register_operand" "w")]
3406   "TARGET_SIMD"
3407 {
3408   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3409   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3410                                                 operands[2], p));
3411   DONE;
3412 })
3413
3414 (define_expand "aarch64_usubw2<mode>"
3415   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3416    (match_operand:<VWIDE> 1 "register_operand" "w")
3417    (match_operand:VQW 2 "register_operand" "w")]
3418   "TARGET_SIMD"
3419 {
3420   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3421   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3422                                                 operands[2], p));
3423   DONE;
3424 })
3425
3426 ;; <su><r>h<addsub>.
3427
3428 (define_expand "<u>avg<mode>3_floor"
3429   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3430         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3431                           (match_operand:VDQ_BHSI 2 "register_operand")]
3432                          HADD))]
3433   "TARGET_SIMD"
3434 )
3435
3436 (define_expand "<u>avg<mode>3_ceil"
3437   [(set (match_operand:VDQ_BHSI 0 "register_operand")
3438         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3439                           (match_operand:VDQ_BHSI 2 "register_operand")]
3440                          RHADD))]
3441   "TARGET_SIMD"
3442 )
3443
3444 (define_insn "aarch64_<sur>h<addsub><mode>"
3445   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3446         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3447                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3448                      HADDSUB))]
3449   "TARGET_SIMD"
3450   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3451   [(set_attr "type" "neon_<addsub>_halve<q>")]
3452 )
3453
3454 ;; <r><addsub>hn<q>.
3455
3456 (define_insn "aarch64_<sur><addsub>hn<mode>"
3457   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3458         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3459                             (match_operand:VQN 2 "register_operand" "w")]
3460                            ADDSUBHN))]
3461   "TARGET_SIMD"
3462   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3463   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3464 )
3465
3466 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3467   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3468         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3469                              (match_operand:VQN 2 "register_operand" "w")
3470                              (match_operand:VQN 3 "register_operand" "w")]
3471                             ADDSUBHN2))]
3472   "TARGET_SIMD"
3473   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3474   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3475 )
3476
3477 ;; pmul.
3478
3479 (define_insn "aarch64_pmul<mode>"
3480   [(set (match_operand:VB 0 "register_operand" "=w")
3481         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3482                     (match_operand:VB 2 "register_operand" "w")]
3483                    UNSPEC_PMUL))]
3484  "TARGET_SIMD"
3485  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3486   [(set_attr "type" "neon_mul_<Vetype><q>")]
3487 )
3488
3489 ;; fmulx.
3490
3491 (define_insn "aarch64_fmulx<mode>"
3492   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3493         (unspec:VHSDF_HSDF
3494           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3495            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3496            UNSPEC_FMULX))]
3497  "TARGET_SIMD"
3498  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499  [(set_attr "type" "neon_fp_mul_<stype>")]
3500 )
3501
3502 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3503
3504 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3505   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3506         (unspec:VDQSF
3507          [(match_operand:VDQSF 1 "register_operand" "w")
3508           (vec_duplicate:VDQSF
3509            (vec_select:<VEL>
3510             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3511             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3512          UNSPEC_FMULX))]
3513   "TARGET_SIMD"
3514   {
3515     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3516     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3517   }
3518   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3519 )
3520
3521 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3522
3523 (define_insn "*aarch64_mulx_elt<mode>"
3524   [(set (match_operand:VDQF 0 "register_operand" "=w")
3525         (unspec:VDQF
3526          [(match_operand:VDQF 1 "register_operand" "w")
3527           (vec_duplicate:VDQF
3528            (vec_select:<VEL>
3529             (match_operand:VDQF 2 "register_operand" "w")
3530             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3531          UNSPEC_FMULX))]
3532   "TARGET_SIMD"
3533   {
3534     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3535     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3536   }
3537   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3538 )
3539
3540 ;; vmulxq_lane
3541
3542 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3543   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3544         (unspec:VHSDF
3545          [(match_operand:VHSDF 1 "register_operand" "w")
3546           (vec_duplicate:VHSDF
3547             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3548          UNSPEC_FMULX))]
3549   "TARGET_SIMD"
3550   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3551   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3552 )
3553
3554 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3555 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3556 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3557
3558 (define_insn "*aarch64_vgetfmulx<mode>"
3559   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3560         (unspec:<VEL>
3561          [(match_operand:<VEL> 1 "register_operand" "w")
3562           (vec_select:<VEL>
3563            (match_operand:VDQF 2 "register_operand" "w")
3564             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3565          UNSPEC_FMULX))]
3566   "TARGET_SIMD"
3567   {
3568     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3569     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3570   }
3571   [(set_attr "type" "fmul<Vetype>")]
3572 )
3573 ;; <su>q<addsub>
3574
3575 (define_insn "aarch64_<su_optab><optab><mode>"
3576   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3577         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3578                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3579   "TARGET_SIMD"
3580   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3581   [(set_attr "type" "neon_<optab><q>")]
3582 )
3583
3584 ;; suqadd and usqadd
3585
3586 (define_insn "aarch64_<sur>qadd<mode>"
3587   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3588         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3589                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3590                        USSUQADD))]
3591   "TARGET_SIMD"
3592   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3593   [(set_attr "type" "neon_qadd<q>")]
3594 )
3595
3596 ;; sqmovun
3597
3598 (define_insn "aarch64_sqmovun<mode>"
3599   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3600         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3601                             UNSPEC_SQXTUN))]
3602    "TARGET_SIMD"
3603    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3604    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3605 )
3606
3607 ;; sqmovn and uqmovn
3608
3609 (define_insn "aarch64_<sur>qmovn<mode>"
3610   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3611         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3612                             SUQMOVN))]
3613   "TARGET_SIMD"
3614   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3615    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3616 )
3617
3618 ;; <su>q<absneg>
3619
3620 (define_insn "aarch64_s<optab><mode>"
3621   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3622         (UNQOPS:VSDQ_I
3623           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3624   "TARGET_SIMD"
3625   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3626   [(set_attr "type" "neon_<optab><q>")]
3627 )
3628
3629 ;; sq<r>dmulh.
3630
3631 (define_insn "aarch64_sq<r>dmulh<mode>"
3632   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3633         (unspec:VSDQ_HSI
3634           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3635            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3636          VQDMULH))]
3637   "TARGET_SIMD"
3638   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3639   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3640 )
3641
3642 ;; sq<r>dmulh_lane
3643
3644 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3645   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3646         (unspec:VDQHS
3647           [(match_operand:VDQHS 1 "register_operand" "w")
3648            (vec_select:<VEL>
3649              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3650              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3651          VQDMULH))]
3652   "TARGET_SIMD"
3653   "*
3654    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3655    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3656   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3657 )
3658
3659 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3660   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3661         (unspec:VDQHS
3662           [(match_operand:VDQHS 1 "register_operand" "w")
3663            (vec_select:<VEL>
3664              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3665              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3666          VQDMULH))]
3667   "TARGET_SIMD"
3668   "*
3669    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3670    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3671   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3672 )
3673
3674 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3675   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3676         (unspec:SD_HSI
3677           [(match_operand:SD_HSI 1 "register_operand" "w")
3678            (vec_select:<VEL>
3679              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3680              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3681          VQDMULH))]
3682   "TARGET_SIMD"
3683   "*
3684    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3685    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3686   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3687 )
3688
3689 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3690   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3691         (unspec:SD_HSI
3692           [(match_operand:SD_HSI 1 "register_operand" "w")
3693            (vec_select:<VEL>
3694              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3695              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3696          VQDMULH))]
3697   "TARGET_SIMD"
3698   "*
3699    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3700    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3701   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3702 )
3703
3704 ;; sqrdml[as]h.
3705
3706 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3707   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3708         (unspec:VSDQ_HSI
3709           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3710            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3711            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3712           SQRDMLH_AS))]
3713    "TARGET_SIMD_RDMA"
3714    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3715    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3716 )
3717
3718 ;; sqrdml[as]h_lane.
3719
3720 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3721   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3722         (unspec:VDQHS
3723           [(match_operand:VDQHS 1 "register_operand" "0")
3724            (match_operand:VDQHS 2 "register_operand" "w")
3725            (vec_select:<VEL>
3726              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3727              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3728           SQRDMLH_AS))]
3729    "TARGET_SIMD_RDMA"
3730    {
3731      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3732      return
3733       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3734    }
3735    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3736 )
3737
3738 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3739   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3740         (unspec:SD_HSI
3741           [(match_operand:SD_HSI 1 "register_operand" "0")
3742            (match_operand:SD_HSI 2 "register_operand" "w")
3743            (vec_select:<VEL>
3744              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3745              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3746           SQRDMLH_AS))]
3747    "TARGET_SIMD_RDMA"
3748    {
3749      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750      return
3751       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3752    }
3753    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3754 )
3755
3756 ;; sqrdml[as]h_laneq.
3757
3758 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3759   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3760         (unspec:VDQHS
3761           [(match_operand:VDQHS 1 "register_operand" "0")
3762            (match_operand:VDQHS 2 "register_operand" "w")
3763            (vec_select:<VEL>
3764              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3765              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3766           SQRDMLH_AS))]
3767    "TARGET_SIMD_RDMA"
3768    {
3769      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3770      return
3771       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3772    }
3773    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3774 )
3775
3776 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3777   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3778         (unspec:SD_HSI
3779           [(match_operand:SD_HSI 1 "register_operand" "0")
3780            (match_operand:SD_HSI 2 "register_operand" "w")
3781            (vec_select:<VEL>
3782              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3783              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3784           SQRDMLH_AS))]
3785    "TARGET_SIMD_RDMA"
3786    {
3787      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3788      return
3789       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3790    }
3791    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3792 )
3793
3794 ;; vqdml[sa]l
3795
3796 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3797   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3798         (SBINQOPS:<VWIDE>
3799           (match_operand:<VWIDE> 1 "register_operand" "0")
3800           (ss_ashift:<VWIDE>
3801               (mult:<VWIDE>
3802                 (sign_extend:<VWIDE>
3803                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3804                 (sign_extend:<VWIDE>
3805                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3806               (const_int 1))))]
3807   "TARGET_SIMD"
3808   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3809   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3810 )
3811
3812 ;; vqdml[sa]l_lane
3813
3814 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3815   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3816         (SBINQOPS:<VWIDE>
3817           (match_operand:<VWIDE> 1 "register_operand" "0")
3818           (ss_ashift:<VWIDE>
3819             (mult:<VWIDE>
3820               (sign_extend:<VWIDE>
3821                 (match_operand:VD_HSI 2 "register_operand" "w"))
3822               (sign_extend:<VWIDE>
3823                 (vec_duplicate:VD_HSI
3824                   (vec_select:<VEL>
3825                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3826                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3827               ))
3828             (const_int 1))))]
3829   "TARGET_SIMD"
3830   {
3831     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3832     return
3833       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3834   }
3835   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3836 )
3837
3838 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3839   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3840         (SBINQOPS:<VWIDE>
3841           (match_operand:<VWIDE> 1 "register_operand" "0")
3842           (ss_ashift:<VWIDE>
3843             (mult:<VWIDE>
3844               (sign_extend:<VWIDE>
3845                 (match_operand:VD_HSI 2 "register_operand" "w"))
3846               (sign_extend:<VWIDE>
3847                 (vec_duplicate:VD_HSI
3848                   (vec_select:<VEL>
3849                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3850                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3851               ))
3852             (const_int 1))))]
3853   "TARGET_SIMD"
3854   {
3855     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3856     return
3857       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3858   }
3859   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3860 )
3861
3862 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3863   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3864         (SBINQOPS:<VWIDE>
3865           (match_operand:<VWIDE> 1 "register_operand" "0")
3866           (ss_ashift:<VWIDE>
3867             (mult:<VWIDE>
3868               (sign_extend:<VWIDE>
3869                 (match_operand:SD_HSI 2 "register_operand" "w"))
3870               (sign_extend:<VWIDE>
3871                 (vec_select:<VEL>
3872                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3873                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3874               )
3875             (const_int 1))))]
3876   "TARGET_SIMD"
3877   {
3878     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3879     return
3880       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3881   }
3882   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3883 )
3884
3885 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3886   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3887         (SBINQOPS:<VWIDE>
3888           (match_operand:<VWIDE> 1 "register_operand" "0")
3889           (ss_ashift:<VWIDE>
3890             (mult:<VWIDE>
3891               (sign_extend:<VWIDE>
3892                 (match_operand:SD_HSI 2 "register_operand" "w"))
3893               (sign_extend:<VWIDE>
3894                 (vec_select:<VEL>
3895                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3896                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3897               )
3898             (const_int 1))))]
3899   "TARGET_SIMD"
3900   {
3901     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3902     return
3903       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3904   }
3905   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3906 )
3907
3908 ;; vqdml[sa]l_n
3909
3910 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3911   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3912         (SBINQOPS:<VWIDE>
3913           (match_operand:<VWIDE> 1 "register_operand" "0")
3914           (ss_ashift:<VWIDE>
3915               (mult:<VWIDE>
3916                 (sign_extend:<VWIDE>
3917                       (match_operand:VD_HSI 2 "register_operand" "w"))
3918                 (sign_extend:<VWIDE>
3919                   (vec_duplicate:VD_HSI
3920                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3921               (const_int 1))))]
3922   "TARGET_SIMD"
3923   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3924   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3925 )
3926
3927 ;; sqdml[as]l2
3928
3929 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3930   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3931         (SBINQOPS:<VWIDE>
3932          (match_operand:<VWIDE> 1 "register_operand" "0")
3933          (ss_ashift:<VWIDE>
3934              (mult:<VWIDE>
3935                (sign_extend:<VWIDE>
3936                  (vec_select:<VHALF>
3937                      (match_operand:VQ_HSI 2 "register_operand" "w")
3938                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3939                (sign_extend:<VWIDE>
3940                  (vec_select:<VHALF>
3941                      (match_operand:VQ_HSI 3 "register_operand" "w")
3942                      (match_dup 4))))
3943              (const_int 1))))]
3944   "TARGET_SIMD"
3945   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3946   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3947 )
3948
3949 (define_expand "aarch64_sqdmlal2<mode>"
3950   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3951    (match_operand:<VWIDE> 1 "register_operand" "w")
3952    (match_operand:VQ_HSI 2 "register_operand" "w")
3953    (match_operand:VQ_HSI 3 "register_operand" "w")]
3954   "TARGET_SIMD"
3955 {
3956   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3957   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3958                                                   operands[2], operands[3], p));
3959   DONE;
3960 })
3961
3962 (define_expand "aarch64_sqdmlsl2<mode>"
3963   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964    (match_operand:<VWIDE> 1 "register_operand" "w")
3965    (match_operand:VQ_HSI 2 "register_operand" "w")
3966    (match_operand:VQ_HSI 3 "register_operand" "w")]
3967   "TARGET_SIMD"
3968 {
3969   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3970   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3971                                                   operands[2], operands[3], p));
3972   DONE;
3973 })
3974
3975 ;; vqdml[sa]l2_lane
3976
3977 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3978   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3979         (SBINQOPS:<VWIDE>
3980           (match_operand:<VWIDE> 1 "register_operand" "0")
3981           (ss_ashift:<VWIDE>
3982               (mult:<VWIDE>
3983                 (sign_extend:<VWIDE>
3984                   (vec_select:<VHALF>
3985                     (match_operand:VQ_HSI 2 "register_operand" "w")
3986                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3987                 (sign_extend:<VWIDE>
3988                   (vec_duplicate:<VHALF>
3989                     (vec_select:<VEL>
3990                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3991                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3992                     ))))
3993               (const_int 1))))]
3994   "TARGET_SIMD"
3995   {
3996     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3997     return
3998      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3999   }
4000   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4001 )
4002
4003 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4004   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4005         (SBINQOPS:<VWIDE>
4006           (match_operand:<VWIDE> 1 "register_operand" "0")
4007           (ss_ashift:<VWIDE>
4008               (mult:<VWIDE>
4009                 (sign_extend:<VWIDE>
4010                   (vec_select:<VHALF>
4011                     (match_operand:VQ_HSI 2 "register_operand" "w")
4012                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4013                 (sign_extend:<VWIDE>
4014                   (vec_duplicate:<VHALF>
4015                     (vec_select:<VEL>
4016                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4017                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4018                     ))))
4019               (const_int 1))))]
4020   "TARGET_SIMD"
4021   {
4022     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4023     return
4024      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4025   }
4026   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4027 )
4028
4029 (define_expand "aarch64_sqdmlal2_lane<mode>"
4030   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4031    (match_operand:<VWIDE> 1 "register_operand" "w")
4032    (match_operand:VQ_HSI 2 "register_operand" "w")
4033    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4034    (match_operand:SI 4 "immediate_operand" "i")]
4035   "TARGET_SIMD"
4036 {
4037   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4038   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4039                                                        operands[2], operands[3],
4040                                                        operands[4], p));
4041   DONE;
4042 })
4043
4044 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4045   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4046    (match_operand:<VWIDE> 1 "register_operand" "w")
4047    (match_operand:VQ_HSI 2 "register_operand" "w")
4048    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4049    (match_operand:SI 4 "immediate_operand" "i")]
4050   "TARGET_SIMD"
4051 {
4052   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4053   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4054                                                        operands[2], operands[3],
4055                                                        operands[4], p));
4056   DONE;
4057 })
4058
4059 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4060   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4061    (match_operand:<VWIDE> 1 "register_operand" "w")
4062    (match_operand:VQ_HSI 2 "register_operand" "w")
4063    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4064    (match_operand:SI 4 "immediate_operand" "i")]
4065   "TARGET_SIMD"
4066 {
4067   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4068   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4069                                                        operands[2], operands[3],
4070                                                        operands[4], p));
4071   DONE;
4072 })
4073
4074 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4075   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4076    (match_operand:<VWIDE> 1 "register_operand" "w")
4077    (match_operand:VQ_HSI 2 "register_operand" "w")
4078    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4079    (match_operand:SI 4 "immediate_operand" "i")]
4080   "TARGET_SIMD"
4081 {
4082   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4083   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4084                                                        operands[2], operands[3],
4085                                                        operands[4], p));
4086   DONE;
4087 })
4088
4089 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4090   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4091         (SBINQOPS:<VWIDE>
4092           (match_operand:<VWIDE> 1 "register_operand" "0")
4093           (ss_ashift:<VWIDE>
4094             (mult:<VWIDE>
4095               (sign_extend:<VWIDE>
4096                 (vec_select:<VHALF>
4097                   (match_operand:VQ_HSI 2 "register_operand" "w")
4098                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4099               (sign_extend:<VWIDE>
4100                 (vec_duplicate:<VHALF>
4101                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4102             (const_int 1))))]
4103   "TARGET_SIMD"
4104   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4105   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4106 )
4107
4108 (define_expand "aarch64_sqdmlal2_n<mode>"
4109   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4110    (match_operand:<VWIDE> 1 "register_operand" "w")
4111    (match_operand:VQ_HSI 2 "register_operand" "w")
4112    (match_operand:<VEL> 3 "register_operand" "w")]
4113   "TARGET_SIMD"
4114 {
4115   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4116   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4117                                                     operands[2], operands[3],
4118                                                     p));
4119   DONE;
4120 })
4121
4122 (define_expand "aarch64_sqdmlsl2_n<mode>"
4123   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4124    (match_operand:<VWIDE> 1 "register_operand" "w")
4125    (match_operand:VQ_HSI 2 "register_operand" "w")
4126    (match_operand:<VEL> 3 "register_operand" "w")]
4127   "TARGET_SIMD"
4128 {
4129   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4130   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4131                                                     operands[2], operands[3],
4132                                                     p));
4133   DONE;
4134 })
4135
4136 ;; vqdmull
4137
4138 (define_insn "aarch64_sqdmull<mode>"
4139   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140         (ss_ashift:<VWIDE>
4141              (mult:<VWIDE>
4142                (sign_extend:<VWIDE>
4143                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4144                (sign_extend:<VWIDE>
4145                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4146              (const_int 1)))]
4147   "TARGET_SIMD"
4148   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4149   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4150 )
4151
4152 ;; vqdmull_lane
4153
4154 (define_insn "aarch64_sqdmull_lane<mode>"
4155   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4156         (ss_ashift:<VWIDE>
4157              (mult:<VWIDE>
4158                (sign_extend:<VWIDE>
4159                  (match_operand:VD_HSI 1 "register_operand" "w"))
4160                (sign_extend:<VWIDE>
4161                  (vec_duplicate:VD_HSI
4162                    (vec_select:<VEL>
4163                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4164                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4165                ))
4166              (const_int 1)))]
4167   "TARGET_SIMD"
4168   {
4169     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4170     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4171   }
4172   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4173 )
4174
4175 (define_insn "aarch64_sqdmull_laneq<mode>"
4176   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4177         (ss_ashift:<VWIDE>
4178              (mult:<VWIDE>
4179                (sign_extend:<VWIDE>
4180                  (match_operand:VD_HSI 1 "register_operand" "w"))
4181                (sign_extend:<VWIDE>
4182                  (vec_duplicate:VD_HSI
4183                    (vec_select:<VEL>
4184                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4185                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4186                ))
4187              (const_int 1)))]
4188   "TARGET_SIMD"
4189   {
4190     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4191     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4192   }
4193   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4194 )
4195
4196 (define_insn "aarch64_sqdmull_lane<mode>"
4197   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4198         (ss_ashift:<VWIDE>
4199              (mult:<VWIDE>
4200                (sign_extend:<VWIDE>
4201                  (match_operand:SD_HSI 1 "register_operand" "w"))
4202                (sign_extend:<VWIDE>
4203                  (vec_select:<VEL>
4204                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4205                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4206                ))
4207              (const_int 1)))]
4208   "TARGET_SIMD"
4209   {
4210     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4211     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4212   }
4213   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4214 )
4215
4216 (define_insn "aarch64_sqdmull_laneq<mode>"
4217   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4218         (ss_ashift:<VWIDE>
4219              (mult:<VWIDE>
4220                (sign_extend:<VWIDE>
4221                  (match_operand:SD_HSI 1 "register_operand" "w"))
4222                (sign_extend:<VWIDE>
4223                  (vec_select:<VEL>
4224                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4225                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4226                ))
4227              (const_int 1)))]
4228   "TARGET_SIMD"
4229   {
4230     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4231     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4232   }
4233   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4234 )
4235
4236 ;; vqdmull_n
4237
4238 (define_insn "aarch64_sqdmull_n<mode>"
4239   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4240         (ss_ashift:<VWIDE>
4241              (mult:<VWIDE>
4242                (sign_extend:<VWIDE>
4243                  (match_operand:VD_HSI 1 "register_operand" "w"))
4244                (sign_extend:<VWIDE>
4245                  (vec_duplicate:VD_HSI
4246                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4247                )
4248              (const_int 1)))]
4249   "TARGET_SIMD"
4250   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4251   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4252 )
4253
4254 ;; vqdmull2
4255
4256
4257
4258 (define_insn "aarch64_sqdmull2<mode>_internal"
4259   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4260         (ss_ashift:<VWIDE>
4261              (mult:<VWIDE>
4262                (sign_extend:<VWIDE>
4263                  (vec_select:<VHALF>
4264                    (match_operand:VQ_HSI 1 "register_operand" "w")
4265                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4266                (sign_extend:<VWIDE>
4267                  (vec_select:<VHALF>
4268                    (match_operand:VQ_HSI 2 "register_operand" "w")
4269                    (match_dup 3)))
4270                )
4271              (const_int 1)))]
4272   "TARGET_SIMD"
4273   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4274   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4275 )
4276
4277 (define_expand "aarch64_sqdmull2<mode>"
4278   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4279    (match_operand:VQ_HSI 1 "register_operand" "w")
4280    (match_operand:VQ_HSI 2 "register_operand" "w")]
4281   "TARGET_SIMD"
4282 {
4283   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4284   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4285                                                   operands[2], p));
4286   DONE;
4287 })
4288
4289 ;; vqdmull2_lane
4290
4291 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4292   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4293         (ss_ashift:<VWIDE>
4294              (mult:<VWIDE>
4295                (sign_extend:<VWIDE>
4296                  (vec_select:<VHALF>
4297                    (match_operand:VQ_HSI 1 "register_operand" "w")
4298                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4299                (sign_extend:<VWIDE>
4300                  (vec_duplicate:<VHALF>
4301                    (vec_select:<VEL>
4302                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4303                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4304                ))
4305              (const_int 1)))]
4306   "TARGET_SIMD"
4307   {
4308     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4309     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4310   }
4311   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4312 )
4313
4314 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4315   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4316         (ss_ashift:<VWIDE>
4317              (mult:<VWIDE>
4318                (sign_extend:<VWIDE>
4319                  (vec_select:<VHALF>
4320                    (match_operand:VQ_HSI 1 "register_operand" "w")
4321                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4322                (sign_extend:<VWIDE>
4323                  (vec_duplicate:<VHALF>
4324                    (vec_select:<VEL>
4325                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4326                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4327                ))
4328              (const_int 1)))]
4329   "TARGET_SIMD"
4330   {
4331     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4332     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4333   }
4334   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4335 )
4336
4337 (define_expand "aarch64_sqdmull2_lane<mode>"
4338   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4339    (match_operand:VQ_HSI 1 "register_operand" "w")
4340    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4341    (match_operand:SI 3 "immediate_operand" "i")]
4342   "TARGET_SIMD"
4343 {
4344   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4345   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4346                                                        operands[2], operands[3],
4347                                                        p));
4348   DONE;
4349 })
4350
4351 (define_expand "aarch64_sqdmull2_laneq<mode>"
4352   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4353    (match_operand:VQ_HSI 1 "register_operand" "w")
4354    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4355    (match_operand:SI 3 "immediate_operand" "i")]
4356   "TARGET_SIMD"
4357 {
4358   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4359   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4360                                                        operands[2], operands[3],
4361                                                        p));
4362   DONE;
4363 })
4364
4365 ;; vqdmull2_n
4366
4367 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4368   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4369         (ss_ashift:<VWIDE>
4370              (mult:<VWIDE>
4371                (sign_extend:<VWIDE>
4372                  (vec_select:<VHALF>
4373                    (match_operand:VQ_HSI 1 "register_operand" "w")
4374                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4375                (sign_extend:<VWIDE>
4376                  (vec_duplicate:<VHALF>
4377                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4378                )
4379              (const_int 1)))]
4380   "TARGET_SIMD"
4381   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4382   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4383 )
4384
4385 (define_expand "aarch64_sqdmull2_n<mode>"
4386   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4387    (match_operand:VQ_HSI 1 "register_operand" "w")
4388    (match_operand:<VEL> 2 "register_operand" "w")]
4389   "TARGET_SIMD"
4390 {
4391   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4392   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4393                                                     operands[2], p));
4394   DONE;
4395 })
4396
4397 ;; vshl
4398
4399 (define_insn "aarch64_<sur>shl<mode>"
4400   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4401         (unspec:VSDQ_I_DI
4402           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4403            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4404          VSHL))]
4405   "TARGET_SIMD"
4406   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4407   [(set_attr "type" "neon_shift_reg<q>")]
4408 )
4409
4410
4411 ;; vqshl
4412
4413 (define_insn "aarch64_<sur>q<r>shl<mode>"
4414   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4415         (unspec:VSDQ_I
4416           [(match_operand:VSDQ_I 1 "register_operand" "w")
4417            (match_operand:VSDQ_I 2 "register_operand" "w")]
4418          VQSHL))]
4419   "TARGET_SIMD"
4420   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4421   [(set_attr "type" "neon_sat_shift_reg<q>")]
4422 )
4423
4424 ;; vshll_n
4425
4426 (define_insn "aarch64_<sur>shll_n<mode>"
4427   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4428         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4429                          (match_operand:SI 2
4430                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4431                          VSHLL))]
4432   "TARGET_SIMD"
4433   {
4434     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4435       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4436     else
4437       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4438   }
4439   [(set_attr "type" "neon_shift_imm_long")]
4440 )
4441
4442 ;; vshll_high_n
4443
4444 (define_insn "aarch64_<sur>shll2_n<mode>"
4445   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4446         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4447                          (match_operand:SI 2 "immediate_operand" "i")]
4448                          VSHLL))]
4449   "TARGET_SIMD"
4450   {
4451     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4452       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4453     else
4454       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4455   }
4456   [(set_attr "type" "neon_shift_imm_long")]
4457 )
4458
4459 ;; vrshr_n
4460
4461 (define_insn "aarch64_<sur>shr_n<mode>"
4462   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4463         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4464                            (match_operand:SI 2
4465                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4466                           VRSHR_N))]
4467   "TARGET_SIMD"
4468   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4469   [(set_attr "type" "neon_sat_shift_imm<q>")]
4470 )
4471
4472 ;; v(r)sra_n
4473
4474 (define_insn "aarch64_<sur>sra_n<mode>"
4475   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4476         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4477                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4478                        (match_operand:SI 3
4479                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4480                       VSRA))]
4481   "TARGET_SIMD"
4482   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4483   [(set_attr "type" "neon_shift_acc<q>")]
4484 )
4485
4486 ;; vs<lr>i_n
4487
4488 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4489   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4490         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4491                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4492                        (match_operand:SI 3
4493                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4494                       VSLRI))]
4495   "TARGET_SIMD"
4496   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4497   [(set_attr "type" "neon_shift_imm<q>")]
4498 )
4499
4500 ;; vqshl(u)
4501
4502 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4503   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4504         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4505                        (match_operand:SI 2
4506                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4507                       VQSHL_N))]
4508   "TARGET_SIMD"
4509   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4510   [(set_attr "type" "neon_sat_shift_imm<q>")]
4511 )
4512
4513
4514 ;; vq(r)shr(u)n_n
4515
4516 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4517   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4518         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4519                             (match_operand:SI 2
4520                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4521                            VQSHRN_N))]
4522   "TARGET_SIMD"
4523   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4524   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4525 )
4526
4527
4528 ;; cm(eq|ge|gt|lt|le)
4529 ;; Note, we have constraints for Dz and Z as different expanders
4530 ;; have different ideas of what should be passed to this pattern.
4531
4532 (define_insn "aarch64_cm<optab><mode>"
4533   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4534         (neg:<V_INT_EQUIV>
4535           (COMPARISONS:<V_INT_EQUIV>
4536             (match_operand:VDQ_I 1 "register_operand" "w,w")
4537             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4538           )))]
4539   "TARGET_SIMD"
4540   "@
4541   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4542   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4543   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4544 )
4545
4546 (define_insn_and_split "aarch64_cm<optab>di"
4547   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4548         (neg:DI
4549           (COMPARISONS:DI
4550             (match_operand:DI 1 "register_operand" "w,w,r")
4551             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4552           )))
4553      (clobber (reg:CC CC_REGNUM))]
4554   "TARGET_SIMD"
4555   "#"
4556   "&& reload_completed"
4557   [(set (match_operand:DI 0 "register_operand")
4558         (neg:DI
4559           (COMPARISONS:DI
4560             (match_operand:DI 1 "register_operand")
4561             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4562           )))]
4563   {
4564     /* If we are in the general purpose register file,
4565        we split to a sequence of comparison and store.  */
4566     if (GP_REGNUM_P (REGNO (operands[0]))
4567         && GP_REGNUM_P (REGNO (operands[1])))
4568       {
4569         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4570         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4571         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4572         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4573         DONE;
4574       }
4575     /* Otherwise, we expand to a similar pattern which does not
4576        clobber CC_REGNUM.  */
4577   }
4578   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4579 )
4580
4581 (define_insn "*aarch64_cm<optab>di"
4582   [(set (match_operand:DI 0 "register_operand" "=w,w")
4583         (neg:DI
4584           (COMPARISONS:DI
4585             (match_operand:DI 1 "register_operand" "w,w")
4586             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4587           )))]
4588   "TARGET_SIMD && reload_completed"
4589   "@
4590   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4591   cm<optab>\t%d0, %d1, #0"
4592   [(set_attr "type" "neon_compare, neon_compare_zero")]
4593 )
4594
4595 ;; cm(hs|hi)
4596
4597 (define_insn "aarch64_cm<optab><mode>"
4598   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4599         (neg:<V_INT_EQUIV>
4600           (UCOMPARISONS:<V_INT_EQUIV>
4601             (match_operand:VDQ_I 1 "register_operand" "w")
4602             (match_operand:VDQ_I 2 "register_operand" "w")
4603           )))]
4604   "TARGET_SIMD"
4605   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4606   [(set_attr "type" "neon_compare<q>")]
4607 )
4608
4609 (define_insn_and_split "aarch64_cm<optab>di"
4610   [(set (match_operand:DI 0 "register_operand" "=w,r")
4611         (neg:DI
4612           (UCOMPARISONS:DI
4613             (match_operand:DI 1 "register_operand" "w,r")
4614             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4615           )))
4616     (clobber (reg:CC CC_REGNUM))]
4617   "TARGET_SIMD"
4618   "#"
4619   "&& reload_completed"
4620   [(set (match_operand:DI 0 "register_operand")
4621         (neg:DI
4622           (UCOMPARISONS:DI
4623             (match_operand:DI 1 "register_operand")
4624             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4625           )))]
4626   {
4627     /* If we are in the general purpose register file,
4628        we split to a sequence of comparison and store.  */
4629     if (GP_REGNUM_P (REGNO (operands[0]))
4630         && GP_REGNUM_P (REGNO (operands[1])))
4631       {
4632         machine_mode mode = CCmode;
4633         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4634         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4635         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4636         DONE;
4637       }
4638     /* Otherwise, we expand to a similar pattern which does not
4639        clobber CC_REGNUM.  */
4640   }
4641   [(set_attr "type" "neon_compare,multiple")]
4642 )
4643
4644 (define_insn "*aarch64_cm<optab>di"
4645   [(set (match_operand:DI 0 "register_operand" "=w")
4646         (neg:DI
4647           (UCOMPARISONS:DI
4648             (match_operand:DI 1 "register_operand" "w")
4649             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4650           )))]
4651   "TARGET_SIMD && reload_completed"
4652   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4653   [(set_attr "type" "neon_compare")]
4654 )
4655
4656 ;; cmtst
4657
4658 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4659 ;; we don't have any insns using ne, and aarch64_vcond outputs
4660 ;; not (neg (eq (and x y) 0))
4661 ;; which is rewritten by simplify_rtx as
4662 ;; plus (eq (and x y) 0) -1.
4663
4664 (define_insn "aarch64_cmtst<mode>"
4665   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4666         (plus:<V_INT_EQUIV>
4667           (eq:<V_INT_EQUIV>
4668             (and:VDQ_I
4669               (match_operand:VDQ_I 1 "register_operand" "w")
4670               (match_operand:VDQ_I 2 "register_operand" "w"))
4671             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4672           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4673   ]
4674   "TARGET_SIMD"
4675   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4676   [(set_attr "type" "neon_tst<q>")]
4677 )
4678
4679 (define_insn_and_split "aarch64_cmtstdi"
4680   [(set (match_operand:DI 0 "register_operand" "=w,r")
4681         (neg:DI
4682           (ne:DI
4683             (and:DI
4684               (match_operand:DI 1 "register_operand" "w,r")
4685               (match_operand:DI 2 "register_operand" "w,r"))
4686             (const_int 0))))
4687     (clobber (reg:CC CC_REGNUM))]
4688   "TARGET_SIMD"
4689   "#"
4690   "&& reload_completed"
4691   [(set (match_operand:DI 0 "register_operand")
4692         (neg:DI
4693           (ne:DI
4694             (and:DI
4695               (match_operand:DI 1 "register_operand")
4696               (match_operand:DI 2 "register_operand"))
4697             (const_int 0))))]
4698   {
4699     /* If we are in the general purpose register file,
4700        we split to a sequence of comparison and store.  */
4701     if (GP_REGNUM_P (REGNO (operands[0]))
4702         && GP_REGNUM_P (REGNO (operands[1])))
4703       {
4704         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4705         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4706         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4707         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4708         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4709         DONE;
4710       }
4711     /* Otherwise, we expand to a similar pattern which does not
4712        clobber CC_REGNUM.  */
4713   }
4714   [(set_attr "type" "neon_tst,multiple")]
4715 )
4716
4717 (define_insn "*aarch64_cmtstdi"
4718   [(set (match_operand:DI 0 "register_operand" "=w")
4719         (neg:DI
4720           (ne:DI
4721             (and:DI
4722               (match_operand:DI 1 "register_operand" "w")
4723               (match_operand:DI 2 "register_operand" "w"))
4724             (const_int 0))))]
4725   "TARGET_SIMD"
4726   "cmtst\t%d0, %d1, %d2"
4727   [(set_attr "type" "neon_tst")]
4728 )
4729
4730 ;; fcm(eq|ge|gt|le|lt)
4731
4732 (define_insn "aarch64_cm<optab><mode>"
4733   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4734         (neg:<V_INT_EQUIV>
4735           (COMPARISONS:<V_INT_EQUIV>
4736             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4737             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4738           )))]
4739   "TARGET_SIMD"
4740   "@
4741   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4742   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4743   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4744 )
4745
4746 ;; fac(ge|gt)
4747 ;; Note we can also handle what would be fac(le|lt) by
4748 ;; generating fac(ge|gt).
4749
4750 (define_insn "aarch64_fac<optab><mode>"
4751   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4752         (neg:<V_INT_EQUIV>
4753           (FAC_COMPARISONS:<V_INT_EQUIV>
4754             (abs:VHSDF_HSDF
4755               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4756             (abs:VHSDF_HSDF
4757               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4758   )))]
4759   "TARGET_SIMD"
4760   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4761   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4762 )
4763
4764 ;; addp
4765
4766 (define_insn "aarch64_addp<mode>"
4767   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4768         (unspec:VD_BHSI
4769           [(match_operand:VD_BHSI 1 "register_operand" "w")
4770            (match_operand:VD_BHSI 2 "register_operand" "w")]
4771           UNSPEC_ADDP))]
4772   "TARGET_SIMD"
4773   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4774   [(set_attr "type" "neon_reduc_add<q>")]
4775 )
4776
4777 (define_insn "aarch64_addpdi"
4778   [(set (match_operand:DI 0 "register_operand" "=w")
4779         (unspec:DI
4780           [(match_operand:V2DI 1 "register_operand" "w")]
4781           UNSPEC_ADDP))]
4782   "TARGET_SIMD"
4783   "addp\t%d0, %1.2d"
4784   [(set_attr "type" "neon_reduc_add")]
4785 )
4786
4787 ;; sqrt
4788
4789 (define_expand "sqrt<mode>2"
4790   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4791         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4792   "TARGET_SIMD"
4793 {
4794   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4795     DONE;
4796 })
4797
4798 (define_insn "*sqrt<mode>2"
4799   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4800         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4801   "TARGET_SIMD"
4802   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4803   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4804 )
4805
4806 ;; Patterns for vector struct loads and stores.
4807
4808 (define_insn "aarch64_simd_ld2<mode>"
4809   [(set (match_operand:OI 0 "register_operand" "=w")
4810         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4811                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812                    UNSPEC_LD2))]
4813   "TARGET_SIMD"
4814   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4815   [(set_attr "type" "neon_load2_2reg<q>")]
4816 )
4817
4818 (define_insn "aarch64_simd_ld2r<mode>"
4819   [(set (match_operand:OI 0 "register_operand" "=w")
4820        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4821                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4822                   UNSPEC_LD2_DUP))]
4823   "TARGET_SIMD"
4824   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4825   [(set_attr "type" "neon_load2_all_lanes<q>")]
4826 )
4827
4828 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4829   [(set (match_operand:OI 0 "register_operand" "=w")
4830         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4831                     (match_operand:OI 2 "register_operand" "0")
4832                     (match_operand:SI 3 "immediate_operand" "i")
4833                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4834                    UNSPEC_LD2_LANE))]
4835   "TARGET_SIMD"
4836   {
4837     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4838     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4839   }
4840   [(set_attr "type" "neon_load2_one_lane")]
4841 )
4842
4843 (define_expand "vec_load_lanesoi<mode>"
4844   [(set (match_operand:OI 0 "register_operand" "=w")
4845         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4846                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4847                    UNSPEC_LD2))]
4848   "TARGET_SIMD"
4849 {
4850   if (BYTES_BIG_ENDIAN)
4851     {
4852       rtx tmp = gen_reg_rtx (OImode);
4853       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4854       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4855       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4856     }
4857   else
4858     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4859   DONE;
4860 })
4861
4862 (define_insn "aarch64_simd_st2<mode>"
4863   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4864         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4865                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4866                    UNSPEC_ST2))]
4867   "TARGET_SIMD"
4868   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4869   [(set_attr "type" "neon_store2_2reg<q>")]
4870 )
4871
4872 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4873 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4874   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4875         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4876                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4877                     (match_operand:SI 2 "immediate_operand" "i")]
4878                    UNSPEC_ST2_LANE))]
4879   "TARGET_SIMD"
4880   {
4881     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4882     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4883   }
4884   [(set_attr "type" "neon_store2_one_lane<q>")]
4885 )
4886
4887 (define_expand "vec_store_lanesoi<mode>"
4888   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4889         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4890                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4891                    UNSPEC_ST2))]
4892   "TARGET_SIMD"
4893 {
4894   if (BYTES_BIG_ENDIAN)
4895     {
4896       rtx tmp = gen_reg_rtx (OImode);
4897       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4898       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4899       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4900     }
4901   else
4902     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4903   DONE;
4904 })
4905
4906 (define_insn "aarch64_simd_ld3<mode>"
4907   [(set (match_operand:CI 0 "register_operand" "=w")
4908         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4909                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4910                    UNSPEC_LD3))]
4911   "TARGET_SIMD"
4912   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4913   [(set_attr "type" "neon_load3_3reg<q>")]
4914 )
4915
4916 (define_insn "aarch64_simd_ld3r<mode>"
4917   [(set (match_operand:CI 0 "register_operand" "=w")
4918        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4919                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4920                   UNSPEC_LD3_DUP))]
4921   "TARGET_SIMD"
4922   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4923   [(set_attr "type" "neon_load3_all_lanes<q>")]
4924 )
4925
4926 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4927   [(set (match_operand:CI 0 "register_operand" "=w")
4928         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4929                     (match_operand:CI 2 "register_operand" "0")
4930                     (match_operand:SI 3 "immediate_operand" "i")
4931                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4932                    UNSPEC_LD3_LANE))]
4933   "TARGET_SIMD"
4934 {
4935     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4936     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4937 }
4938   [(set_attr "type" "neon_load3_one_lane")]
4939 )
4940
4941 (define_expand "vec_load_lanesci<mode>"
4942   [(set (match_operand:CI 0 "register_operand" "=w")
4943         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4944                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945                    UNSPEC_LD3))]
4946   "TARGET_SIMD"
4947 {
4948   if (BYTES_BIG_ENDIAN)
4949     {
4950       rtx tmp = gen_reg_rtx (CImode);
4951       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4952       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4953       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4954     }
4955   else
4956     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4957   DONE;
4958 })
4959
4960 (define_insn "aarch64_simd_st3<mode>"
4961   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4962         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4963                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4964                    UNSPEC_ST3))]
4965   "TARGET_SIMD"
4966   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4967   [(set_attr "type" "neon_store3_3reg<q>")]
4968 )
4969
4970 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4971 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4972   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4973         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4974                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4975                      (match_operand:SI 2 "immediate_operand" "i")]
4976                     UNSPEC_ST3_LANE))]
4977   "TARGET_SIMD"
4978   {
4979     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4980     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4981   }
4982   [(set_attr "type" "neon_store3_one_lane<q>")]
4983 )
4984
4985 (define_expand "vec_store_lanesci<mode>"
4986   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4987         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4988                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4989                    UNSPEC_ST3))]
4990   "TARGET_SIMD"
4991 {
4992   if (BYTES_BIG_ENDIAN)
4993     {
4994       rtx tmp = gen_reg_rtx (CImode);
4995       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4996       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4997       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4998     }
4999   else
5000     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5001   DONE;
5002 })
5003
5004 (define_insn "aarch64_simd_ld4<mode>"
5005   [(set (match_operand:XI 0 "register_operand" "=w")
5006         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5007                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5008                    UNSPEC_LD4))]
5009   "TARGET_SIMD"
5010   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5011   [(set_attr "type" "neon_load4_4reg<q>")]
5012 )
5013
5014 (define_insn "aarch64_simd_ld4r<mode>"
5015   [(set (match_operand:XI 0 "register_operand" "=w")
5016        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5017                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5018                   UNSPEC_LD4_DUP))]
5019   "TARGET_SIMD"
5020   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5021   [(set_attr "type" "neon_load4_all_lanes<q>")]
5022 )
5023
5024 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5025   [(set (match_operand:XI 0 "register_operand" "=w")
5026         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5027                     (match_operand:XI 2 "register_operand" "0")
5028                     (match_operand:SI 3 "immediate_operand" "i")
5029                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030                    UNSPEC_LD4_LANE))]
5031   "TARGET_SIMD"
5032 {
5033     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5034     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5035 }
5036   [(set_attr "type" "neon_load4_one_lane")]
5037 )
5038
5039 (define_expand "vec_load_lanesxi<mode>"
5040   [(set (match_operand:XI 0 "register_operand" "=w")
5041         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5042                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5043                    UNSPEC_LD4))]
5044   "TARGET_SIMD"
5045 {
5046   if (BYTES_BIG_ENDIAN)
5047     {
5048       rtx tmp = gen_reg_rtx (XImode);
5049       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5050       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5051       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5052     }
5053   else
5054     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5055   DONE;
5056 })
5057
5058 (define_insn "aarch64_simd_st4<mode>"
5059   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5060         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5061                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5062                    UNSPEC_ST4))]
5063   "TARGET_SIMD"
5064   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5065   [(set_attr "type" "neon_store4_4reg<q>")]
5066 )
5067
5068 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5069 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5070   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5071         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5072                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5073                      (match_operand:SI 2 "immediate_operand" "i")]
5074                     UNSPEC_ST4_LANE))]
5075   "TARGET_SIMD"
5076   {
5077     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5078     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5079   }
5080   [(set_attr "type" "neon_store4_one_lane<q>")]
5081 )
5082
5083 (define_expand "vec_store_lanesxi<mode>"
5084   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5085         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5086                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5087                    UNSPEC_ST4))]
5088   "TARGET_SIMD"
5089 {
5090   if (BYTES_BIG_ENDIAN)
5091     {
5092       rtx tmp = gen_reg_rtx (XImode);
5093       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5094       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5095       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5096     }
5097   else
5098     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5099   DONE;
5100 })
5101
5102 (define_insn_and_split "aarch64_rev_reglist<mode>"
5103 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5104         (unspec:VSTRUCT
5105                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5106                     (match_operand:V16QI 2 "register_operand" "w")]
5107                    UNSPEC_REV_REGLIST))]
5108   "TARGET_SIMD"
5109   "#"
5110   "&& reload_completed"
5111   [(const_int 0)]
5112 {
5113   int i;
5114   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5115   for (i = 0; i < nregs; i++)
5116     {
5117       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5118       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5119       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5120     }
5121   DONE;
5122 }
5123   [(set_attr "type" "neon_tbl1_q")
5124    (set_attr "length" "<insn_count>")]
5125 )
5126
5127 ;; Reload patterns for AdvSIMD register list operands.
5128
5129 (define_expand "mov<mode>"
5130   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5131         (match_operand:VSTRUCT 1 "general_operand" ""))]
5132   "TARGET_SIMD"
5133 {
5134   if (can_create_pseudo_p ())
5135     {
5136       if (GET_CODE (operands[0]) != REG)
5137         operands[1] = force_reg (<MODE>mode, operands[1]);
5138     }
5139 })
5140
5141
5142 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5143   [(match_operand:CI 0 "register_operand" "=w")
5144    (match_operand:DI 1 "register_operand" "r")
5145    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5146   "TARGET_SIMD"
5147 {
5148   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5149   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5150   DONE;
5151 })
5152
5153 (define_insn "aarch64_ld1_x3_<mode>"
5154   [(set (match_operand:CI 0 "register_operand" "=w")
5155         (unspec:CI
5156           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5157            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5158   "TARGET_SIMD"
5159   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5160   [(set_attr "type" "neon_load1_3reg<q>")]
5161 )
5162
5163 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5164   [(match_operand:DI 0 "register_operand" "")
5165    (match_operand:OI 1 "register_operand" "")
5166    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5167   "TARGET_SIMD"
5168 {
5169   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5170   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5171   DONE;
5172 })
5173
5174 (define_insn "aarch64_st1_x2_<mode>"
5175    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5176          (unspec:OI
5177           [(match_operand:OI 1 "register_operand" "w")
5178           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5179   "TARGET_SIMD"
5180   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5181   [(set_attr "type" "neon_store1_2reg<q>")]
5182 )
5183
5184 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5185   [(match_operand:DI 0 "register_operand" "")
5186    (match_operand:CI 1 "register_operand" "")
5187    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5188   "TARGET_SIMD"
5189 {
5190   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5191   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5192   DONE;
5193 })
5194
5195 (define_insn "aarch64_st1_x3_<mode>"
5196    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5197         (unspec:CI
5198          [(match_operand:CI 1 "register_operand" "w")
5199           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5200   "TARGET_SIMD"
5201   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5202   [(set_attr "type" "neon_store1_3reg<q>")]
5203 )
5204
5205 (define_insn "*aarch64_mov<mode>"
5206   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5207         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5208   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5209    && (register_operand (operands[0], <MODE>mode)
5210        || register_operand (operands[1], <MODE>mode))"
5211   "@
5212    #
5213    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5214    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5215   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5216                      neon_load<nregs>_<nregs>reg_q")
5217    (set_attr "length" "<insn_count>,4,4")]
5218 )
5219
5220 (define_insn "aarch64_be_ld1<mode>"
5221   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5222         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5223                              "aarch64_simd_struct_operand" "Utv")]
5224         UNSPEC_LD1))]
5225   "TARGET_SIMD"
5226   "ld1\\t{%0<Vmtype>}, %1"
5227   [(set_attr "type" "neon_load1_1reg<q>")]
5228 )
5229
5230 (define_insn "aarch64_be_st1<mode>"
5231   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5232         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5233         UNSPEC_ST1))]
5234   "TARGET_SIMD"
5235   "st1\\t{%1<Vmtype>}, %0"
5236   [(set_attr "type" "neon_store1_1reg<q>")]
5237 )
5238
5239 (define_insn "*aarch64_be_movoi"
5240   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5241         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5242   "TARGET_SIMD && BYTES_BIG_ENDIAN
5243    && (register_operand (operands[0], OImode)
5244        || register_operand (operands[1], OImode))"
5245   "@
5246    #
5247    stp\\t%q1, %R1, %0
5248    ldp\\t%q0, %R0, %1"
5249   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5250    (set_attr "length" "8,4,4")]
5251 )
5252
5253 (define_insn "*aarch64_be_movci"
5254   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5255         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5256   "TARGET_SIMD && BYTES_BIG_ENDIAN
5257    && (register_operand (operands[0], CImode)
5258        || register_operand (operands[1], CImode))"
5259   "#"
5260   [(set_attr "type" "multiple")
5261    (set_attr "length" "12,4,4")]
5262 )
5263
5264 (define_insn "*aarch64_be_movxi"
5265   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5266         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5267   "TARGET_SIMD && BYTES_BIG_ENDIAN
5268    && (register_operand (operands[0], XImode)
5269        || register_operand (operands[1], XImode))"
5270   "#"
5271   [(set_attr "type" "multiple")
5272    (set_attr "length" "16,4,4")]
5273 )
5274
5275 (define_split
5276   [(set (match_operand:OI 0 "register_operand")
5277         (match_operand:OI 1 "register_operand"))]
5278   "TARGET_SIMD && reload_completed"
5279   [(const_int 0)]
5280 {
5281   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5282   DONE;
5283 })
5284
5285 (define_split
5286   [(set (match_operand:CI 0 "nonimmediate_operand")
5287         (match_operand:CI 1 "general_operand"))]
5288   "TARGET_SIMD && reload_completed"
5289   [(const_int 0)]
5290 {
5291   if (register_operand (operands[0], CImode)
5292       && register_operand (operands[1], CImode))
5293     {
5294       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5295       DONE;
5296     }
5297   else if (BYTES_BIG_ENDIAN)
5298     {
5299       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5300                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5301       emit_move_insn (gen_lowpart (V16QImode,
5302                                    simplify_gen_subreg (TImode, operands[0],
5303                                                         CImode, 32)),
5304                       gen_lowpart (V16QImode,
5305                                    simplify_gen_subreg (TImode, operands[1],
5306                                                         CImode, 32)));
5307       DONE;
5308     }
5309   else
5310     FAIL;
5311 })
5312
5313 (define_split
5314   [(set (match_operand:XI 0 "nonimmediate_operand")
5315         (match_operand:XI 1 "general_operand"))]
5316   "TARGET_SIMD && reload_completed"
5317   [(const_int 0)]
5318 {
5319   if (register_operand (operands[0], XImode)
5320       && register_operand (operands[1], XImode))
5321     {
5322       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5323       DONE;
5324     }
5325   else if (BYTES_BIG_ENDIAN)
5326     {
5327       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5328                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5329       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5330                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5331       DONE;
5332     }
5333   else
5334     FAIL;
5335 })
5336
5337 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5338   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5339    (match_operand:DI 1 "register_operand" "w")
5340    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341   "TARGET_SIMD"
5342 {
5343   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5344   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5345                      * <VSTRUCT:nregs>);
5346
5347   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5348                                                                 mem));
5349   DONE;
5350 })
5351
5352 (define_insn "aarch64_ld2<mode>_dreg"
5353   [(set (match_operand:OI 0 "register_operand" "=w")
5354         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5355                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5356                    UNSPEC_LD2_DREG))]
5357   "TARGET_SIMD"
5358   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5359   [(set_attr "type" "neon_load2_2reg<q>")]
5360 )
5361
5362 (define_insn "aarch64_ld2<mode>_dreg"
5363   [(set (match_operand:OI 0 "register_operand" "=w")
5364         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5365                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5366                    UNSPEC_LD2_DREG))]
5367   "TARGET_SIMD"
5368   "ld1\\t{%S0.1d - %T0.1d}, %1"
5369   [(set_attr "type" "neon_load1_2reg<q>")]
5370 )
5371
5372 (define_insn "aarch64_ld3<mode>_dreg"
5373   [(set (match_operand:CI 0 "register_operand" "=w")
5374         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5375                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5376                    UNSPEC_LD3_DREG))]
5377   "TARGET_SIMD"
5378   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5379   [(set_attr "type" "neon_load3_3reg<q>")]
5380 )
5381
5382 (define_insn "aarch64_ld3<mode>_dreg"
5383   [(set (match_operand:CI 0 "register_operand" "=w")
5384         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5385                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5386                    UNSPEC_LD3_DREG))]
5387   "TARGET_SIMD"
5388   "ld1\\t{%S0.1d - %U0.1d}, %1"
5389   [(set_attr "type" "neon_load1_3reg<q>")]
5390 )
5391
5392 (define_insn "aarch64_ld4<mode>_dreg"
5393   [(set (match_operand:XI 0 "register_operand" "=w")
5394         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5395                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5396                    UNSPEC_LD4_DREG))]
5397   "TARGET_SIMD"
5398   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5399   [(set_attr "type" "neon_load4_4reg<q>")]
5400 )
5401
5402 (define_insn "aarch64_ld4<mode>_dreg"
5403   [(set (match_operand:XI 0 "register_operand" "=w")
5404         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5405                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5406                    UNSPEC_LD4_DREG))]
5407   "TARGET_SIMD"
5408   "ld1\\t{%S0.1d - %V0.1d}, %1"
5409   [(set_attr "type" "neon_load1_4reg<q>")]
5410 )
5411
5412 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5413  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5414   (match_operand:DI 1 "register_operand" "r")
5415   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5416   "TARGET_SIMD"
5417 {
5418   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5419   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5420
5421   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5422   DONE;
5423 })
5424
5425 (define_expand "aarch64_ld1<VALL_F16:mode>"
5426  [(match_operand:VALL_F16 0 "register_operand")
5427   (match_operand:DI 1 "register_operand")]
5428   "TARGET_SIMD"
5429 {
5430   machine_mode mode = <VALL_F16:MODE>mode;
5431   rtx mem = gen_rtx_MEM (mode, operands[1]);
5432
5433   if (BYTES_BIG_ENDIAN)
5434     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5435   else
5436     emit_move_insn (operands[0], mem);
5437   DONE;
5438 })
5439
5440 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5441  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5442   (match_operand:DI 1 "register_operand" "r")
5443   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5444   "TARGET_SIMD"
5445 {
5446   machine_mode mode = <VSTRUCT:MODE>mode;
5447   rtx mem = gen_rtx_MEM (mode, operands[1]);
5448
5449   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5450   DONE;
5451 })
5452
5453 (define_expand "aarch64_ld1x2<VQ:mode>"
5454  [(match_operand:OI 0 "register_operand" "=w")
5455   (match_operand:DI 1 "register_operand" "r")
5456   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5457   "TARGET_SIMD"
5458 {
5459   machine_mode mode = OImode;
5460   rtx mem = gen_rtx_MEM (mode, operands[1]);
5461
5462   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5463   DONE;
5464 })
5465
5466 (define_expand "aarch64_ld1x2<VDC:mode>"
5467  [(match_operand:OI 0 "register_operand" "=w")
5468   (match_operand:DI 1 "register_operand" "r")
5469   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5470   "TARGET_SIMD"
5471 {
5472   machine_mode mode = OImode;
5473   rtx mem = gen_rtx_MEM (mode, operands[1]);
5474
5475   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5476   DONE;
5477 })
5478
5479
5480 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5481   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5482         (match_operand:DI 1 "register_operand" "w")
5483         (match_operand:VSTRUCT 2 "register_operand" "0")
5484         (match_operand:SI 3 "immediate_operand" "i")
5485         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5486   "TARGET_SIMD"
5487 {
5488   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5489   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5490                      * <VSTRUCT:nregs>);
5491
5492   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5493   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5494         operands[0], mem, operands[2], operands[3]));
5495   DONE;
5496 })
5497
5498 ;; Expanders for builtins to extract vector registers from large
5499 ;; opaque integer modes.
5500
5501 ;; D-register list.
5502
5503 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5504  [(match_operand:VDC 0 "register_operand" "=w")
5505   (match_operand:VSTRUCT 1 "register_operand" "w")
5506   (match_operand:SI 2 "immediate_operand" "i")]
5507   "TARGET_SIMD"
5508 {
5509   int part = INTVAL (operands[2]);
5510   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5511   int offset = part * 16;
5512
5513   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5514   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5515   DONE;
5516 })
5517
5518 ;; Q-register list.
5519
5520 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5521  [(match_operand:VQ 0 "register_operand" "=w")
5522   (match_operand:VSTRUCT 1 "register_operand" "w")
5523   (match_operand:SI 2 "immediate_operand" "i")]
5524   "TARGET_SIMD"
5525 {
5526   int part = INTVAL (operands[2]);
5527   int offset = part * 16;
5528
5529   emit_move_insn (operands[0],
5530                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5531   DONE;
5532 })
5533
5534 ;; Permuted-store expanders for neon intrinsics.
5535
5536 ;; Permute instructions
5537
5538 ;; vec_perm support
5539
5540 (define_expand "vec_perm<mode>"
5541   [(match_operand:VB 0 "register_operand")
5542    (match_operand:VB 1 "register_operand")
5543    (match_operand:VB 2 "register_operand")
5544    (match_operand:VB 3 "register_operand")]
5545   "TARGET_SIMD"
5546 {
5547   aarch64_expand_vec_perm (operands[0], operands[1],
5548                            operands[2], operands[3], <nunits>);
5549   DONE;
5550 })
5551
5552 (define_insn "aarch64_tbl1<mode>"
5553   [(set (match_operand:VB 0 "register_operand" "=w")
5554         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5555                     (match_operand:VB 2 "register_operand" "w")]
5556                    UNSPEC_TBL))]
5557   "TARGET_SIMD"
5558   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5559   [(set_attr "type" "neon_tbl1<q>")]
5560 )
5561
5562 ;; Two source registers.
5563
5564 (define_insn "aarch64_tbl2v16qi"
5565   [(set (match_operand:V16QI 0 "register_operand" "=w")
5566         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5567                        (match_operand:V16QI 2 "register_operand" "w")]
5568                       UNSPEC_TBL))]
5569   "TARGET_SIMD"
5570   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5571   [(set_attr "type" "neon_tbl2_q")]
5572 )
5573
5574 (define_insn "aarch64_tbl3<mode>"
5575   [(set (match_operand:VB 0 "register_operand" "=w")
5576         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5577                       (match_operand:VB 2 "register_operand" "w")]
5578                       UNSPEC_TBL))]
5579   "TARGET_SIMD"
5580   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5581   [(set_attr "type" "neon_tbl3")]
5582 )
5583
5584 (define_insn "aarch64_tbx4<mode>"
5585   [(set (match_operand:VB 0 "register_operand" "=w")
5586         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5587                       (match_operand:OI 2 "register_operand" "w")
5588                       (match_operand:VB 3 "register_operand" "w")]
5589                       UNSPEC_TBX))]
5590   "TARGET_SIMD"
5591   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5592   [(set_attr "type" "neon_tbl4")]
5593 )
5594
5595 ;; Three source registers.
5596
5597 (define_insn "aarch64_qtbl3<mode>"
5598   [(set (match_operand:VB 0 "register_operand" "=w")
5599         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5600                       (match_operand:VB 2 "register_operand" "w")]
5601                       UNSPEC_TBL))]
5602   "TARGET_SIMD"
5603   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5604   [(set_attr "type" "neon_tbl3")]
5605 )
5606
5607 (define_insn "aarch64_qtbx3<mode>"
5608   [(set (match_operand:VB 0 "register_operand" "=w")
5609         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5610                       (match_operand:CI 2 "register_operand" "w")
5611                       (match_operand:VB 3 "register_operand" "w")]
5612                       UNSPEC_TBX))]
5613   "TARGET_SIMD"
5614   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5615   [(set_attr "type" "neon_tbl3")]
5616 )
5617
5618 ;; Four source registers.
5619
5620 (define_insn "aarch64_qtbl4<mode>"
5621   [(set (match_operand:VB 0 "register_operand" "=w")
5622         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5623                       (match_operand:VB 2 "register_operand" "w")]
5624                       UNSPEC_TBL))]
5625   "TARGET_SIMD"
5626   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5627   [(set_attr "type" "neon_tbl4")]
5628 )
5629
5630 (define_insn "aarch64_qtbx4<mode>"
5631   [(set (match_operand:VB 0 "register_operand" "=w")
5632         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5633                       (match_operand:XI 2 "register_operand" "w")
5634                       (match_operand:VB 3 "register_operand" "w")]
5635                       UNSPEC_TBX))]
5636   "TARGET_SIMD"
5637   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5638   [(set_attr "type" "neon_tbl4")]
5639 )
5640
5641 (define_insn_and_split "aarch64_combinev16qi"
5642   [(set (match_operand:OI 0 "register_operand" "=w")
5643         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5644                     (match_operand:V16QI 2 "register_operand" "w")]
5645                    UNSPEC_CONCAT))]
5646   "TARGET_SIMD"
5647   "#"
5648   "&& reload_completed"
5649   [(const_int 0)]
5650 {
5651   aarch64_split_combinev16qi (operands);
5652   DONE;
5653 }
5654 [(set_attr "type" "multiple")]
5655 )
5656
5657 ;; This instruction's pattern is generated directly by
5658 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5659 ;; need corresponding changes there.
5660 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5661   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5662         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5663                           (match_operand:VALL_F16 2 "register_operand" "w")]
5664          PERMUTE))]
5665   "TARGET_SIMD"
5666   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5667   [(set_attr "type" "neon_permute<q>")]
5668 )
5669
5670 ;; This instruction's pattern is generated directly by
5671 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5672 ;; need corresponding changes there.  Note that the immediate (third)
5673 ;; operand is a lane index not a byte index.
5674 (define_insn "aarch64_ext<mode>"
5675   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5676         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5677                           (match_operand:VALL_F16 2 "register_operand" "w")
5678                           (match_operand:SI 3 "immediate_operand" "i")]
5679          UNSPEC_EXT))]
5680   "TARGET_SIMD"
5681 {
5682   operands[3] = GEN_INT (INTVAL (operands[3])
5683       * GET_MODE_UNIT_SIZE (<MODE>mode));
5684   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5685 }
5686   [(set_attr "type" "neon_ext<q>")]
5687 )
5688
5689 ;; This instruction's pattern is generated directly by
5690 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5691 ;; need corresponding changes there.
5692 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5693   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5694         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5695                     REVERSE))]
5696   "TARGET_SIMD"
5697   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5698   [(set_attr "type" "neon_rev<q>")]
5699 )
5700
5701 (define_insn "aarch64_st2<mode>_dreg"
5702   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5703         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5704                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5705                    UNSPEC_ST2))]
5706   "TARGET_SIMD"
5707   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5708   [(set_attr "type" "neon_store2_2reg")]
5709 )
5710
5711 (define_insn "aarch64_st2<mode>_dreg"
5712   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5713         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5714                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5715                    UNSPEC_ST2))]
5716   "TARGET_SIMD"
5717   "st1\\t{%S1.1d - %T1.1d}, %0"
5718   [(set_attr "type" "neon_store1_2reg")]
5719 )
5720
5721 (define_insn "aarch64_st3<mode>_dreg"
5722   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5723         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5724                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5725                    UNSPEC_ST3))]
5726   "TARGET_SIMD"
5727   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5728   [(set_attr "type" "neon_store3_3reg")]
5729 )
5730
5731 (define_insn "aarch64_st3<mode>_dreg"
5732   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5733         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5734                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5735                    UNSPEC_ST3))]
5736   "TARGET_SIMD"
5737   "st1\\t{%S1.1d - %U1.1d}, %0"
5738   [(set_attr "type" "neon_store1_3reg")]
5739 )
5740
5741 (define_insn "aarch64_st4<mode>_dreg"
5742   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5743         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5744                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5745                    UNSPEC_ST4))]
5746   "TARGET_SIMD"
5747   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5748   [(set_attr "type" "neon_store4_4reg")]
5749 )
5750
5751 (define_insn "aarch64_st4<mode>_dreg"
5752   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5753         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5754                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5755                    UNSPEC_ST4))]
5756   "TARGET_SIMD"
5757   "st1\\t{%S1.1d - %V1.1d}, %0"
5758   [(set_attr "type" "neon_store1_4reg")]
5759 )
5760
5761 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5762  [(match_operand:DI 0 "register_operand" "r")
5763   (match_operand:VSTRUCT 1 "register_operand" "w")
5764   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5765   "TARGET_SIMD"
5766 {
5767   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5768   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5769
5770   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5771   DONE;
5772 })
5773
5774 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5775  [(match_operand:DI 0 "register_operand" "r")
5776   (match_operand:VSTRUCT 1 "register_operand" "w")
5777   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5778   "TARGET_SIMD"
5779 {
5780   machine_mode mode = <VSTRUCT:MODE>mode;
5781   rtx mem = gen_rtx_MEM (mode, operands[0]);
5782
5783   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5784   DONE;
5785 })
5786
5787 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5788  [(match_operand:DI 0 "register_operand" "r")
5789   (match_operand:VSTRUCT 1 "register_operand" "w")
5790   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5791   (match_operand:SI 2 "immediate_operand")]
5792   "TARGET_SIMD"
5793 {
5794   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5795   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5796                      * <VSTRUCT:nregs>);
5797
5798   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5799                 mem, operands[1], operands[2]));
5800   DONE;
5801 })
5802
5803 (define_expand "aarch64_st1<VALL_F16:mode>"
5804  [(match_operand:DI 0 "register_operand")
5805   (match_operand:VALL_F16 1 "register_operand")]
5806   "TARGET_SIMD"
5807 {
5808   machine_mode mode = <VALL_F16:MODE>mode;
5809   rtx mem = gen_rtx_MEM (mode, operands[0]);
5810
5811   if (BYTES_BIG_ENDIAN)
5812     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5813   else
5814     emit_move_insn (mem, operands[1]);
5815   DONE;
5816 })
5817
5818 ;; Expander for builtins to insert vector registers into large
5819 ;; opaque integer modes.
5820
5821 ;; Q-register list.  We don't need a D-reg inserter as we zero
5822 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5823
5824 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5825  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5826   (match_operand:VSTRUCT 1 "register_operand" "0")
5827   (match_operand:VQ 2 "register_operand" "w")
5828   (match_operand:SI 3 "immediate_operand" "i")]
5829   "TARGET_SIMD"
5830 {
5831   int part = INTVAL (operands[3]);
5832   int offset = part * 16;
5833
5834   emit_move_insn (operands[0], operands[1]);
5835   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5836                   operands[2]);
5837   DONE;
5838 })
5839
5840 ;; Standard pattern name vec_init<mode><Vel>.
5841
5842 (define_expand "vec_init<mode><Vel>"
5843   [(match_operand:VALL_F16 0 "register_operand" "")
5844    (match_operand 1 "" "")]
5845   "TARGET_SIMD"
5846 {
5847   aarch64_expand_vector_init (operands[0], operands[1]);
5848   DONE;
5849 })
5850
5851 (define_insn "*aarch64_simd_ld1r<mode>"
5852   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5853         (vec_duplicate:VALL_F16
5854           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5855   "TARGET_SIMD"
5856   "ld1r\\t{%0.<Vtype>}, %1"
5857   [(set_attr "type" "neon_load1_all_lanes")]
5858 )
5859
5860 (define_insn "aarch64_simd_ld1<mode>_x2"
5861   [(set (match_operand:OI 0 "register_operand" "=w")
5862         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5863                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5864                    UNSPEC_LD1))]
5865   "TARGET_SIMD"
5866   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5867   [(set_attr "type" "neon_load1_2reg<q>")]
5868 )
5869
5870 (define_insn "aarch64_simd_ld1<mode>_x2"
5871   [(set (match_operand:OI 0 "register_operand" "=w")
5872         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5873                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5874                    UNSPEC_LD1))]
5875   "TARGET_SIMD"
5876   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5877   [(set_attr "type" "neon_load1_2reg<q>")]
5878 )
5879
5880
5881 (define_insn "aarch64_frecpe<mode>"
5882   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5883         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5884          UNSPEC_FRECPE))]
5885   "TARGET_SIMD"
5886   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5887   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5888 )
5889
5890 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5891   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5892         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5893          FRECP))]
5894   "TARGET_SIMD"
5895   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5896   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5897 )
5898
5899 (define_insn "aarch64_frecps<mode>"
5900   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5901         (unspec:VHSDF_HSDF
5902           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5903           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5904           UNSPEC_FRECPS))]
5905   "TARGET_SIMD"
5906   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5907   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5908 )
5909
5910 (define_insn "aarch64_urecpe<mode>"
5911   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5912         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5913                 UNSPEC_URECPE))]
5914  "TARGET_SIMD"
5915  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5916   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5917
5918 ;; Standard pattern name vec_extract<mode><Vel>.
5919
5920 (define_expand "vec_extract<mode><Vel>"
5921   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5922    (match_operand:VALL_F16 1 "register_operand" "")
5923    (match_operand:SI 2 "immediate_operand" "")]
5924   "TARGET_SIMD"
5925 {
5926     emit_insn
5927       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5928     DONE;
5929 })
5930
5931 ;; aes
5932
5933 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5934   [(set (match_operand:V16QI 0 "register_operand" "=w")
5935         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5936                        (match_operand:V16QI 2 "register_operand" "w")]
5937          CRYPTO_AES))]
5938   "TARGET_SIMD && TARGET_AES"
5939   "aes<aes_op>\\t%0.16b, %2.16b"
5940   [(set_attr "type" "crypto_aese")]
5941 )
5942
5943 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5944   [(set (match_operand:V16QI 0 "register_operand" "=w")
5945         (unspec:V16QI [(xor:V16QI
5946                         (match_operand:V16QI 1 "register_operand" "%0")
5947                         (match_operand:V16QI 2 "register_operand" "w"))
5948                        (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5949                        CRYPTO_AES))]
5950   "TARGET_SIMD && TARGET_AES"
5951   "aes<aes_op>\\t%0.16b, %2.16b"
5952   [(set_attr "type" "crypto_aese")]
5953 )
5954
5955 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5956   [(set (match_operand:V16QI 0 "register_operand" "=w")
5957         (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5958         (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5959                    (match_operand:V16QI 2 "register_operand" "w"))]
5960         CRYPTO_AES))]
5961   "TARGET_SIMD && TARGET_AES"
5962   "aes<aes_op>\\t%0.16b, %2.16b"
5963   [(set_attr "type" "crypto_aese")]
5964 )
5965
5966 ;; When AES/AESMC fusion is enabled we want the register allocation to
5967 ;; look like:
5968 ;;    AESE Vn, _
5969 ;;    AESMC Vn, Vn
5970 ;; So prefer to tie operand 1 to operand 0 when fusing.
5971
5972 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5973   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5974         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5975          CRYPTO_AESMC))]
5976   "TARGET_SIMD && TARGET_AES"
5977   "aes<aesmc_op>\\t%0.16b, %1.16b"
5978   [(set_attr "type" "crypto_aesmc")
5979    (set_attr_alternative "enabled"
5980      [(if_then_else (match_test
5981                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5982                      (const_string "yes" )
5983                      (const_string "no"))
5984       (const_string "yes")])]
5985 )
5986
5987 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5988 ;; and enforce the register dependency without scheduling or register
5989 ;; allocation messing up the order or introducing moves inbetween.
5990 ;;  Mash the two together during combine.
5991
5992 (define_insn "*aarch64_crypto_aese_fused"
5993   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5994         (unspec:V16QI
5995           [(unspec:V16QI
5996             [(match_operand:V16QI 1 "register_operand" "0")
5997              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5998           ] UNSPEC_AESMC))]
5999   "TARGET_SIMD && TARGET_AES
6000    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6001   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6002   [(set_attr "type" "crypto_aese")
6003    (set_attr "length" "8")]
6004 )
6005
6006 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6007 ;; and enforce the register dependency without scheduling or register
6008 ;; allocation messing up the order or introducing moves inbetween.
6009 ;;  Mash the two together during combine.
6010
6011 (define_insn "*aarch64_crypto_aesd_fused"
6012   [(set (match_operand:V16QI 0 "register_operand" "=&w")
6013         (unspec:V16QI
6014           [(unspec:V16QI
6015             [(match_operand:V16QI 1 "register_operand" "0")
6016              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6017           ] UNSPEC_AESIMC))]
6018   "TARGET_SIMD && TARGET_AES
6019    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6020   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6021   [(set_attr "type" "crypto_aese")
6022    (set_attr "length" "8")]
6023 )
6024
6025 ;; sha1
6026
6027 (define_insn "aarch64_crypto_sha1hsi"
6028   [(set (match_operand:SI 0 "register_operand" "=w")
6029         (unspec:SI [(match_operand:SI 1
6030                        "register_operand" "w")]
6031          UNSPEC_SHA1H))]
6032   "TARGET_SIMD && TARGET_SHA2"
6033   "sha1h\\t%s0, %s1"
6034   [(set_attr "type" "crypto_sha1_fast")]
6035 )
6036
6037 (define_insn "aarch64_crypto_sha1hv4si"
6038   [(set (match_operand:SI 0 "register_operand" "=w")
6039         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6040                      (parallel [(const_int 0)]))]
6041          UNSPEC_SHA1H))]
6042   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6043   "sha1h\\t%s0, %s1"
6044   [(set_attr "type" "crypto_sha1_fast")]
6045 )
6046
6047 (define_insn "aarch64_be_crypto_sha1hv4si"
6048   [(set (match_operand:SI 0 "register_operand" "=w")
6049         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6050                      (parallel [(const_int 3)]))]
6051          UNSPEC_SHA1H))]
6052   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6053   "sha1h\\t%s0, %s1"
6054   [(set_attr "type" "crypto_sha1_fast")]
6055 )
6056
6057 (define_insn "aarch64_crypto_sha1su1v4si"
6058   [(set (match_operand:V4SI 0 "register_operand" "=w")
6059         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6060                       (match_operand:V4SI 2 "register_operand" "w")]
6061          UNSPEC_SHA1SU1))]
6062   "TARGET_SIMD && TARGET_SHA2"
6063   "sha1su1\\t%0.4s, %2.4s"
6064   [(set_attr "type" "crypto_sha1_fast")]
6065 )
6066
6067 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6068   [(set (match_operand:V4SI 0 "register_operand" "=w")
6069         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6070                       (match_operand:SI 2 "register_operand" "w")
6071                       (match_operand:V4SI 3 "register_operand" "w")]
6072          CRYPTO_SHA1))]
6073   "TARGET_SIMD && TARGET_SHA2"
6074   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6075   [(set_attr "type" "crypto_sha1_slow")]
6076 )
6077
6078 (define_insn "aarch64_crypto_sha1su0v4si"
6079   [(set (match_operand:V4SI 0 "register_operand" "=w")
6080         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6081                       (match_operand:V4SI 2 "register_operand" "w")
6082                       (match_operand:V4SI 3 "register_operand" "w")]
6083          UNSPEC_SHA1SU0))]
6084   "TARGET_SIMD && TARGET_SHA2"
6085   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6086   [(set_attr "type" "crypto_sha1_xor")]
6087 )
6088
6089 ;; sha256
6090
6091 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6092   [(set (match_operand:V4SI 0 "register_operand" "=w")
6093         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6094                       (match_operand:V4SI 2 "register_operand" "w")
6095                       (match_operand:V4SI 3 "register_operand" "w")]
6096          CRYPTO_SHA256))]
6097   "TARGET_SIMD && TARGET_SHA2"
6098   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6099   [(set_attr "type" "crypto_sha256_slow")]
6100 )
6101
6102 (define_insn "aarch64_crypto_sha256su0v4si"
6103   [(set (match_operand:V4SI 0 "register_operand" "=w")
6104         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6105                       (match_operand:V4SI 2 "register_operand" "w")]
6106          UNSPEC_SHA256SU0))]
6107   "TARGET_SIMD && TARGET_SHA2"
6108   "sha256su0\\t%0.4s, %2.4s"
6109   [(set_attr "type" "crypto_sha256_fast")]
6110 )
6111
6112 (define_insn "aarch64_crypto_sha256su1v4si"
6113   [(set (match_operand:V4SI 0 "register_operand" "=w")
6114         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6115                       (match_operand:V4SI 2 "register_operand" "w")
6116                       (match_operand:V4SI 3 "register_operand" "w")]
6117          UNSPEC_SHA256SU1))]
6118   "TARGET_SIMD && TARGET_SHA2"
6119   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6120   [(set_attr "type" "crypto_sha256_slow")]
6121 )
6122
6123 ;; sha512
6124
6125 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6126   [(set (match_operand:V2DI 0 "register_operand" "=w")
6127         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6128                       (match_operand:V2DI 2 "register_operand" "w")
6129                       (match_operand:V2DI 3 "register_operand" "w")]
6130          CRYPTO_SHA512))]
6131   "TARGET_SIMD && TARGET_SHA3"
6132   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6133   [(set_attr "type" "crypto_sha512")]
6134 )
6135
6136 (define_insn "aarch64_crypto_sha512su0qv2di"
6137   [(set (match_operand:V2DI 0 "register_operand" "=w")
6138         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6139                       (match_operand:V2DI 2 "register_operand" "w")]
6140          UNSPEC_SHA512SU0))]
6141   "TARGET_SIMD && TARGET_SHA3"
6142   "sha512su0\\t%0.2d, %2.2d"
6143   [(set_attr "type" "crypto_sha512")]
6144 )
6145
6146 (define_insn "aarch64_crypto_sha512su1qv2di"
6147   [(set (match_operand:V2DI 0 "register_operand" "=w")
6148         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6149                       (match_operand:V2DI 2 "register_operand" "w")
6150                       (match_operand:V2DI 3 "register_operand" "w")]
6151          UNSPEC_SHA512SU1))]
6152   "TARGET_SIMD && TARGET_SHA3"
6153   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6154   [(set_attr "type" "crypto_sha512")]
6155 )
6156
6157 ;; sha3
6158
6159 (define_insn "eor3q<mode>4"
6160   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6161         (xor:VQ_I
6162          (xor:VQ_I
6163           (match_operand:VQ_I 2 "register_operand" "w")
6164           (match_operand:VQ_I 3 "register_operand" "w"))
6165          (match_operand:VQ_I 1 "register_operand" "w")))]
6166   "TARGET_SIMD && TARGET_SHA3"
6167   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6168   [(set_attr "type" "crypto_sha3")]
6169 )
6170
6171 (define_insn "aarch64_rax1qv2di"
6172   [(set (match_operand:V2DI 0 "register_operand" "=w")
6173         (xor:V2DI
6174          (rotate:V2DI
6175           (match_operand:V2DI 2 "register_operand" "w")
6176           (const_int 1))
6177          (match_operand:V2DI 1 "register_operand" "w")))]
6178   "TARGET_SIMD && TARGET_SHA3"
6179   "rax1\\t%0.2d, %1.2d, %2.2d"
6180   [(set_attr "type" "crypto_sha3")]
6181 )
6182
6183 (define_insn "aarch64_xarqv2di"
6184   [(set (match_operand:V2DI 0 "register_operand" "=w")
6185         (rotatert:V2DI
6186          (xor:V2DI
6187           (match_operand:V2DI 1 "register_operand" "%w")
6188           (match_operand:V2DI 2 "register_operand" "w"))
6189          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6190   "TARGET_SIMD && TARGET_SHA3"
6191   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6192   [(set_attr "type" "crypto_sha3")]
6193 )
6194
6195 (define_insn "bcaxq<mode>4"
6196   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6197         (xor:VQ_I
6198          (and:VQ_I
6199           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6200           (match_operand:VQ_I 2 "register_operand" "w"))
6201          (match_operand:VQ_I 1 "register_operand" "w")))]
6202   "TARGET_SIMD && TARGET_SHA3"
6203   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6204   [(set_attr "type" "crypto_sha3")]
6205 )
6206
6207 ;; SM3
6208
6209 (define_insn "aarch64_sm3ss1qv4si"
6210   [(set (match_operand:V4SI 0 "register_operand" "=w")
6211         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6212                       (match_operand:V4SI 2 "register_operand" "w")
6213                       (match_operand:V4SI 3 "register_operand" "w")]
6214          UNSPEC_SM3SS1))]
6215   "TARGET_SIMD && TARGET_SM4"
6216   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6217   [(set_attr "type" "crypto_sm3")]
6218 )
6219
6220
6221 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6222   [(set (match_operand:V4SI 0 "register_operand" "=w")
6223         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6224                       (match_operand:V4SI 2 "register_operand" "w")
6225                       (match_operand:V4SI 3 "register_operand" "w")
6226                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6227          CRYPTO_SM3TT))]
6228   "TARGET_SIMD && TARGET_SM4"
6229   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6230   [(set_attr "type" "crypto_sm3")]
6231 )
6232
6233 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6234   [(set (match_operand:V4SI 0 "register_operand" "=w")
6235         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6236                       (match_operand:V4SI 2 "register_operand" "w")
6237                       (match_operand:V4SI 3 "register_operand" "w")]
6238          CRYPTO_SM3PART))]
6239   "TARGET_SIMD && TARGET_SM4"
6240   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6241   [(set_attr "type" "crypto_sm3")]
6242 )
6243
6244 ;; SM4
6245
6246 (define_insn "aarch64_sm4eqv4si"
6247   [(set (match_operand:V4SI 0 "register_operand" "=w")
6248         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6249                       (match_operand:V4SI 2 "register_operand" "w")]
6250          UNSPEC_SM4E))]
6251   "TARGET_SIMD && TARGET_SM4"
6252   "sm4e\\t%0.4s, %2.4s"
6253   [(set_attr "type" "crypto_sm4")]
6254 )
6255
6256 (define_insn "aarch64_sm4ekeyqv4si"
6257   [(set (match_operand:V4SI 0 "register_operand" "=w")
6258         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6259                       (match_operand:V4SI 2 "register_operand" "w")]
6260          UNSPEC_SM4EKEY))]
6261   "TARGET_SIMD && TARGET_SM4"
6262   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6263   [(set_attr "type" "crypto_sm4")]
6264 )
6265
6266 ;; fp16fml
6267
6268 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6269   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6270         (unspec:VDQSF
6271          [(match_operand:VDQSF 1 "register_operand" "0")
6272           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6273           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6274          VFMLA16_LOW))]
6275   "TARGET_F16FML"
6276 {
6277   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6278                                             <nunits> * 2, false);
6279   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6280                                             <nunits> * 2, false);
6281
6282   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6283                                                                 operands[1],
6284                                                                 operands[2],
6285                                                                 operands[3],
6286                                                                 p1, p2));
6287   DONE;
6288
6289 })
6290
6291 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6292   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6293         (unspec:VDQSF
6294          [(match_operand:VDQSF 1 "register_operand" "0")
6295           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6296           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6297          VFMLA16_HIGH))]
6298   "TARGET_F16FML"
6299 {
6300   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6301   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6302
6303   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6304                                                                  operands[1],
6305                                                                  operands[2],
6306                                                                  operands[3],
6307                                                                  p1, p2));
6308   DONE;
6309 })
6310
6311 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6312   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6313         (fma:VDQSF
6314          (float_extend:VDQSF
6315           (vec_select:<VFMLA_SEL_W>
6316            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6317            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6318          (float_extend:VDQSF
6319           (vec_select:<VFMLA_SEL_W>
6320            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6321            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6322          (match_operand:VDQSF 1 "register_operand" "0")))]
6323   "TARGET_F16FML"
6324   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6325   [(set_attr "type" "neon_fp_mul_s")]
6326 )
6327
6328 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6329   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6330         (fma:VDQSF
6331          (float_extend:VDQSF
6332           (neg:<VFMLA_SEL_W>
6333            (vec_select:<VFMLA_SEL_W>
6334             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6335             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6336          (float_extend:VDQSF
6337           (vec_select:<VFMLA_SEL_W>
6338            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6339            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6340          (match_operand:VDQSF 1 "register_operand" "0")))]
6341   "TARGET_F16FML"
6342   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6343   [(set_attr "type" "neon_fp_mul_s")]
6344 )
6345
6346 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6347   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6348         (fma:VDQSF
6349          (float_extend:VDQSF
6350           (vec_select:<VFMLA_SEL_W>
6351            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6352            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6353          (float_extend:VDQSF
6354           (vec_select:<VFMLA_SEL_W>
6355            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6356            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6357          (match_operand:VDQSF 1 "register_operand" "0")))]
6358   "TARGET_F16FML"
6359   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6360   [(set_attr "type" "neon_fp_mul_s")]
6361 )
6362
6363 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6364   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6365         (fma:VDQSF
6366          (float_extend:VDQSF
6367           (neg:<VFMLA_SEL_W>
6368            (vec_select:<VFMLA_SEL_W>
6369             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6370             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6371          (float_extend:VDQSF
6372           (vec_select:<VFMLA_SEL_W>
6373            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6374            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6375          (match_operand:VDQSF 1 "register_operand" "0")))]
6376   "TARGET_F16FML"
6377   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6378   [(set_attr "type" "neon_fp_mul_s")]
6379 )
6380
6381 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6382   [(set (match_operand:V2SF 0 "register_operand" "")
6383         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6384                            (match_operand:V4HF 2 "register_operand" "")
6385                            (match_operand:V4HF 3 "register_operand" "")
6386                            (match_operand:SI 4 "aarch64_imm2" "")]
6387          VFMLA16_LOW))]
6388   "TARGET_F16FML"
6389 {
6390     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6391     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6392
6393     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6394                                                             operands[1],
6395                                                             operands[2],
6396                                                             operands[3],
6397                                                             p1, lane));
6398     DONE;
6399 }
6400 )
6401
6402 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6403   [(set (match_operand:V2SF 0 "register_operand" "")
6404         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6405                            (match_operand:V4HF 2 "register_operand" "")
6406                            (match_operand:V4HF 3 "register_operand" "")
6407                            (match_operand:SI 4 "aarch64_imm2" "")]
6408          VFMLA16_HIGH))]
6409   "TARGET_F16FML"
6410 {
6411     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6412     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6413
6414     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6415                                                              operands[1],
6416                                                              operands[2],
6417                                                              operands[3],
6418                                                              p1, lane));
6419     DONE;
6420 })
6421
6422 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6423   [(set (match_operand:V2SF 0 "register_operand" "=w")
6424         (fma:V2SF
6425          (float_extend:V2SF
6426            (vec_select:V2HF
6427             (match_operand:V4HF 2 "register_operand" "w")
6428             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6429          (float_extend:V2SF
6430            (vec_duplicate:V2HF
6431             (vec_select:HF
6432              (match_operand:V4HF 3 "register_operand" "x")
6433              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6434          (match_operand:V2SF 1 "register_operand" "0")))]
6435   "TARGET_F16FML"
6436   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6437   [(set_attr "type" "neon_fp_mul_s")]
6438 )
6439
6440 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6441   [(set (match_operand:V2SF 0 "register_operand" "=w")
6442         (fma:V2SF
6443          (float_extend:V2SF
6444           (neg:V2HF
6445            (vec_select:V2HF
6446             (match_operand:V4HF 2 "register_operand" "w")
6447             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6448          (float_extend:V2SF
6449           (vec_duplicate:V2HF
6450            (vec_select:HF
6451             (match_operand:V4HF 3 "register_operand" "x")
6452             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6453          (match_operand:V2SF 1 "register_operand" "0")))]
6454   "TARGET_F16FML"
6455   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6456   [(set_attr "type" "neon_fp_mul_s")]
6457 )
6458
6459 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6460   [(set (match_operand:V2SF 0 "register_operand" "=w")
6461         (fma:V2SF
6462          (float_extend:V2SF
6463            (vec_select:V2HF
6464             (match_operand:V4HF 2 "register_operand" "w")
6465             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6466          (float_extend:V2SF
6467            (vec_duplicate:V2HF
6468             (vec_select:HF
6469              (match_operand:V4HF 3 "register_operand" "x")
6470              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6471          (match_operand:V2SF 1 "register_operand" "0")))]
6472   "TARGET_F16FML"
6473   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6474   [(set_attr "type" "neon_fp_mul_s")]
6475 )
6476
6477 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6478   [(set (match_operand:V2SF 0 "register_operand" "=w")
6479         (fma:V2SF
6480          (float_extend:V2SF
6481            (neg:V2HF
6482             (vec_select:V2HF
6483              (match_operand:V4HF 2 "register_operand" "w")
6484              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6485          (float_extend:V2SF
6486            (vec_duplicate:V2HF
6487             (vec_select:HF
6488              (match_operand:V4HF 3 "register_operand" "x")
6489              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6490          (match_operand:V2SF 1 "register_operand" "0")))]
6491   "TARGET_F16FML"
6492   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6493   [(set_attr "type" "neon_fp_mul_s")]
6494 )
6495
6496 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6497   [(set (match_operand:V4SF 0 "register_operand" "")
6498         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6499                            (match_operand:V8HF 2 "register_operand" "")
6500                            (match_operand:V8HF 3 "register_operand" "")
6501                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6502          VFMLA16_LOW))]
6503   "TARGET_F16FML"
6504 {
6505     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6506     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6507
6508     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6509                                                               operands[1],
6510                                                               operands[2],
6511                                                               operands[3],
6512                                                               p1, lane));
6513     DONE;
6514 })
6515
6516 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6517   [(set (match_operand:V4SF 0 "register_operand" "")
6518         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6519                            (match_operand:V8HF 2 "register_operand" "")
6520                            (match_operand:V8HF 3 "register_operand" "")
6521                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6522          VFMLA16_HIGH))]
6523   "TARGET_F16FML"
6524 {
6525     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6526     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6527
6528     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6529                                                                operands[1],
6530                                                                operands[2],
6531                                                                operands[3],
6532                                                                p1, lane));
6533     DONE;
6534 })
6535
6536 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6537   [(set (match_operand:V4SF 0 "register_operand" "=w")
6538         (fma:V4SF
6539          (float_extend:V4SF
6540           (vec_select:V4HF
6541             (match_operand:V8HF 2 "register_operand" "w")
6542             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6543          (float_extend:V4SF
6544           (vec_duplicate:V4HF
6545            (vec_select:HF
6546             (match_operand:V8HF 3 "register_operand" "x")
6547             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6548          (match_operand:V4SF 1 "register_operand" "0")))]
6549   "TARGET_F16FML"
6550   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6551   [(set_attr "type" "neon_fp_mul_s")]
6552 )
6553
6554 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6555   [(set (match_operand:V4SF 0 "register_operand" "=w")
6556         (fma:V4SF
6557           (float_extend:V4SF
6558            (neg:V4HF
6559             (vec_select:V4HF
6560              (match_operand:V8HF 2 "register_operand" "w")
6561              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6562          (float_extend:V4SF
6563           (vec_duplicate:V4HF
6564            (vec_select:HF
6565             (match_operand:V8HF 3 "register_operand" "x")
6566             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6567          (match_operand:V4SF 1 "register_operand" "0")))]
6568   "TARGET_F16FML"
6569   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6570   [(set_attr "type" "neon_fp_mul_s")]
6571 )
6572
6573 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6574   [(set (match_operand:V4SF 0 "register_operand" "=w")
6575         (fma:V4SF
6576          (float_extend:V4SF
6577           (vec_select:V4HF
6578             (match_operand:V8HF 2 "register_operand" "w")
6579             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6580          (float_extend:V4SF
6581           (vec_duplicate:V4HF
6582            (vec_select:HF
6583             (match_operand:V8HF 3 "register_operand" "x")
6584             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6585          (match_operand:V4SF 1 "register_operand" "0")))]
6586   "TARGET_F16FML"
6587   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6588   [(set_attr "type" "neon_fp_mul_s")]
6589 )
6590
6591 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6592   [(set (match_operand:V4SF 0 "register_operand" "=w")
6593         (fma:V4SF
6594          (float_extend:V4SF
6595           (neg:V4HF
6596            (vec_select:V4HF
6597             (match_operand:V8HF 2 "register_operand" "w")
6598             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6599          (float_extend:V4SF
6600           (vec_duplicate:V4HF
6601            (vec_select:HF
6602             (match_operand:V8HF 3 "register_operand" "x")
6603             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6604          (match_operand:V4SF 1 "register_operand" "0")))]
6605   "TARGET_F16FML"
6606   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6607   [(set_attr "type" "neon_fp_mul_s")]
6608 )
6609
6610 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6611   [(set (match_operand:V2SF 0 "register_operand" "")
6612         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6613                       (match_operand:V4HF 2 "register_operand" "")
6614                       (match_operand:V8HF 3 "register_operand" "")
6615                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6616          VFMLA16_LOW))]
6617   "TARGET_F16FML"
6618 {
6619     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6620     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6621
6622     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6623                                                              operands[1],
6624                                                              operands[2],
6625                                                              operands[3],
6626                                                              p1, lane));
6627     DONE;
6628
6629 })
6630
6631 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6632   [(set (match_operand:V2SF 0 "register_operand" "")
6633         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6634                       (match_operand:V4HF 2 "register_operand" "")
6635                       (match_operand:V8HF 3 "register_operand" "")
6636                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6637          VFMLA16_HIGH))]
6638   "TARGET_F16FML"
6639 {
6640     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6641     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6642
6643     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6644                                                               operands[1],
6645                                                               operands[2],
6646                                                               operands[3],
6647                                                               p1, lane));
6648     DONE;
6649
6650 })
6651
6652 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6653   [(set (match_operand:V2SF 0 "register_operand" "=w")
6654         (fma:V2SF
6655          (float_extend:V2SF
6656            (vec_select:V2HF
6657             (match_operand:V4HF 2 "register_operand" "w")
6658             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6659          (float_extend:V2SF
6660           (vec_duplicate:V2HF
6661            (vec_select:HF
6662             (match_operand:V8HF 3 "register_operand" "x")
6663             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664          (match_operand:V2SF 1 "register_operand" "0")))]
6665   "TARGET_F16FML"
6666   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6667   [(set_attr "type" "neon_fp_mul_s")]
6668 )
6669
6670 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6671   [(set (match_operand:V2SF 0 "register_operand" "=w")
6672         (fma:V2SF
6673          (float_extend:V2SF
6674           (neg:V2HF
6675            (vec_select:V2HF
6676             (match_operand:V4HF 2 "register_operand" "w")
6677             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6678          (float_extend:V2SF
6679           (vec_duplicate:V2HF
6680            (vec_select:HF
6681             (match_operand:V8HF 3 "register_operand" "x")
6682             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683          (match_operand:V2SF 1 "register_operand" "0")))]
6684   "TARGET_F16FML"
6685   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6686   [(set_attr "type" "neon_fp_mul_s")]
6687 )
6688
6689 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6690   [(set (match_operand:V2SF 0 "register_operand" "=w")
6691         (fma:V2SF
6692          (float_extend:V2SF
6693            (vec_select:V2HF
6694             (match_operand:V4HF 2 "register_operand" "w")
6695             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6696          (float_extend:V2SF
6697           (vec_duplicate:V2HF
6698            (vec_select:HF
6699             (match_operand:V8HF 3 "register_operand" "x")
6700             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6701          (match_operand:V2SF 1 "register_operand" "0")))]
6702   "TARGET_F16FML"
6703   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6704   [(set_attr "type" "neon_fp_mul_s")]
6705 )
6706
6707 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6708   [(set (match_operand:V2SF 0 "register_operand" "=w")
6709         (fma:V2SF
6710          (float_extend:V2SF
6711           (neg:V2HF
6712            (vec_select:V2HF
6713             (match_operand:V4HF 2 "register_operand" "w")
6714             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6715          (float_extend:V2SF
6716           (vec_duplicate:V2HF
6717            (vec_select:HF
6718             (match_operand:V8HF 3 "register_operand" "x")
6719             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6720          (match_operand:V2SF 1 "register_operand" "0")))]
6721   "TARGET_F16FML"
6722   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6723   [(set_attr "type" "neon_fp_mul_s")]
6724 )
6725
6726 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6727   [(set (match_operand:V4SF 0 "register_operand" "")
6728         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6729                       (match_operand:V8HF 2 "register_operand" "")
6730                       (match_operand:V4HF 3 "register_operand" "")
6731                       (match_operand:SI 4 "aarch64_imm2" "")]
6732          VFMLA16_LOW))]
6733   "TARGET_F16FML"
6734 {
6735     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6736     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6737
6738     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6739                                                              operands[1],
6740                                                              operands[2],
6741                                                              operands[3],
6742                                                              p1, lane));
6743     DONE;
6744 })
6745
6746 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6747   [(set (match_operand:V4SF 0 "register_operand" "")
6748         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6749                       (match_operand:V8HF 2 "register_operand" "")
6750                       (match_operand:V4HF 3 "register_operand" "")
6751                       (match_operand:SI 4 "aarch64_imm2" "")]
6752          VFMLA16_HIGH))]
6753   "TARGET_F16FML"
6754 {
6755     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6756     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6757
6758     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6759                                                               operands[1],
6760                                                               operands[2],
6761                                                               operands[3],
6762                                                               p1, lane));
6763     DONE;
6764 })
6765
6766 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6767   [(set (match_operand:V4SF 0 "register_operand" "=w")
6768         (fma:V4SF
6769          (float_extend:V4SF
6770           (vec_select:V4HF
6771            (match_operand:V8HF 2 "register_operand" "w")
6772            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6773          (float_extend:V4SF
6774           (vec_duplicate:V4HF
6775            (vec_select:HF
6776             (match_operand:V4HF 3 "register_operand" "x")
6777             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778          (match_operand:V4SF 1 "register_operand" "0")))]
6779   "TARGET_F16FML"
6780   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6781   [(set_attr "type" "neon_fp_mul_s")]
6782 )
6783
6784 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6785   [(set (match_operand:V4SF 0 "register_operand" "=w")
6786         (fma:V4SF
6787          (float_extend:V4SF
6788           (neg:V4HF
6789            (vec_select:V4HF
6790             (match_operand:V8HF 2 "register_operand" "w")
6791             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6792          (float_extend:V4SF
6793           (vec_duplicate:V4HF
6794            (vec_select:HF
6795             (match_operand:V4HF 3 "register_operand" "x")
6796             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6797          (match_operand:V4SF 1 "register_operand" "0")))]
6798   "TARGET_F16FML"
6799   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6800   [(set_attr "type" "neon_fp_mul_s")]
6801 )
6802
6803 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6804   [(set (match_operand:V4SF 0 "register_operand" "=w")
6805         (fma:V4SF
6806          (float_extend:V4SF
6807           (vec_select:V4HF
6808            (match_operand:V8HF 2 "register_operand" "w")
6809            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6810          (float_extend:V4SF
6811           (vec_duplicate:V4HF
6812            (vec_select:HF
6813             (match_operand:V4HF 3 "register_operand" "x")
6814             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6815          (match_operand:V4SF 1 "register_operand" "0")))]
6816   "TARGET_F16FML"
6817   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6818   [(set_attr "type" "neon_fp_mul_s")]
6819 )
6820
6821 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6822   [(set (match_operand:V4SF 0 "register_operand" "=w")
6823         (fma:V4SF
6824          (float_extend:V4SF
6825           (neg:V4HF
6826            (vec_select:V4HF
6827             (match_operand:V8HF 2 "register_operand" "w")
6828             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6829          (float_extend:V4SF
6830           (vec_duplicate:V4HF
6831            (vec_select:HF
6832             (match_operand:V4HF 3 "register_operand" "x")
6833             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6834          (match_operand:V4SF 1 "register_operand" "0")))]
6835   "TARGET_F16FML"
6836   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6837   [(set_attr "type" "neon_fp_mul_s")]
6838 )
6839
6840 ;; pmull
6841
6842 (define_insn "aarch64_crypto_pmulldi"
6843   [(set (match_operand:TI 0 "register_operand" "=w")
6844         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6845                      (match_operand:DI 2 "register_operand" "w")]
6846                     UNSPEC_PMULL))]
6847  "TARGET_SIMD && TARGET_AES"
6848  "pmull\\t%0.1q, %1.1d, %2.1d"
6849   [(set_attr "type" "crypto_pmull")]
6850 )
6851
6852 (define_insn "aarch64_crypto_pmullv2di"
6853  [(set (match_operand:TI 0 "register_operand" "=w")
6854        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6855                    (match_operand:V2DI 2 "register_operand" "w")]
6856                   UNSPEC_PMULL2))]
6857   "TARGET_SIMD && TARGET_AES"
6858   "pmull2\\t%0.1q, %1.2d, %2.2d"
6859   [(set_attr "type" "crypto_pmull")]
6860 )