gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1], 64);
 124      default: gcc_unreachable ();
 125      }
 126 }
 127   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 128                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 129                      mov_reg, neon_move<q>")]
 130 )
 131
 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
 133   [(set (match_operand:VQ 0 "nonimmediate_operand"
 134                 "=w, Umq,  m,  w, ?r, ?w, ?r, w")
 135         (match_operand:VQ 1 "general_operand"
 136                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 137   "TARGET_SIMD
 138    && (register_operand (operands[0], <MODE>mode)
 139        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 140 {
 141   switch (which_alternative)
 142     {
 143     case 0:
 144         return "ldr\t%q0, %1";
 145     case 1:
 146         return "stp\txzr, xzr, %0";
 147     case 2:
 148         return "str\t%q1, %0";
 149     case 3:
 150         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 151     case 4:
 152     case 5:
 153     case 6:
 154         return "#";
 155     case 7:
 156         return aarch64_output_simd_mov_immediate (operands[1], 128);
 157     default:
 158         gcc_unreachable ();
 159     }
 160 }
 161   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 162                      neon_logic<q>, multiple, multiple,\
 163                      multiple, neon_move<q>")
 164    (set_attr "length" "4,4,4,4,8,8,8,4")]
 165 )
 166
 167 ;; When storing lane zero we can use the normal STR and its more permissive
 168 ;; addressing modes.
 169
 170 (define_insn "aarch64_store_lane0<mode>"
 171   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 172         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 173                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 174   "TARGET_SIMD
 175    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 176   "str\\t%<Vetype>1, %0"
 177   [(set_attr "type" "neon_store1_1reg<q>")]
 178 )
 179
 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
 181   [(set (match_operand:DREG 0 "register_operand" "=w")
 182         (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
 183    (set (match_operand:DREG2 2 "register_operand" "=w")
 184         (match_operand:DREG2 3 "memory_operand" "m"))]
 185   "TARGET_SIMD
 186    && rtx_equal_p (XEXP (operands[3], 0),
 187                    plus_constant (Pmode,
 188                                   XEXP (operands[1], 0),
 189                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 190   "ldp\\t%d0, %d2, %1"
 191   [(set_attr "type" "neon_ldp")]
 192 )
 193
 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
 195   [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
 196         (match_operand:DREG 1 "register_operand" "w"))
 197    (set (match_operand:DREG2 2 "memory_operand" "=m")
 198         (match_operand:DREG2 3 "register_operand" "w"))]
 199   "TARGET_SIMD
 200    && rtx_equal_p (XEXP (operands[2], 0),
 201                    plus_constant (Pmode,
 202                                   XEXP (operands[0], 0),
 203                                   GET_MODE_SIZE (<DREG:MODE>mode)))"
 204   "stp\\t%d1, %d3, %0"
 205   [(set_attr "type" "neon_stp")]
 206 )
 207
 208 (define_split
 209   [(set (match_operand:VQ 0 "register_operand" "")
 210       (match_operand:VQ 1 "register_operand" ""))]
 211   "TARGET_SIMD && reload_completed
 212    && GP_REGNUM_P (REGNO (operands[0]))
 213    && GP_REGNUM_P (REGNO (operands[1]))"
 214   [(const_int 0)]
 215 {
 216   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 217   DONE;
 218 })
 219
 220 (define_split
 221   [(set (match_operand:VQ 0 "register_operand" "")
 222         (match_operand:VQ 1 "register_operand" ""))]
 223   "TARGET_SIMD && reload_completed
 224    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 225        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 226   [(const_int 0)]
 227 {
 228   aarch64_split_simd_move (operands[0], operands[1]);
 229   DONE;
 230 })
 231
 232 (define_expand "aarch64_split_simd_mov<mode>"
 233   [(set (match_operand:VQ 0)
 234         (match_operand:VQ 1))]
 235   "TARGET_SIMD"
 236   {
 237     rtx dst = operands[0];
 238     rtx src = operands[1];
 239
 240     if (GP_REGNUM_P (REGNO (src)))
 241       {
 242         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 243         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 244
 245         emit_insn
 246           (gen_move_lo_quad_<mode> (dst, src_low_part));
 247         emit_insn
 248           (gen_move_hi_quad_<mode> (dst, src_high_part));
 249       }
 250
 251     else
 252       {
 253         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 254         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 255         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 256         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 257
 258         emit_insn
 259           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 260         emit_insn
 261           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 262       }
 263     DONE;
 264   }
 265 )
 266
 267 (define_insn "aarch64_simd_mov_from_<mode>low"
 268   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 269         (vec_select:<VHALF>
 270           (match_operand:VQ 1 "register_operand" "w")
 271           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 272   "TARGET_SIMD && reload_completed"
 273   "umov\t%0, %1.d[0]"
 274   [(set_attr "type" "neon_to_gp<q>")
 275    (set_attr "length" "4")
 276   ])
 277
 278 (define_insn "aarch64_simd_mov_from_<mode>high"
 279   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 280         (vec_select:<VHALF>
 281           (match_operand:VQ 1 "register_operand" "w")
 282           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 283   "TARGET_SIMD && reload_completed"
 284   "umov\t%0, %1.d[1]"
 285   [(set_attr "type" "neon_to_gp<q>")
 286    (set_attr "length" "4")
 287   ])
 288
 289 (define_insn "orn<mode>3"
 290  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 291        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 292                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 293  "TARGET_SIMD"
 294  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 295   [(set_attr "type" "neon_logic<q>")]
 296 )
 297
 298 (define_insn "bic<mode>3"
 299  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 300        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 301                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 302  "TARGET_SIMD"
 303  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 304   [(set_attr "type" "neon_logic<q>")]
 305 )
 306
 307 (define_insn "add<mode>3"
 308   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 309         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 310                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 311   "TARGET_SIMD"
 312   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 313   [(set_attr "type" "neon_add<q>")]
 314 )
 315
 316 (define_insn "sub<mode>3"
 317   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 318         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 319                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 320   "TARGET_SIMD"
 321   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 322   [(set_attr "type" "neon_sub<q>")]
 323 )
 324
 325 (define_insn "mul<mode>3"
 326   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 327         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 328                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 329   "TARGET_SIMD"
 330   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 331   [(set_attr "type" "neon_mul_<Vetype><q>")]
 332 )
 333
 334 (define_insn "bswap<mode>2"
 335   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 336         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 337   "TARGET_SIMD"
 338   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 339   [(set_attr "type" "neon_rev<q>")]
 340 )
 341
 342 (define_insn "aarch64_rbit<mode>"
 343   [(set (match_operand:VB 0 "register_operand" "=w")
 344         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 345                    UNSPEC_RBIT))]
 346   "TARGET_SIMD"
 347   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 348   [(set_attr "type" "neon_rbit")]
 349 )
 350
 351 (define_expand "ctz<mode>2"
 352   [(set (match_operand:VS 0 "register_operand")
 353         (ctz:VS (match_operand:VS 1 "register_operand")))]
 354   "TARGET_SIMD"
 355   {
 356      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 357      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 358                                              <MODE>mode, 0);
 359      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 360      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 361      DONE;
 362   }
 363 )
 364
 365 (define_expand "xorsign<mode>3"
 366   [(match_operand:VHSDF 0 "register_operand")
 367    (match_operand:VHSDF 1 "register_operand")
 368    (match_operand:VHSDF 2 "register_operand")]
 369   "TARGET_SIMD"
 370 {
 371
 372   machine_mode imode = <V_INT_EQUIV>mode;
 373   rtx v_bitmask = gen_reg_rtx (imode);
 374   rtx op1x = gen_reg_rtx (imode);
 375   rtx op2x = gen_reg_rtx (imode);
 376
 377   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 378   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 379
 380   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 381
 382   emit_move_insn (v_bitmask,
 383                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 384                                                      HOST_WIDE_INT_M1U << bits));
 385
 386   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 387   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 388   emit_move_insn (operands[0],
 389                   lowpart_subreg (<MODE>mode, op1x, imode));
 390   DONE;
 391 }
 392 )
 393
 394 ;; These instructions map to the __builtins for the Dot Product operations.
 395 (define_insn "aarch64_<sur>dot<vsi2qi>"
 396   [(set (match_operand:VS 0 "register_operand" "=w")
 397         (plus:VS (match_operand:VS 1 "register_operand" "0")
 398                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 399                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 400                 DOTPROD)))]
 401   "TARGET_DOTPROD"
 402   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 403   [(set_attr "type" "neon_dot")]
 404 )
 405
 406 ;; These expands map to the Dot Product optab the vectorizer checks for.
 407 ;; The auto-vectorizer expects a dot product builtin that also does an
 408 ;; accumulation into the provided register.
 409 ;; Given the following pattern
 410 ;;
 411 ;; for (i=0; i<len; i++) {
 412 ;;     c = a[i] * b[i];
 413 ;;     r += c;
 414 ;; }
 415 ;; return result;
 416 ;;
 417 ;; This can be auto-vectorized to
 418 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 419 ;;
 420 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 421 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 422 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 423 ;; ...
 424 ;;
 425 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 426 (define_expand "<sur>dot_prod<vsi2qi>"
 427   [(set (match_operand:VS 0 "register_operand")
 428         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 429                             (match_operand:<VSI2QI> 2 "register_operand")]
 430                  DOTPROD)
 431                 (match_operand:VS 3 "register_operand")))]
 432   "TARGET_DOTPROD"
 433 {
 434   emit_insn (
 435     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 436                                     operands[2]));
 437   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 438   DONE;
 439 })
 440
 441 ;; These instructions map to the __builtins for the Dot Product
 442 ;; indexed operations.
 443 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 444   [(set (match_operand:VS 0 "register_operand" "=w")
 445         (plus:VS (match_operand:VS 1 "register_operand" "0")
 446                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 447                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 448                             (match_operand:SI 4 "immediate_operand" "i")]
 449                 DOTPROD)))]
 450   "TARGET_DOTPROD"
 451   {
 452     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 453     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 454   }
 455   [(set_attr "type" "neon_dot")]
 456 )
 457
 458 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 459   [(set (match_operand:VS 0 "register_operand" "=w")
 460         (plus:VS (match_operand:VS 1 "register_operand" "0")
 461                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 462                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 463                             (match_operand:SI 4 "immediate_operand" "i")]
 464                 DOTPROD)))]
 465   "TARGET_DOTPROD"
 466   {
 467     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 468     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 469   }
 470   [(set_attr "type" "neon_dot")]
 471 )
 472
 473 (define_expand "copysign<mode>3"
 474   [(match_operand:VHSDF 0 "register_operand")
 475    (match_operand:VHSDF 1 "register_operand")
 476    (match_operand:VHSDF 2 "register_operand")]
 477   "TARGET_FLOAT && TARGET_SIMD"
 478 {
 479   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 480   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 481
 482   emit_move_insn (v_bitmask,
 483                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 484                                                      HOST_WIDE_INT_M1U << bits));
 485   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 486                                          operands[2], operands[1]));
 487   DONE;
 488 }
 489 )
 490
 491 (define_insn "*aarch64_mul3_elt<mode>"
 492  [(set (match_operand:VMUL 0 "register_operand" "=w")
 493     (mult:VMUL
 494       (vec_duplicate:VMUL
 495           (vec_select:<VEL>
 496             (match_operand:VMUL 1 "register_operand" "<h_con>")
 497             (parallel [(match_operand:SI 2 "immediate_operand")])))
 498       (match_operand:VMUL 3 "register_operand" "w")))]
 499   "TARGET_SIMD"
 500   {
 501     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 502     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 503   }
 504   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 505 )
 506
 507 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 508   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 509      (mult:VMUL_CHANGE_NLANES
 510        (vec_duplicate:VMUL_CHANGE_NLANES
 511           (vec_select:<VEL>
 512             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 513             (parallel [(match_operand:SI 2 "immediate_operand")])))
 514       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 515   "TARGET_SIMD"
 516   {
 517     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 518     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 519   }
 520   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 521 )
 522
 523 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 524  [(set (match_operand:VMUL 0 "register_operand" "=w")
 525     (mult:VMUL
 526       (vec_duplicate:VMUL
 527             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 528       (match_operand:VMUL 2 "register_operand" "w")))]
 529   "TARGET_SIMD"
 530   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 531   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 532 )
 533
 534 (define_insn "aarch64_rsqrte<mode>"
 535   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 536         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 537                      UNSPEC_RSQRTE))]
 538   "TARGET_SIMD"
 539   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 540   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 541
 542 (define_insn "aarch64_rsqrts<mode>"
 543   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 544         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 545                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 546          UNSPEC_RSQRTS))]
 547   "TARGET_SIMD"
 548   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 549   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 550
 551 (define_expand "rsqrt<mode>2"
 552   [(set (match_operand:VALLF 0 "register_operand" "=w")
 553         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 554                      UNSPEC_RSQRT))]
 555   "TARGET_SIMD"
 556 {
 557   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 558   DONE;
 559 })
 560
 561 (define_insn "*aarch64_mul3_elt_to_64v2df"
 562   [(set (match_operand:DF 0 "register_operand" "=w")
 563      (mult:DF
 564        (vec_select:DF
 565          (match_operand:V2DF 1 "register_operand" "w")
 566          (parallel [(match_operand:SI 2 "immediate_operand")]))
 567        (match_operand:DF 3 "register_operand" "w")))]
 568   "TARGET_SIMD"
 569   {
 570     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 571     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 572   }
 573   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 574 )
 575
 576 (define_insn "neg<mode>2"
 577   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 578         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 579   "TARGET_SIMD"
 580   "neg\t%0.<Vtype>, %1.<Vtype>"
 581   [(set_attr "type" "neon_neg<q>")]
 582 )
 583
 584 (define_insn "abs<mode>2"
 585   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 586         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 587   "TARGET_SIMD"
 588   "abs\t%0.<Vtype>, %1.<Vtype>"
 589   [(set_attr "type" "neon_abs<q>")]
 590 )
 591
 592 ;; The intrinsic version of integer ABS must not be allowed to
 593 ;; combine with any operation with an integerated ABS step, such
 594 ;; as SABD.
 595 (define_insn "aarch64_abs<mode>"
 596   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 597           (unspec:VSDQ_I_DI
 598             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 599            UNSPEC_ABS))]
 600   "TARGET_SIMD"
 601   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 602   [(set_attr "type" "neon_abs<q>")]
 603 )
 604
 605 (define_insn "abd<mode>_3"
 606   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 607         (abs:VDQ_BHSI (minus:VDQ_BHSI
 608                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 609                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 610   "TARGET_SIMD"
 611   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 612   [(set_attr "type" "neon_abd<q>")]
 613 )
 614
 615 (define_insn "aarch64_<sur>abdl2<mode>_3"
 616   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 617         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 618                           (match_operand:VDQV_S 2 "register_operand" "w")]
 619         ABDL2))]
 620   "TARGET_SIMD"
 621   "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 622   [(set_attr "type" "neon_abd<q>")]
 623 )
 624
 625 (define_insn "aarch64_<sur>abal<mode>_4"
 626   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 627         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 628                           (match_operand:VDQV_S 2 "register_operand" "w")
 629                          (match_operand:<VDBLW> 3 "register_operand" "0")]
 630         ABAL))]
 631   "TARGET_SIMD"
 632   "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 633   [(set_attr "type" "neon_arith_acc<q>")]
 634 )
 635
 636 (define_insn "aarch64_<sur>adalp<mode>_3"
 637   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 638         (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
 639                           (match_operand:<VDBLW> 2 "register_operand" "0")]
 640         ADALP))]
 641   "TARGET_SIMD"
 642   "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
 643   [(set_attr "type" "neon_reduc_add<q>")]
 644 )
 645
 646 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
 647 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
 648 ;; reduction of the difference into a V4SI vector and accumulate that into
 649 ;; operand 3 before copying that into the result operand 0.
 650 ;; Perform that with a sequence of:
 651 ;; UABDL2       tmp.8h, op1.16b, op2.16b
 652 ;; UABAL        tmp.8h, op1.16b, op2.16b
 653 ;; UADALP       op3.4s, tmp.8h
 654 ;; MOV          op0, op3 // should be eliminated in later passes.
 655 ;; The signed version just uses the signed variants of the above instructions.
 656
 657 (define_expand "<sur>sadv16qi"
 658   [(use (match_operand:V4SI 0 "register_operand"))
 659    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
 660                   (use (match_operand:V16QI 2 "register_operand"))] ABAL)
 661    (use (match_operand:V4SI 3 "register_operand"))]
 662   "TARGET_SIMD"
 663   {
 664     rtx reduc = gen_reg_rtx (V8HImode);
 665     emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
 666                                                operands[2]));
 667     emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
 668                                               operands[2], reduc));
 669     emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
 670                                               operands[3]));
 671     emit_move_insn (operands[0], operands[3]);
 672     DONE;
 673   }
 674 )
 675
 676 (define_insn "aba<mode>_3"
 677   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 678         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 679                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 680                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 681                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 682   "TARGET_SIMD"
 683   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 684   [(set_attr "type" "neon_arith_acc<q>")]
 685 )
 686
 687 (define_insn "fabd<mode>3"
 688   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 689         (abs:VHSDF_HSDF
 690           (minus:VHSDF_HSDF
 691             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 692             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 693   "TARGET_SIMD"
 694   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 695   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 696 )
 697
 698 ;; For AND (vector, register) and BIC (vector, immediate)
 699 (define_insn "and<mode>3"
 700   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 701         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 702                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 703   "TARGET_SIMD"
 704   {
 705     switch (which_alternative)
 706       {
 707       case 0:
 708         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 709       case 1:
 710         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 711                                                   AARCH64_CHECK_BIC);
 712       default:
 713         gcc_unreachable ();
 714       }
 715   }
 716   [(set_attr "type" "neon_logic<q>")]
 717 )
 718
 719 ;; For ORR (vector, register) and ORR (vector, immediate)
 720 (define_insn "ior<mode>3"
 721   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 722         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 723                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 724   "TARGET_SIMD"
 725   {
 726     switch (which_alternative)
 727       {
 728       case 0:
 729         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 730       case 1:
 731         return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
 732                                                   AARCH64_CHECK_ORR);
 733       default:
 734         gcc_unreachable ();
 735       }
 736   }
 737   [(set_attr "type" "neon_logic<q>")]
 738 )
 739
 740 (define_insn "xor<mode>3"
 741   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 742         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 743                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 744   "TARGET_SIMD"
 745   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 746   [(set_attr "type" "neon_logic<q>")]
 747 )
 748
 749 (define_insn "one_cmpl<mode>2"
 750   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 751         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 752   "TARGET_SIMD"
 753   "not\t%0.<Vbtype>, %1.<Vbtype>"
 754   [(set_attr "type" "neon_logic<q>")]
 755 )
 756
 757 (define_insn "aarch64_simd_vec_set<mode>"
 758   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
 759         (vec_merge:VALL_F16
 760             (vec_duplicate:VALL_F16
 761                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
 762             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
 763             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 764   "TARGET_SIMD"
 765   {
 766    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 767    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 768    switch (which_alternative)
 769      {
 770      case 0:
 771         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 772      case 1:
 773         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
 774      case 2:
 775         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 776      default:
 777         gcc_unreachable ();
 778      }
 779   }
 780   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
 781 )
 782
 783 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 784   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 785         (vec_merge:VALL_F16
 786             (vec_duplicate:VALL_F16
 787               (vec_select:<VEL>
 788                 (match_operand:VALL_F16 3 "register_operand" "w")
 789                 (parallel
 790                   [(match_operand:SI 4 "immediate_operand" "i")])))
 791             (match_operand:VALL_F16 1 "register_operand" "0")
 792             (match_operand:SI 2 "immediate_operand" "i")))]
 793   "TARGET_SIMD"
 794   {
 795     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 796     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 797     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 798
 799     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 800   }
 801   [(set_attr "type" "neon_ins<q>")]
 802 )
 803
 804 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 805   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 806         (vec_merge:VALL_F16_NO_V2Q
 807             (vec_duplicate:VALL_F16_NO_V2Q
 808               (vec_select:<VEL>
 809                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 810                 (parallel
 811                   [(match_operand:SI 4 "immediate_operand" "i")])))
 812             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 813             (match_operand:SI 2 "immediate_operand" "i")))]
 814   "TARGET_SIMD"
 815   {
 816     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 817     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 818     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 819                                            INTVAL (operands[4]));
 820
 821     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 822   }
 823   [(set_attr "type" "neon_ins<q>")]
 824 )
 825
 826 (define_insn "aarch64_simd_lshr<mode>"
 827  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 828        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 829                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 830  "TARGET_SIMD"
 831  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 832   [(set_attr "type" "neon_shift_imm<q>")]
 833 )
 834
 835 (define_insn "aarch64_simd_ashr<mode>"
 836  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 837        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 838                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 839  "TARGET_SIMD"
 840  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 841   [(set_attr "type" "neon_shift_imm<q>")]
 842 )
 843
 844 (define_insn "aarch64_simd_imm_shl<mode>"
 845  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 846        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 847                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 848  "TARGET_SIMD"
 849   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 850   [(set_attr "type" "neon_shift_imm<q>")]
 851 )
 852
 853 (define_insn "aarch64_simd_reg_sshl<mode>"
 854  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 855        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 856                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 857  "TARGET_SIMD"
 858  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 859   [(set_attr "type" "neon_shift_reg<q>")]
 860 )
 861
 862 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 863  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 864        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 865                     (match_operand:VDQ_I 2 "register_operand" "w")]
 866                    UNSPEC_ASHIFT_UNSIGNED))]
 867  "TARGET_SIMD"
 868  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 869   [(set_attr "type" "neon_shift_reg<q>")]
 870 )
 871
 872 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 873  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 874        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 875                     (match_operand:VDQ_I 2 "register_operand" "w")]
 876                    UNSPEC_ASHIFT_SIGNED))]
 877  "TARGET_SIMD"
 878  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 879   [(set_attr "type" "neon_shift_reg<q>")]
 880 )
 881
 882 (define_expand "ashl<mode>3"
 883   [(match_operand:VDQ_I 0 "register_operand" "")
 884    (match_operand:VDQ_I 1 "register_operand" "")
 885    (match_operand:SI  2 "general_operand" "")]
 886  "TARGET_SIMD"
 887 {
 888   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 889   int shift_amount;
 890
 891   if (CONST_INT_P (operands[2]))
 892     {
 893       shift_amount = INTVAL (operands[2]);
 894       if (shift_amount >= 0 && shift_amount < bit_width)
 895         {
 896           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 897                                                        shift_amount);
 898           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 899                                                      operands[1],
 900                                                      tmp));
 901           DONE;
 902         }
 903       else
 904         {
 905           operands[2] = force_reg (SImode, operands[2]);
 906         }
 907     }
 908   else if (MEM_P (operands[2]))
 909     {
 910       operands[2] = force_reg (SImode, operands[2]);
 911     }
 912
 913   if (REG_P (operands[2]))
 914     {
 915       rtx tmp = gen_reg_rtx (<MODE>mode);
 916       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 917                                              convert_to_mode (<VEL>mode,
 918                                                               operands[2],
 919                                                               0)));
 920       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 921                                                   tmp));
 922       DONE;
 923     }
 924   else
 925     FAIL;
 926 }
 927 )
 928
 929 (define_expand "lshr<mode>3"
 930   [(match_operand:VDQ_I 0 "register_operand" "")
 931    (match_operand:VDQ_I 1 "register_operand" "")
 932    (match_operand:SI  2 "general_operand" "")]
 933  "TARGET_SIMD"
 934 {
 935   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 936   int shift_amount;
 937
 938   if (CONST_INT_P (operands[2]))
 939     {
 940       shift_amount = INTVAL (operands[2]);
 941       if (shift_amount > 0 && shift_amount <= bit_width)
 942         {
 943           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 944                                                        shift_amount);
 945           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 946                                                   operands[1],
 947                                                   tmp));
 948           DONE;
 949         }
 950       else
 951         operands[2] = force_reg (SImode, operands[2]);
 952     }
 953   else if (MEM_P (operands[2]))
 954     {
 955       operands[2] = force_reg (SImode, operands[2]);
 956     }
 957
 958   if (REG_P (operands[2]))
 959     {
 960       rtx tmp = gen_reg_rtx (SImode);
 961       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 962       emit_insn (gen_negsi2 (tmp, operands[2]));
 963       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 964                                              convert_to_mode (<VEL>mode,
 965                                                               tmp, 0)));
 966       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 967                                                           operands[1],
 968                                                           tmp1));
 969       DONE;
 970     }
 971   else
 972     FAIL;
 973 }
 974 )
 975
 976 (define_expand "ashr<mode>3"
 977   [(match_operand:VDQ_I 0 "register_operand" "")
 978    (match_operand:VDQ_I 1 "register_operand" "")
 979    (match_operand:SI  2 "general_operand" "")]
 980  "TARGET_SIMD"
 981 {
 982   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 983   int shift_amount;
 984
 985   if (CONST_INT_P (operands[2]))
 986     {
 987       shift_amount = INTVAL (operands[2]);
 988       if (shift_amount > 0 && shift_amount <= bit_width)
 989         {
 990           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 991                                                        shift_amount);
 992           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
 993                                                   operands[1],
 994                                                   tmp));
 995           DONE;
 996         }
 997       else
 998         operands[2] = force_reg (SImode, operands[2]);
 999     }
1000   else if (MEM_P (operands[2]))
1001     {
1002       operands[2] = force_reg (SImode, operands[2]);
1003     }
1004
1005   if (REG_P (operands[2]))
1006     {
1007       rtx tmp = gen_reg_rtx (SImode);
1008       rtx tmp1 = gen_reg_rtx (<MODE>mode);
1009       emit_insn (gen_negsi2 (tmp, operands[2]));
1010       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1011                                              convert_to_mode (<VEL>mode,
1012                                                               tmp, 0)));
1013       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1014                                                         operands[1],
1015                                                         tmp1));
1016       DONE;
1017     }
1018   else
1019     FAIL;
1020 }
1021 )
1022
1023 (define_expand "vashl<mode>3"
1024  [(match_operand:VDQ_I 0 "register_operand" "")
1025   (match_operand:VDQ_I 1 "register_operand" "")
1026   (match_operand:VDQ_I 2 "register_operand" "")]
1027  "TARGET_SIMD"
1028 {
1029   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1030                                               operands[2]));
1031   DONE;
1032 })
1033
1034 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1035 ;; Negating individual lanes most certainly offsets the
1036 ;; gain from vectorization.
1037 (define_expand "vashr<mode>3"
1038  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1039   (match_operand:VDQ_BHSI 1 "register_operand" "")
1040   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1041  "TARGET_SIMD"
1042 {
1043   rtx neg = gen_reg_rtx (<MODE>mode);
1044   emit (gen_neg<mode>2 (neg, operands[2]));
1045   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1046                                                     neg));
1047   DONE;
1048 })
1049
1050 ;; DI vector shift
1051 (define_expand "aarch64_ashr_simddi"
1052   [(match_operand:DI 0 "register_operand" "=w")
1053    (match_operand:DI 1 "register_operand" "w")
1054    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1055   "TARGET_SIMD"
1056   {
1057     /* An arithmetic shift right by 64 fills the result with copies of the sign
1058        bit, just like asr by 63 - however the standard pattern does not handle
1059        a shift by 64.  */
1060     if (INTVAL (operands[2]) == 64)
1061       operands[2] = GEN_INT (63);
1062     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1063     DONE;
1064   }
1065 )
1066
1067 (define_expand "vlshr<mode>3"
1068  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1069   (match_operand:VDQ_BHSI 1 "register_operand" "")
1070   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1071  "TARGET_SIMD"
1072 {
1073   rtx neg = gen_reg_rtx (<MODE>mode);
1074   emit (gen_neg<mode>2 (neg, operands[2]));
1075   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1076                                                       neg));
1077   DONE;
1078 })
1079
1080 (define_expand "aarch64_lshr_simddi"
1081   [(match_operand:DI 0 "register_operand" "=w")
1082    (match_operand:DI 1 "register_operand" "w")
1083    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1084   "TARGET_SIMD"
1085   {
1086     if (INTVAL (operands[2]) == 64)
1087       emit_move_insn (operands[0], const0_rtx);
1088     else
1089       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1090     DONE;
1091   }
1092 )
1093
1094 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1095 (define_insn "vec_shr_<mode>"
1096   [(set (match_operand:VD 0 "register_operand" "=w")
1097         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1098                     (match_operand:SI 2 "immediate_operand" "i")]
1099                    UNSPEC_VEC_SHR))]
1100   "TARGET_SIMD"
1101   {
1102     if (BYTES_BIG_ENDIAN)
1103       return "shl %d0, %d1, %2";
1104     else
1105       return "ushr %d0, %d1, %2";
1106   }
1107   [(set_attr "type" "neon_shift_imm")]
1108 )
1109
1110 (define_expand "vec_set<mode>"
1111   [(match_operand:VALL_F16 0 "register_operand" "+w")
1112    (match_operand:<VEL> 1 "register_operand" "w")
1113    (match_operand:SI 2 "immediate_operand" "")]
1114   "TARGET_SIMD"
1115   {
1116     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1117     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1118                                           GEN_INT (elem), operands[0]));
1119     DONE;
1120   }
1121 )
1122
1123
1124 (define_insn "aarch64_mla<mode>"
1125  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1126        (plus:VDQ_BHSI (mult:VDQ_BHSI
1127                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1128                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1129                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1130  "TARGET_SIMD"
1131  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1132   [(set_attr "type" "neon_mla_<Vetype><q>")]
1133 )
1134
1135 (define_insn "*aarch64_mla_elt<mode>"
1136  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1137        (plus:VDQHS
1138          (mult:VDQHS
1139            (vec_duplicate:VDQHS
1140               (vec_select:<VEL>
1141                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1142                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1143            (match_operand:VDQHS 3 "register_operand" "w"))
1144          (match_operand:VDQHS 4 "register_operand" "0")))]
1145  "TARGET_SIMD"
1146   {
1147     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1148     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1149   }
1150   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1151 )
1152
1153 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1154  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1155        (plus:VDQHS
1156          (mult:VDQHS
1157            (vec_duplicate:VDQHS
1158               (vec_select:<VEL>
1159                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1160                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1161            (match_operand:VDQHS 3 "register_operand" "w"))
1162          (match_operand:VDQHS 4 "register_operand" "0")))]
1163  "TARGET_SIMD"
1164   {
1165     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1166     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1167   }
1168   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1169 )
1170
1171 (define_insn "*aarch64_mla_elt_merge<mode>"
1172   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1173         (plus:VDQHS
1174           (mult:VDQHS (vec_duplicate:VDQHS
1175                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1176                 (match_operand:VDQHS 2 "register_operand" "w"))
1177           (match_operand:VDQHS 3 "register_operand" "0")))]
1178  "TARGET_SIMD"
1179  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1180   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1181 )
1182
1183 (define_insn "aarch64_mls<mode>"
1184  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1185        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1186                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1187                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1188  "TARGET_SIMD"
1189  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1190   [(set_attr "type" "neon_mla_<Vetype><q>")]
1191 )
1192
1193 (define_insn "*aarch64_mls_elt<mode>"
1194  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1195        (minus:VDQHS
1196          (match_operand:VDQHS 4 "register_operand" "0")
1197          (mult:VDQHS
1198            (vec_duplicate:VDQHS
1199               (vec_select:<VEL>
1200                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1201                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1202            (match_operand:VDQHS 3 "register_operand" "w"))))]
1203  "TARGET_SIMD"
1204   {
1205     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1206     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1207   }
1208   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1212  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1213        (minus:VDQHS
1214          (match_operand:VDQHS 4 "register_operand" "0")
1215          (mult:VDQHS
1216            (vec_duplicate:VDQHS
1217               (vec_select:<VEL>
1218                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1219                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1220            (match_operand:VDQHS 3 "register_operand" "w"))))]
1221  "TARGET_SIMD"
1222   {
1223     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1224     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1225   }
1226   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1227 )
1228
1229 (define_insn "*aarch64_mls_elt_merge<mode>"
1230   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1231         (minus:VDQHS
1232           (match_operand:VDQHS 1 "register_operand" "0")
1233           (mult:VDQHS (vec_duplicate:VDQHS
1234                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1235                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1236   "TARGET_SIMD"
1237   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1238   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1239 )
1240
1241 ;; Max/Min operations.
1242 (define_insn "<su><maxmin><mode>3"
1243  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1244        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1245                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1246  "TARGET_SIMD"
1247  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1248   [(set_attr "type" "neon_minmax<q>")]
1249 )
1250
1251 (define_expand "<su><maxmin>v2di3"
1252  [(set (match_operand:V2DI 0 "register_operand" "")
1253        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1254                     (match_operand:V2DI 2 "register_operand" "")))]
1255  "TARGET_SIMD"
1256 {
1257   enum rtx_code cmp_operator;
1258   rtx cmp_fmt;
1259
1260   switch (<CODE>)
1261     {
1262     case UMIN:
1263       cmp_operator = LTU;
1264       break;
1265     case SMIN:
1266       cmp_operator = LT;
1267       break;
1268     case UMAX:
1269       cmp_operator = GTU;
1270       break;
1271     case SMAX:
1272       cmp_operator = GT;
1273       break;
1274     default:
1275       gcc_unreachable ();
1276     }
1277
1278   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1279   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1280               operands[2], cmp_fmt, operands[1], operands[2]));
1281   DONE;
1282 })
1283
1284 ;; Pairwise Integer Max/Min operations.
1285 (define_insn "aarch64_<maxmin_uns>p<mode>"
1286  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1287        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1288                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1289                         MAXMINV))]
1290  "TARGET_SIMD"
1291  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1292   [(set_attr "type" "neon_minmax<q>")]
1293 )
1294
1295 ;; Pairwise FP Max/Min operations.
1296 (define_insn "aarch64_<maxmin_uns>p<mode>"
1297  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1298        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1299                       (match_operand:VHSDF 2 "register_operand" "w")]
1300                       FMAXMINV))]
1301  "TARGET_SIMD"
1302  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1303   [(set_attr "type" "neon_minmax<q>")]
1304 )
1305
1306 ;; vec_concat gives a new vector with the low elements from operand 1, and
1307 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1308 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1309 ;; What that means, is that the RTL descriptions of the below patterns
1310 ;; need to change depending on endianness.
1311
1312 ;; Move to the low architectural bits of the register.
1313 ;; On little-endian this is { operand, zeroes }
1314 ;; On big-endian this is { zeroes, operand }
1315
1316 (define_insn "move_lo_quad_internal_<mode>"
1317   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1318         (vec_concat:VQ_NO2E
1319           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1320           (vec_duplicate:<VHALF> (const_int 0))))]
1321   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1322   "@
1323    dup\\t%d0, %1.d[0]
1324    fmov\\t%d0, %1
1325    dup\\t%d0, %1"
1326   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1327    (set_attr "simd" "yes,*,yes")
1328    (set_attr "fp" "*,yes,*")
1329    (set_attr "length" "4")]
1330 )
1331
1332 (define_insn "move_lo_quad_internal_<mode>"
1333   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1334         (vec_concat:VQ_2E
1335           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1336           (const_int 0)))]
1337   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1338   "@
1339    dup\\t%d0, %1.d[0]
1340    fmov\\t%d0, %1
1341    dup\\t%d0, %1"
1342   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1343    (set_attr "simd" "yes,*,yes")
1344    (set_attr "fp" "*,yes,*")
1345    (set_attr "length" "4")]
1346 )
1347
1348 (define_insn "move_lo_quad_internal_be_<mode>"
1349   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1350         (vec_concat:VQ_NO2E
1351           (vec_duplicate:<VHALF> (const_int 0))
1352           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1353   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1354   "@
1355    dup\\t%d0, %1.d[0]
1356    fmov\\t%d0, %1
1357    dup\\t%d0, %1"
1358   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1359    (set_attr "simd" "yes,*,yes")
1360    (set_attr "fp" "*,yes,*")
1361    (set_attr "length" "4")]
1362 )
1363
1364 (define_insn "move_lo_quad_internal_be_<mode>"
1365   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1366         (vec_concat:VQ_2E
1367           (const_int 0)
1368           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1369   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1370   "@
1371    dup\\t%d0, %1.d[0]
1372    fmov\\t%d0, %1
1373    dup\\t%d0, %1"
1374   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1375    (set_attr "simd" "yes,*,yes")
1376    (set_attr "fp" "*,yes,*")
1377    (set_attr "length" "4")]
1378 )
1379
1380 (define_expand "move_lo_quad_<mode>"
1381   [(match_operand:VQ 0 "register_operand")
1382    (match_operand:VQ 1 "register_operand")]
1383   "TARGET_SIMD"
1384 {
1385   if (BYTES_BIG_ENDIAN)
1386     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1387   else
1388     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1389   DONE;
1390 }
1391 )
1392
1393 ;; Move operand1 to the high architectural bits of the register, keeping
1394 ;; the low architectural bits of operand2.
1395 ;; For little-endian this is { operand2, operand1 }
1396 ;; For big-endian this is { operand1, operand2 }
1397
1398 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1399   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1400         (vec_concat:VQ
1401           (vec_select:<VHALF>
1402                 (match_dup 0)
1403                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1404           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1405   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1406   "@
1407    ins\\t%0.d[1], %1.d[0]
1408    ins\\t%0.d[1], %1"
1409   [(set_attr "type" "neon_ins")]
1410 )
1411
1412 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1413   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1414         (vec_concat:VQ
1415           (match_operand:<VHALF> 1 "register_operand" "w,r")
1416           (vec_select:<VHALF>
1417                 (match_dup 0)
1418                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1419   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1420   "@
1421    ins\\t%0.d[1], %1.d[0]
1422    ins\\t%0.d[1], %1"
1423   [(set_attr "type" "neon_ins")]
1424 )
1425
1426 (define_expand "move_hi_quad_<mode>"
1427  [(match_operand:VQ 0 "register_operand" "")
1428   (match_operand:<VHALF> 1 "register_operand" "")]
1429  "TARGET_SIMD"
1430 {
1431   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1432   if (BYTES_BIG_ENDIAN)
1433     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1434                     operands[1], p));
1435   else
1436     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1437                     operands[1], p));
1438   DONE;
1439 })
1440
1441 ;; Narrowing operations.
1442
1443 ;; For doubles.
1444 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1445  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1446        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1447  "TARGET_SIMD"
1448  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1449   [(set_attr "type" "neon_shift_imm_narrow_q")]
1450 )
1451
1452 (define_expand "vec_pack_trunc_<mode>"
1453  [(match_operand:<VNARROWD> 0 "register_operand" "")
1454   (match_operand:VDN 1 "register_operand" "")
1455   (match_operand:VDN 2 "register_operand" "")]
1456  "TARGET_SIMD"
1457 {
1458   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1459   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1460   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1461
1462   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1463   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1464   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1465   DONE;
1466 })
1467
1468 ;; For quads.
1469
1470 (define_insn "vec_pack_trunc_<mode>"
1471  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1472        (vec_concat:<VNARROWQ2>
1473          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1474          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1475  "TARGET_SIMD"
1476  {
1477    if (BYTES_BIG_ENDIAN)
1478      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1479    else
1480      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1481  }
1482   [(set_attr "type" "multiple")
1483    (set_attr "length" "8")]
1484 )
1485
1486 ;; Widening operations.
1487
1488 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1489   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1490         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491                                (match_operand:VQW 1 "register_operand" "w")
1492                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1493                             )))]
1494   "TARGET_SIMD"
1495   "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1496   [(set_attr "type" "neon_shift_imm_long")]
1497 )
1498
1499 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1500   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1501         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1502                                (match_operand:VQW 1 "register_operand" "w")
1503                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1504                             )))]
1505   "TARGET_SIMD"
1506   "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1507   [(set_attr "type" "neon_shift_imm_long")]
1508 )
1509
1510 (define_expand "vec_unpack<su>_hi_<mode>"
1511   [(match_operand:<VWIDE> 0 "register_operand" "")
1512    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1513   "TARGET_SIMD"
1514   {
1515     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1516     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1517                                                           operands[1], p));
1518     DONE;
1519   }
1520 )
1521
1522 (define_expand "vec_unpack<su>_lo_<mode>"
1523   [(match_operand:<VWIDE> 0 "register_operand" "")
1524    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1525   "TARGET_SIMD"
1526   {
1527     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1528     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1529                                                           operands[1], p));
1530     DONE;
1531   }
1532 )
1533
1534 ;; Widening arithmetic.
1535
1536 (define_insn "*aarch64_<su>mlal_lo<mode>"
1537   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1538         (plus:<VWIDE>
1539           (mult:<VWIDE>
1540               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1541                  (match_operand:VQW 2 "register_operand" "w")
1542                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1543               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1544                  (match_operand:VQW 4 "register_operand" "w")
1545                  (match_dup 3))))
1546           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1547   "TARGET_SIMD"
1548   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1549   [(set_attr "type" "neon_mla_<Vetype>_long")]
1550 )
1551
1552 (define_insn "*aarch64_<su>mlal_hi<mode>"
1553   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1554         (plus:<VWIDE>
1555           (mult:<VWIDE>
1556               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1557                  (match_operand:VQW 2 "register_operand" "w")
1558                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1559               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1560                  (match_operand:VQW 4 "register_operand" "w")
1561                  (match_dup 3))))
1562           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1563   "TARGET_SIMD"
1564   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1565   [(set_attr "type" "neon_mla_<Vetype>_long")]
1566 )
1567
1568 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1569   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1570         (minus:<VWIDE>
1571           (match_operand:<VWIDE> 1 "register_operand" "0")
1572           (mult:<VWIDE>
1573               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1574                  (match_operand:VQW 2 "register_operand" "w")
1575                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1576               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1577                  (match_operand:VQW 4 "register_operand" "w")
1578                  (match_dup 3))))))]
1579   "TARGET_SIMD"
1580   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1581   [(set_attr "type" "neon_mla_<Vetype>_long")]
1582 )
1583
1584 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1585   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1586         (minus:<VWIDE>
1587           (match_operand:<VWIDE> 1 "register_operand" "0")
1588           (mult:<VWIDE>
1589               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1590                  (match_operand:VQW 2 "register_operand" "w")
1591                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1592               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1593                  (match_operand:VQW 4 "register_operand" "w")
1594                  (match_dup 3))))))]
1595   "TARGET_SIMD"
1596   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1597   [(set_attr "type" "neon_mla_<Vetype>_long")]
1598 )
1599
1600 (define_insn "*aarch64_<su>mlal<mode>"
1601   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1602         (plus:<VWIDE>
1603           (mult:<VWIDE>
1604             (ANY_EXTEND:<VWIDE>
1605               (match_operand:VD_BHSI 1 "register_operand" "w"))
1606             (ANY_EXTEND:<VWIDE>
1607               (match_operand:VD_BHSI 2 "register_operand" "w")))
1608           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1609   "TARGET_SIMD"
1610   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1611   [(set_attr "type" "neon_mla_<Vetype>_long")]
1612 )
1613
1614 (define_insn "*aarch64_<su>mlsl<mode>"
1615   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1616         (minus:<VWIDE>
1617           (match_operand:<VWIDE> 1 "register_operand" "0")
1618           (mult:<VWIDE>
1619             (ANY_EXTEND:<VWIDE>
1620               (match_operand:VD_BHSI 2 "register_operand" "w"))
1621             (ANY_EXTEND:<VWIDE>
1622               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1623   "TARGET_SIMD"
1624   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1625   [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
1628 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1629  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1631                            (match_operand:VQW 1 "register_operand" "w")
1632                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1633                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1634                            (match_operand:VQW 2 "register_operand" "w")
1635                            (match_dup 3)))))]
1636   "TARGET_SIMD"
1637   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1638   [(set_attr "type" "neon_mul_<Vetype>_long")]
1639 )
1640
1641 (define_expand "vec_widen_<su>mult_lo_<mode>"
1642   [(match_operand:<VWIDE> 0 "register_operand" "")
1643    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1644    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1645  "TARGET_SIMD"
1646  {
1647    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1648    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1649                                                        operands[1],
1650                                                        operands[2], p));
1651    DONE;
1652  }
1653 )
1654
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658                             (match_operand:VQW 1 "register_operand" "w")
1659                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661                             (match_operand:VQW 2 "register_operand" "w")
1662                             (match_dup 3)))))]
1663   "TARGET_SIMD"
1664   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665   [(set_attr "type" "neon_mul_<Vetype>_long")]
1666 )
1667
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669   [(match_operand:<VWIDE> 0 "register_operand" "")
1670    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1672  "TARGET_SIMD"
1673  {
1674    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1676                                                        operands[1],
1677                                                        operands[2], p));
1678    DONE;
1679
1680  }
1681 )
1682
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1688 ;;
1689 ;; Floating-point operations can raise an exception.  Vectorizing such
1690 ;; operations are safe because of reasons explained below.
1691 ;;
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling, however this is an optional feature.  In the
1694 ;; event of a floating-point exception being raised by vectorised
1695 ;; code then:
1696 ;; 1.  If trapped floating-point exceptions are available, then a trap
1697 ;;     will be taken when any lane raises an enabled exception.  A trap
1698 ;;     handler may determine which lane raised the exception.
1699 ;; 2.  Alternatively a sticky exception flag is set in the
1700 ;;     floating-point status register (FPSR).  Software may explicitly
1701 ;;     test the exception flags, in which case the tests will either
1702 ;;     prevent vectorisation, allowing precise identification of the
1703 ;;     failing operation, or if tested outside of vectorisable regions
1704 ;;     then the specific operation and lane are not of interest.
1705
1706 ;; FP arithmetic operations.
1707
1708 (define_insn "add<mode>3"
1709  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711                    (match_operand:VHSDF 2 "register_operand" "w")))]
1712  "TARGET_SIMD"
1713  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1715 )
1716
1717 (define_insn "sub<mode>3"
1718  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720                     (match_operand:VHSDF 2 "register_operand" "w")))]
1721  "TARGET_SIMD"
1722  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 )
1725
1726 (define_insn "mul<mode>3"
1727  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729                    (match_operand:VHSDF 2 "register_operand" "w")))]
1730  "TARGET_SIMD"
1731  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1733 )
1734
1735 (define_expand "div<mode>3"
1736  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738                   (match_operand:VHSDF 2 "register_operand" "w")))]
1739  "TARGET_SIMD"
1740 {
1741   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1742     DONE;
1743
1744   operands[1] = force_reg (<MODE>mode, operands[1]);
1745 })
1746
1747 (define_insn "*div<mode>3"
1748  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750                  (match_operand:VHSDF 2 "register_operand" "w")))]
1751  "TARGET_SIMD"
1752  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753   [(set_attr "type" "neon_fp_div_<stype><q>")]
1754 )
1755
1756 (define_insn "neg<mode>2"
1757  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1759  "TARGET_SIMD"
1760  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1762 )
1763
1764 (define_insn "abs<mode>2"
1765  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767  "TARGET_SIMD"
1768  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1770 )
1771
1772 (define_insn "fma<mode>4"
1773   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775                   (match_operand:VHSDF 2 "register_operand" "w")
1776                   (match_operand:VHSDF 3 "register_operand" "0")))]
1777   "TARGET_SIMD"
1778  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1780 )
1781
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783   [(set (match_operand:VDQF 0 "register_operand" "=w")
1784     (fma:VDQF
1785       (vec_duplicate:VDQF
1786         (vec_select:<VEL>
1787           (match_operand:VDQF 1 "register_operand" "<h_con>")
1788           (parallel [(match_operand:SI 2 "immediate_operand")])))
1789       (match_operand:VDQF 3 "register_operand" "w")
1790       (match_operand:VDQF 4 "register_operand" "0")))]
1791   "TARGET_SIMD"
1792   {
1793     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1795   }
1796   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1797 )
1798
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1801     (fma:VDQSF
1802       (vec_duplicate:VDQSF
1803         (vec_select:<VEL>
1804           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805           (parallel [(match_operand:SI 2 "immediate_operand")])))
1806       (match_operand:VDQSF 3 "register_operand" "w")
1807       (match_operand:VDQSF 4 "register_operand" "0")))]
1808   "TARGET_SIMD"
1809   {
1810     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1812   }
1813   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1814 )
1815
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817   [(set (match_operand:VMUL 0 "register_operand" "=w")
1818     (fma:VMUL
1819       (vec_duplicate:VMUL
1820           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821       (match_operand:VMUL 2 "register_operand" "w")
1822       (match_operand:VMUL 3 "register_operand" "0")))]
1823   "TARGET_SIMD"
1824   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1826 )
1827
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829   [(set (match_operand:DF 0 "register_operand" "=w")
1830     (fma:DF
1831         (vec_select:DF
1832           (match_operand:V2DF 1 "register_operand" "w")
1833           (parallel [(match_operand:SI 2 "immediate_operand")]))
1834       (match_operand:DF 3 "register_operand" "w")
1835       (match_operand:DF 4 "register_operand" "0")))]
1836   "TARGET_SIMD"
1837   {
1838     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1840   }
1841   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1842 )
1843
1844 (define_insn "fnma<mode>4"
1845   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1846         (fma:VHSDF
1847           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848           (match_operand:VHSDF 2 "register_operand" "w")
1849           (match_operand:VHSDF 3 "register_operand" "0")))]
1850   "TARGET_SIMD"
1851   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1853 )
1854
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856   [(set (match_operand:VDQF 0 "register_operand" "=w")
1857     (fma:VDQF
1858       (neg:VDQF
1859         (match_operand:VDQF 3 "register_operand" "w"))
1860       (vec_duplicate:VDQF
1861         (vec_select:<VEL>
1862           (match_operand:VDQF 1 "register_operand" "<h_con>")
1863           (parallel [(match_operand:SI 2 "immediate_operand")])))
1864       (match_operand:VDQF 4 "register_operand" "0")))]
1865   "TARGET_SIMD"
1866   {
1867     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1869   }
1870   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1871 )
1872
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1875     (fma:VDQSF
1876       (neg:VDQSF
1877         (match_operand:VDQSF 3 "register_operand" "w"))
1878       (vec_duplicate:VDQSF
1879         (vec_select:<VEL>
1880           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881           (parallel [(match_operand:SI 2 "immediate_operand")])))
1882       (match_operand:VDQSF 4 "register_operand" "0")))]
1883   "TARGET_SIMD"
1884   {
1885     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1887   }
1888   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1889 )
1890
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892   [(set (match_operand:VMUL 0 "register_operand" "=w")
1893     (fma:VMUL
1894       (neg:VMUL
1895         (match_operand:VMUL 2 "register_operand" "w"))
1896       (vec_duplicate:VMUL
1897         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898       (match_operand:VMUL 3 "register_operand" "0")))]
1899   "TARGET_SIMD"
1900   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1902 )
1903
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905   [(set (match_operand:DF 0 "register_operand" "=w")
1906     (fma:DF
1907       (vec_select:DF
1908         (match_operand:V2DF 1 "register_operand" "w")
1909         (parallel [(match_operand:SI 2 "immediate_operand")]))
1910       (neg:DF
1911         (match_operand:DF 3 "register_operand" "w"))
1912       (match_operand:DF 4 "register_operand" "0")))]
1913   "TARGET_SIMD"
1914   {
1915     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1917   }
1918   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1919 )
1920
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1923 (define_insn "<frint_pattern><mode>2"
1924   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1926                        FRINT))]
1927   "TARGET_SIMD"
1928   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929   [(set_attr "type" "neon_fp_round_<stype><q>")]
1930 )
1931
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937                                [(match_operand:VHSDF 1 "register_operand" "w")]
1938                                FCVT)))]
1939   "TARGET_SIMD"
1940   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1942 )
1943
1944 ;; HF Scalar variants of related SIMD instructions.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946   [(set (match_operand:HI 0 "register_operand" "=w")
1947         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1948                       FCVT)))]
1949   "TARGET_SIMD_F16INST"
1950   "fcvt<frint_suffix><su>\t%h0, %h1"
1951   [(set_attr "type" "neon_fp_to_int_s")]
1952 )
1953
1954 (define_insn "<optab>_trunchfhi2"
1955   [(set (match_operand:HI 0 "register_operand" "=w")
1956         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957   "TARGET_SIMD_F16INST"
1958   "fcvtz<su>\t%h0, %h1"
1959   [(set_attr "type" "neon_fp_to_int_s")]
1960 )
1961
1962 (define_insn "<optab>hihf2"
1963   [(set (match_operand:HF 0 "register_operand" "=w")
1964         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965   "TARGET_SIMD_F16INST"
1966   "<su_optab>cvtf\t%h0, %h1"
1967   [(set_attr "type" "neon_int_to_fp_s")]
1968 )
1969
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1973                                [(mult:VDQF
1974          (match_operand:VDQF 1 "register_operand" "w")
1975          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1976                                UNSPEC_FRINTZ)))]
1977   "TARGET_SIMD
1978    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1980   {
1981     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1982     char buf[64];
1983     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984     output_asm_insn (buf, operands);
1985     return "";
1986   }
1987   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1988 )
1989
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993                                [(match_operand:VHSDF 1 "register_operand")]
1994                                 UNSPEC_FRINTZ)))]
1995   "TARGET_SIMD"
1996   {})
1997
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001                                [(match_operand:VHSDF 1 "register_operand")]
2002                                 UNSPEC_FRINTZ)))]
2003   "TARGET_SIMD"
2004   {})
2005
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007   [(set (match_operand:VHSDF 0 "register_operand")
2008         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2009                        UNSPEC_FRINTZ))]
2010   "TARGET_SIMD"
2011   {})
2012
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2015         (FLOATUORS:VHSDF
2016           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2017   "TARGET_SIMD"
2018   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2020 )
2021
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2025
2026 ;; Float widening operations.
2027
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030         (float_extend:<VWIDE> (vec_select:<VHALF>
2031                                (match_operand:VQ_HSF 1 "register_operand" "w")
2032                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2033                             )))]
2034   "TARGET_SIMD"
2035   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036   [(set_attr "type" "neon_fp_cvt_widen_s")]
2037 )
2038
2039 ;; Convert between fixed-point and floating-point (vector modes)
2040
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043         (unspec:<VHSDF:FCVT_TARGET>
2044           [(match_operand:VHSDF 1 "register_operand" "w")
2045            (match_operand:SI 2 "immediate_operand" "i")]
2046          FCVT_F2FIXED))]
2047   "TARGET_SIMD"
2048   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2050 )
2051
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054         (unspec:<VDQ_HSDI:FCVT_TARGET>
2055           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056            (match_operand:SI 2 "immediate_operand" "i")]
2057          FCVT_FIXED2F))]
2058   "TARGET_SIMD"
2059   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2061 )
2062
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns their behavior is as required.
2071
2072 (define_expand "vec_unpacks_lo_<mode>"
2073   [(match_operand:<VWIDE> 0 "register_operand" "")
2074    (match_operand:VQ_HSF 1 "register_operand" "")]
2075   "TARGET_SIMD"
2076   {
2077     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2079                                                        operands[1], p));
2080     DONE;
2081   }
2082 )
2083
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086         (float_extend:<VWIDE> (vec_select:<VHALF>
2087                                (match_operand:VQ_HSF 1 "register_operand" "w")
2088                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2089                             )))]
2090   "TARGET_SIMD"
2091   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092   [(set_attr "type" "neon_fp_cvt_widen_s")]
2093 )
2094
2095 (define_expand "vec_unpacks_hi_<mode>"
2096   [(match_operand:<VWIDE> 0 "register_operand" "")
2097    (match_operand:VQ_HSF 1 "register_operand" "")]
2098   "TARGET_SIMD"
2099   {
2100     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2102                                                        operands[1], p));
2103     DONE;
2104   }
2105 )
2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108         (float_extend:<VWIDE>
2109           (match_operand:VDF 1 "register_operand" "w")))]
2110   "TARGET_SIMD"
2111   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112   [(set_attr "type" "neon_fp_cvt_widen_s")]
2113 )
2114
2115 ;; Float narrowing operations.
2116
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118   [(set (match_operand:VDF 0 "register_operand" "=w")
2119       (float_truncate:VDF
2120         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2121   "TARGET_SIMD"
2122   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2124 )
2125
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2128     (vec_concat:<VDBL>
2129       (match_operand:VDF 1 "register_operand" "0")
2130       (float_truncate:VDF
2131         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2135 )
2136
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139     (vec_concat:<VDBL>
2140       (float_truncate:VDF
2141         (match_operand:<VWIDE> 2 "register_operand" "w"))
2142       (match_operand:VDF 1 "register_operand" "0")))]
2143   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2146 )
2147
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149   [(match_operand:<VDBL> 0 "register_operand" "=w")
2150    (match_operand:VDF 1 "register_operand" "0")
2151    (match_operand:<VWIDE> 2 "register_operand" "w")]
2152   "TARGET_SIMD"
2153 {
2154   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157   emit_insn (gen (operands[0], operands[1], operands[2]));
2158   DONE;
2159 }
2160 )
2161
2162 (define_expand "vec_pack_trunc_v2df"
2163   [(set (match_operand:V4SF 0 "register_operand")
2164       (vec_concat:V4SF
2165         (float_truncate:V2SF
2166             (match_operand:V2DF 1 "register_operand"))
2167         (float_truncate:V2SF
2168             (match_operand:V2DF 2 "register_operand"))
2169           ))]
2170   "TARGET_SIMD"
2171   {
2172     rtx tmp = gen_reg_rtx (V2SFmode);
2173     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2175
2176     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178                                                    tmp, operands[hi]));
2179     DONE;
2180   }
2181 )
2182
2183 (define_expand "vec_pack_trunc_df"
2184   [(set (match_operand:V2SF 0 "register_operand")
2185       (vec_concat:V2SF
2186         (float_truncate:SF
2187             (match_operand:DF 1 "register_operand"))
2188         (float_truncate:SF
2189             (match_operand:DF 2 "register_operand"))
2190           ))]
2191   "TARGET_SIMD"
2192   {
2193     rtx tmp = gen_reg_rtx (V2SFmode);
2194     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2196
2197     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2200     DONE;
2201   }
2202 )
2203
2204 ;; FP Max/Min
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2206 ;; expression like:
2207 ;;      a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2210 ;;
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2216 ;; NaNs.
2217
2218 (define_insn "<su><maxmin><mode>3"
2219   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221                        (match_operand:VHSDF 2 "register_operand" "w")))]
2222   "TARGET_SIMD"
2223   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2225 )
2226
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2229 ;; which implement the IEEE fmax ()/fmin () functions.
2230 (define_insn "<maxmin_uns><mode>3"
2231   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233                       (match_operand:VHSDF 2 "register_operand" "w")]
2234                       FMAXMIN_UNS))]
2235   "TARGET_SIMD"
2236   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2238 )
2239
2240 ;; 'across lanes' add.
2241
2242 (define_expand "reduc_plus_scal_<mode>"
2243   [(match_operand:<VEL> 0 "register_operand" "=w")
2244    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2245                UNSPEC_ADDV)]
2246   "TARGET_SIMD"
2247   {
2248     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249     rtx scratch = gen_reg_rtx (<MODE>mode);
2250     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2252     DONE;
2253   }
2254 )
2255
2256 (define_insn "aarch64_faddp<mode>"
2257  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259                       (match_operand:VHSDF 2 "register_operand" "w")]
2260         UNSPEC_FADDV))]
2261  "TARGET_SIMD"
2262  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2264 )
2265
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267  [(set (match_operand:VDQV 0 "register_operand" "=w")
2268        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2269                     UNSPEC_ADDV))]
2270  "TARGET_SIMD"
2271  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272   [(set_attr "type" "neon_reduc_add<q>")]
2273 )
2274
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276  [(set (match_operand:V2SI 0 "register_operand" "=w")
2277        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2278                     UNSPEC_ADDV))]
2279  "TARGET_SIMD"
2280  "addp\\t%0.2s, %1.2s, %1.2s"
2281   [(set_attr "type" "neon_reduc_add")]
2282 )
2283
2284 (define_insn "reduc_plus_scal_<mode>"
2285  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2287                    UNSPEC_FADDV))]
2288  "TARGET_SIMD"
2289  "faddp\\t%<Vetype>0, %1.<Vtype>"
2290   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2291 )
2292
2293 (define_expand "reduc_plus_scal_v4sf"
2294  [(set (match_operand:SF 0 "register_operand")
2295        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2296                     UNSPEC_FADDV))]
2297  "TARGET_SIMD"
2298 {
2299   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300   rtx scratch = gen_reg_rtx (V4SFmode);
2301   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2304   DONE;
2305 })
2306
2307 (define_insn "clrsb<mode>2"
2308   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2310   "TARGET_SIMD"
2311   "cls\\t%0.<Vtype>, %1.<Vtype>"
2312   [(set_attr "type" "neon_cls<q>")]
2313 )
2314
2315 (define_insn "clz<mode>2"
2316  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318  "TARGET_SIMD"
2319  "clz\\t%0.<Vtype>, %1.<Vtype>"
2320   [(set_attr "type" "neon_cls<q>")]
2321 )
2322
2323 (define_insn "popcount<mode>2"
2324   [(set (match_operand:VB 0 "register_operand" "=w")
2325         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2326   "TARGET_SIMD"
2327   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328   [(set_attr "type" "neon_cnt<q>")]
2329 )
2330
2331 ;; 'across lanes' max and min ops.
2332
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336   [(match_operand:<VEL> 0 "register_operand")
2337    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2338                   FMAXMINV)]
2339   "TARGET_SIMD"
2340   {
2341     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342     rtx scratch = gen_reg_rtx (<MODE>mode);
2343     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2344                                                               operands[1]));
2345     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2346     DONE;
2347   }
2348 )
2349
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352   [(match_operand:<VEL> 0 "register_operand")
2353    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2354                     MAXMINV)]
2355   "TARGET_SIMD"
2356   {
2357     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358     rtx scratch = gen_reg_rtx (<MODE>mode);
2359     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360                                                               operands[1]));
2361     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362     DONE;
2363   }
2364 )
2365
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2369                     MAXMINV))]
2370  "TARGET_SIMD"
2371  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372   [(set_attr "type" "neon_reduc_minmax<q>")]
2373 )
2374
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376  [(set (match_operand:V2SI 0 "register_operand" "=w")
2377        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2378                     MAXMINV))]
2379  "TARGET_SIMD"
2380  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381   [(set_attr "type" "neon_reduc_minmax")]
2382 )
2383
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2387                       FMAXMINV))]
2388  "TARGET_SIMD"
2389  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2391 )
2392
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2394 ;; allocation.
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2396 ;; to select.
2397 ;;
2398 ;; Thus our BSL is of the form:
2399 ;;   op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2401 ;;
2402 ;;   if (op0 = mask)
2403 ;;     bsl mask, op1, op2
2404 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2405 ;;     bit op0, op2, mask
2406 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2407 ;;     bif op0, op1, mask
2408 ;;
2409 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; in *aarch64_simd_bsl<mode>_alt.
2412
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2415         (xor:VDQ_I
2416            (and:VDQ_I
2417              (xor:VDQ_I
2418                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421           (match_dup:<V_INT_EQUIV> 3)
2422         ))]
2423   "TARGET_SIMD"
2424   "@
2425   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428   [(set_attr "type" "neon_bsl<q>")]
2429 )
2430
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first.  The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
2436
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2439         (xor:VDQ_I
2440            (and:VDQ_I
2441              (xor:VDQ_I
2442                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445           (match_dup:<V_INT_EQUIV> 2)))]
2446   "TARGET_SIMD"
2447   "@
2448   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451   [(set_attr "type" "neon_bsl<q>")]
2452 )
2453
2454 ;; DImode is special, we want to avoid computing operations which are
2455 ;; more naturally computed in general purpose registers in the vector
2456 ;; registers.  If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again.  However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2460 ;;
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
2463
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2466         (xor:DI
2467            (and:DI
2468              (xor:DI
2469                (match_operand:DI 3 "register_operand" "w,0,w,r")
2470                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2472           (match_dup:DI 3)
2473         ))]
2474   "TARGET_SIMD"
2475   "@
2476   bsl\\t%0.8b, %2.8b, %3.8b
2477   bit\\t%0.8b, %2.8b, %1.8b
2478   bif\\t%0.8b, %3.8b, %1.8b
2479   #"
2480   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2481   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2482 {
2483   /* Split back to individual operations.  If we're before reload, and
2484      able to create a temporary register, do so.  If we're after reload,
2485      we've got an early-clobber destination register, so use that.
2486      Otherwise, we can't create pseudos and we can't yet guarantee that
2487      operands[0] is safe to write, so FAIL to split.  */
2488
2489   rtx scratch;
2490   if (reload_completed)
2491     scratch = operands[0];
2492   else if (can_create_pseudo_p ())
2493     scratch = gen_reg_rtx (DImode);
2494   else
2495     FAIL;
2496
2497   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2500   DONE;
2501 }
2502   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503    (set_attr "length" "4,4,4,12")]
2504 )
2505
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2508         (xor:DI
2509            (and:DI
2510              (xor:DI
2511                (match_operand:DI 3 "register_operand" "w,w,0,r")
2512                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2514           (match_dup:DI 2)
2515         ))]
2516   "TARGET_SIMD"
2517   "@
2518   bsl\\t%0.8b, %3.8b, %2.8b
2519   bit\\t%0.8b, %3.8b, %1.8b
2520   bif\\t%0.8b, %2.8b, %1.8b
2521   #"
2522   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2523   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2524 {
2525   /* Split back to individual operations.  If we're before reload, and
2526      able to create a temporary register, do so.  If we're after reload,
2527      we've got an early-clobber destination register, so use that.
2528      Otherwise, we can't create pseudos and we can't yet guarantee that
2529      operands[0] is safe to write, so FAIL to split.  */
2530
2531   rtx scratch;
2532   if (reload_completed)
2533     scratch = operands[0];
2534   else if (can_create_pseudo_p ())
2535     scratch = gen_reg_rtx (DImode);
2536   else
2537     FAIL;
2538
2539   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2542   DONE;
2543 }
2544   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545    (set_attr "length" "4,4,4,12")]
2546 )
2547
2548 (define_expand "aarch64_simd_bsl<mode>"
2549   [(match_operand:VALLDIF 0 "register_operand")
2550    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551    (match_operand:VALLDIF 2 "register_operand")
2552    (match_operand:VALLDIF 3 "register_operand")]
2553  "TARGET_SIMD"
2554 {
2555   /* We can't alias operands together if they have different modes.  */
2556   rtx tmp = operands[0];
2557   if (FLOAT_MODE_P (<MODE>mode))
2558     {
2559       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2562     }
2563   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2565                                                          operands[1],
2566                                                          operands[2],
2567                                                          operands[3]));
2568   if (tmp != operands[0])
2569     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2570
2571   DONE;
2572 })
2573
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575   [(match_operand:VALLDI 0 "register_operand")
2576    (match_operand:VALLDI 1 "nonmemory_operand")
2577    (match_operand:VALLDI 2 "nonmemory_operand")
2578    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2579   "TARGET_SIMD"
2580 {
2581   /* If we have (a = (P) ? -1 : 0);
2582      Then we can simply move the generated mask (result must be int).  */
2583   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584       && operands[2] == CONST0_RTX (<MODE>mode))
2585     emit_move_insn (operands[0], operands[3]);
2586   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2587   else if (operands[1] == CONST0_RTX (<MODE>mode)
2588            && operands[2] == CONSTM1_RTX (<MODE>mode))
2589     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2590   else
2591     {
2592       if (!REG_P (operands[1]))
2593         operands[1] = force_reg (<MODE>mode, operands[1]);
2594       if (!REG_P (operands[2]))
2595         operands[2] = force_reg (<MODE>mode, operands[2]);
2596       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597                                              operands[1], operands[2]));
2598     }
2599
2600   DONE;
2601 })
2602
2603 ;; Patterns comparing two vectors to produce a mask.
2604
2605 (define_expand "vec_cmp<mode><mode>"
2606   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607           (match_operator 1 "comparison_operator"
2608             [(match_operand:VSDQ_I_DI 2 "register_operand")
2609              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2610   "TARGET_SIMD"
2611 {
2612   rtx mask = operands[0];
2613   enum rtx_code code = GET_CODE (operands[1]);
2614
2615   switch (code)
2616     {
2617     case NE:
2618     case LE:
2619     case LT:
2620     case GE:
2621     case GT:
2622     case EQ:
2623       if (operands[3] == CONST0_RTX (<MODE>mode))
2624         break;
2625
2626       /* Fall through.  */
2627     default:
2628       if (!REG_P (operands[3]))
2629         operands[3] = force_reg (<MODE>mode, operands[3]);
2630
2631       break;
2632     }
2633
2634   switch (code)
2635     {
2636     case LT:
2637       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2638       break;
2639
2640     case GE:
2641       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2642       break;
2643
2644     case LE:
2645       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2646       break;
2647
2648     case GT:
2649       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2650       break;
2651
2652     case LTU:
2653       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2654       break;
2655
2656     case GEU:
2657       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2658       break;
2659
2660     case LEU:
2661       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2662       break;
2663
2664     case GTU:
2665       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2666       break;
2667
2668     case NE:
2669       /* Handle NE as !EQ.  */
2670       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2672       break;
2673
2674     case EQ:
2675       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2676       break;
2677
2678     default:
2679       gcc_unreachable ();
2680     }
2681
2682   DONE;
2683 })
2684
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687         (match_operator 1 "comparison_operator"
2688             [(match_operand:VDQF 2 "register_operand")
2689              (match_operand:VDQF 3 "nonmemory_operand")]))]
2690   "TARGET_SIMD"
2691 {
2692   int use_zero_form = 0;
2693   enum rtx_code code = GET_CODE (operands[1]);
2694   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2695
2696   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2697
2698   switch (code)
2699     {
2700     case LE:
2701     case LT:
2702     case GE:
2703     case GT:
2704     case EQ:
2705       if (operands[3] == CONST0_RTX (<MODE>mode))
2706         {
2707           use_zero_form = 1;
2708           break;
2709         }
2710       /* Fall through.  */
2711     default:
2712       if (!REG_P (operands[3]))
2713         operands[3] = force_reg (<MODE>mode, operands[3]);
2714
2715       break;
2716     }
2717
2718   switch (code)
2719     {
2720     case LT:
2721       if (use_zero_form)
2722         {
2723           comparison = gen_aarch64_cmlt<mode>;
2724           break;
2725         }
2726       /* Fall through.  */
2727     case UNLT:
2728       std::swap (operands[2], operands[3]);
2729       /* Fall through.  */
2730     case UNGT:
2731     case GT:
2732       comparison = gen_aarch64_cmgt<mode>;
2733       break;
2734     case LE:
2735       if (use_zero_form)
2736         {
2737           comparison = gen_aarch64_cmle<mode>;
2738           break;
2739         }
2740       /* Fall through.  */
2741     case UNLE:
2742       std::swap (operands[2], operands[3]);
2743       /* Fall through.  */
2744     case UNGE:
2745     case GE:
2746       comparison = gen_aarch64_cmge<mode>;
2747       break;
2748     case NE:
2749     case EQ:
2750       comparison = gen_aarch64_cmeq<mode>;
2751       break;
2752     case UNEQ:
2753     case ORDERED:
2754     case UNORDERED:
2755     case LTGT:
2756       break;
2757     default:
2758       gcc_unreachable ();
2759     }
2760
2761   switch (code)
2762     {
2763     case UNGE:
2764     case UNGT:
2765     case UNLE:
2766     case UNLT:
2767       {
2768         /* All of the above must not raise any FP exceptions.  Thus we first
2769            check each operand for NaNs and force any elements containing NaN to
2770            zero before using them in the compare.
2771            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772                                      (cm<cc> (isnan (a) ? 0.0 : a,
2773                                               isnan (b) ? 0.0 : b))
2774            We use the following transformations for doing the comparisions:
2775            a UNGE b -> a GE b
2776            a UNGT b -> a GT b
2777            a UNLE b -> b GE a
2778            a UNLT b -> b GT a.  */
2779
2780         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787                                           lowpart_subreg (<V_INT_EQUIV>mode,
2788                                                           operands[2],
2789                                                           <MODE>mode)));
2790         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791                                           lowpart_subreg (<V_INT_EQUIV>mode,
2792                                                           operands[3],
2793                                                           <MODE>mode)));
2794         gcc_assert (comparison != NULL);
2795         emit_insn (comparison (operands[0],
2796                                lowpart_subreg (<MODE>mode,
2797                                                tmp0, <V_INT_EQUIV>mode),
2798                                lowpart_subreg (<MODE>mode,
2799                                                tmp1, <V_INT_EQUIV>mode)));
2800         emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2801       }
2802       break;
2803
2804     case LT:
2805     case LE:
2806     case GT:
2807     case GE:
2808     case EQ:
2809     case NE:
2810       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2811          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2812          a GE b -> a GE b
2813          a GT b -> a GT b
2814          a LE b -> b GE a
2815          a LT b -> b GT a
2816          a EQ b -> a EQ b
2817          a NE b -> ~(a EQ b)  */
2818       gcc_assert (comparison != NULL);
2819       emit_insn (comparison (operands[0], operands[2], operands[3]));
2820       if (code == NE)
2821         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2822       break;
2823
2824     case LTGT:
2825       /* LTGT is not guranteed to not generate a FP exception.  So let's
2826          go the faster way : ((a > b) || (b > a)).  */
2827       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828                                          operands[2], operands[3]));
2829       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831       break;
2832
2833     case ORDERED:
2834     case UNORDERED:
2835     case UNEQ:
2836       /* cmeq (a, a) & cmeq (b, b).  */
2837       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838                                          operands[2], operands[2]));
2839       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2841
2842       if (code == UNORDERED)
2843         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844       else if (code == UNEQ)
2845         {
2846           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847           emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2848         }
2849       break;
2850
2851     default:
2852       gcc_unreachable ();
2853     }
2854
2855   DONE;
2856 })
2857
2858 (define_expand "vec_cmpu<mode><mode>"
2859   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860           (match_operator 1 "comparison_operator"
2861             [(match_operand:VSDQ_I_DI 2 "register_operand")
2862              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2863   "TARGET_SIMD"
2864 {
2865   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866                                       operands[2], operands[3]));
2867   DONE;
2868 })
2869
2870 (define_expand "vcond<mode><mode>"
2871   [(set (match_operand:VALLDI 0 "register_operand")
2872         (if_then_else:VALLDI
2873           (match_operator 3 "comparison_operator"
2874             [(match_operand:VALLDI 4 "register_operand")
2875              (match_operand:VALLDI 5 "nonmemory_operand")])
2876           (match_operand:VALLDI 1 "nonmemory_operand")
2877           (match_operand:VALLDI 2 "nonmemory_operand")))]
2878   "TARGET_SIMD"
2879 {
2880   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881   enum rtx_code code = GET_CODE (operands[3]);
2882
2883   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2884      it as well as switch operands 1/2 in order to avoid the additional
2885      NOT instruction.  */
2886   if (code == NE)
2887     {
2888       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889                                     operands[4], operands[5]);
2890       std::swap (operands[1], operands[2]);
2891     }
2892   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893                                              operands[4], operands[5]));
2894   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895                                                  operands[2], mask));
2896
2897   DONE;
2898 })
2899
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902         (if_then_else:<V_cmp_mixed>
2903           (match_operator 3 "comparison_operator"
2904             [(match_operand:VDQF_COND 4 "register_operand")
2905              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2908   "TARGET_SIMD"
2909 {
2910   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911   enum rtx_code code = GET_CODE (operands[3]);
2912
2913   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2914      it as well as switch operands 1/2 in order to avoid the additional
2915      NOT instruction.  */
2916   if (code == NE)
2917     {
2918       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919                                     operands[4], operands[5]);
2920       std::swap (operands[1], operands[2]);
2921     }
2922   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923                                              operands[4], operands[5]));
2924   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925                                                 operands[0], operands[1],
2926                                                 operands[2], mask));
2927
2928   DONE;
2929 })
2930
2931 (define_expand "vcondu<mode><mode>"
2932   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933         (if_then_else:VSDQ_I_DI
2934           (match_operator 3 "comparison_operator"
2935             [(match_operand:VSDQ_I_DI 4 "register_operand")
2936              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2939   "TARGET_SIMD"
2940 {
2941   rtx mask = gen_reg_rtx (<MODE>mode);
2942   enum rtx_code code = GET_CODE (operands[3]);
2943
2944   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2945      it as well as switch operands 1/2 in order to avoid the additional
2946      NOT instruction.  */
2947   if (code == NE)
2948     {
2949       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950                                     operands[4], operands[5]);
2951       std::swap (operands[1], operands[2]);
2952     }
2953   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954                                       operands[4], operands[5]));
2955   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956                                                  operands[2], mask));
2957   DONE;
2958 })
2959
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961   [(set (match_operand:VDQF 0 "register_operand")
2962         (if_then_else:VDQF
2963           (match_operator 3 "comparison_operator"
2964             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966           (match_operand:VDQF 1 "nonmemory_operand")
2967           (match_operand:VDQF 2 "nonmemory_operand")))]
2968   "TARGET_SIMD"
2969 {
2970   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971   enum rtx_code code = GET_CODE (operands[3]);
2972
2973   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974      it as well as switch operands 1/2 in order to avoid the additional
2975      NOT instruction.  */
2976   if (code == NE)
2977     {
2978       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979                                     operands[4], operands[5]);
2980       std::swap (operands[1], operands[2]);
2981     }
2982   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2983                                                   mask, operands[3],
2984                                                   operands[4], operands[5]));
2985   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986                                                  operands[2], mask));
2987   DONE;
2988 })
2989
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2991
2992 ;; Lane extraction with sign extension to general purpose register.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994   [(set (match_operand:GPI 0 "register_operand" "=r")
2995         (sign_extend:GPI
2996           (vec_select:<VEL>
2997             (match_operand:VDQQH 1 "register_operand" "w")
2998             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2999   "TARGET_SIMD"
3000   {
3001     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3002     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3003   }
3004   [(set_attr "type" "neon_to_gp<q>")]
3005 )
3006
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008   [(set (match_operand:SI 0 "register_operand" "=r")
3009         (zero_extend:SI
3010           (vec_select:<VEL>
3011             (match_operand:VDQQH 1 "register_operand" "w")
3012             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3013   "TARGET_SIMD"
3014   {
3015     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016     return "umov\\t%w0, %1.<Vetype>[%2]";
3017   }
3018   [(set_attr "type" "neon_to_gp<q>")]
3019 )
3020
3021 ;; Lane extraction of a value, neither sign nor zero extension
3022 ;; is guaranteed so upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3024 (define_insn "aarch64_get_lane<mode>"
3025   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3026         (vec_select:<VEL>
3027           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3029   "TARGET_SIMD"
3030   {
3031     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032     switch (which_alternative)
3033       {
3034         case 0:
3035           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3036         case 1:
3037           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3038         case 2:
3039           return "st1\\t{%1.<Vetype>}[%2], %0";
3040         default:
3041           gcc_unreachable ();
3042       }
3043   }
3044   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3045 )
3046
3047 (define_insn "load_pair_lanes<mode>"
3048   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3049         (vec_concat:<VDBL>
3050            (match_operand:VDC 1 "memory_operand" "Utq")
3051            (match_operand:VDC 2 "memory_operand" "m")))]
3052   "TARGET_SIMD && !STRICT_ALIGNMENT
3053    && rtx_equal_p (XEXP (operands[2], 0),
3054                    plus_constant (Pmode,
3055                                   XEXP (operands[1], 0),
3056                                   GET_MODE_SIZE (<MODE>mode)))"
3057   "ldr\\t%q0, %1"
3058   [(set_attr "type" "neon_load1_1reg_q")]
3059 )
3060
3061 (define_insn "store_pair_lanes<mode>"
3062   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3063         (vec_concat:<VDBL>
3064            (match_operand:VDC 1 "register_operand" "w, r")
3065            (match_operand:VDC 2 "register_operand" "w, r")))]
3066   "TARGET_SIMD"
3067   "@
3068    stp\\t%d1, %d2, %y0
3069    stp\\t%x1, %x2, %y0"
3070   [(set_attr "type" "neon_stp, store_16")]
3071 )
3072
3073 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3074 ;; dest vector.
3075
3076 (define_insn "*aarch64_combinez<mode>"
3077   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3078         (vec_concat:<VDBL>
3079           (match_operand:VDC 1 "general_operand" "w,?r,m")
3080           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3082   "@
3083    mov\\t%0.8b, %1.8b
3084    fmov\t%d0, %1
3085    ldr\\t%d0, %1"
3086   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087    (set_attr "simd" "yes,*,yes")
3088    (set_attr "fp" "*,yes,*")]
3089 )
3090
3091 (define_insn "*aarch64_combinez_be<mode>"
3092   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3093         (vec_concat:<VDBL>
3094           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3097   "@
3098    mov\\t%0.8b, %1.8b
3099    fmov\t%d0, %1
3100    ldr\\t%d0, %1"
3101   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102    (set_attr "simd" "yes,*,yes")
3103    (set_attr "fp" "*,yes,*")]
3104 )
3105
3106 (define_expand "aarch64_combine<mode>"
3107   [(match_operand:<VDBL> 0 "register_operand")
3108    (match_operand:VDC 1 "register_operand")
3109    (match_operand:VDC 2 "register_operand")]
3110   "TARGET_SIMD"
3111 {
3112   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3113
3114   DONE;
3115 }
3116 )
3117
3118 (define_expand "aarch64_simd_combine<mode>"
3119   [(match_operand:<VDBL> 0 "register_operand")
3120    (match_operand:VDC 1 "register_operand")
3121    (match_operand:VDC 2 "register_operand")]
3122   "TARGET_SIMD"
3123   {
3124     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3126     DONE;
3127   }
3128 [(set_attr "type" "multiple")]
3129 )
3130
3131 ;; <su><addsub>l<q>.
3132
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136                            (match_operand:VQW 1 "register_operand" "w")
3137                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139                            (match_operand:VQW 2 "register_operand" "w")
3140                            (match_dup 3)))))]
3141   "TARGET_SIMD"
3142   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3144 )
3145
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149                            (match_operand:VQW 1 "register_operand" "w")
3150                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152                            (match_operand:VQW 2 "register_operand" "w")
3153                            (match_dup 3)))))]
3154   "TARGET_SIMD"
3155   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3157 )
3158
3159
3160 (define_expand "aarch64_saddl2<mode>"
3161   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162    (match_operand:VQW 1 "register_operand" "w")
3163    (match_operand:VQW 2 "register_operand" "w")]
3164   "TARGET_SIMD"
3165 {
3166   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3168                                                   operands[2], p));
3169   DONE;
3170 })
3171
3172 (define_expand "aarch64_uaddl2<mode>"
3173   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174    (match_operand:VQW 1 "register_operand" "w")
3175    (match_operand:VQW 2 "register_operand" "w")]
3176   "TARGET_SIMD"
3177 {
3178   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3180                                                   operands[2], p));
3181   DONE;
3182 })
3183
3184 (define_expand "aarch64_ssubl2<mode>"
3185   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186    (match_operand:VQW 1 "register_operand" "w")
3187    (match_operand:VQW 2 "register_operand" "w")]
3188   "TARGET_SIMD"
3189 {
3190   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3192                                                 operands[2], p));
3193   DONE;
3194 })
3195
3196 (define_expand "aarch64_usubl2<mode>"
3197   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198    (match_operand:VQW 1 "register_operand" "w")
3199    (match_operand:VQW 2 "register_operand" "w")]
3200   "TARGET_SIMD"
3201 {
3202   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3204                                                 operands[2], p));
3205   DONE;
3206 })
3207
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3212                        (ANY_EXTEND:<VWIDE>
3213                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3214   "TARGET_SIMD"
3215   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3217 )
3218
3219 ;; <su><addsub>w<q>.
3220
3221 (define_expand "widen_ssum<mode>3"
3222   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223         (plus:<VDBLW> (sign_extend:<VDBLW>
3224                         (match_operand:VQW 1 "register_operand" ""))
3225                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3226   "TARGET_SIMD"
3227   {
3228     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3230
3231     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3232                                                 operands[1], p));
3233     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3234     DONE;
3235   }
3236 )
3237
3238 (define_expand "widen_ssum<mode>3"
3239   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240         (plus:<VWIDE> (sign_extend:<VWIDE>
3241                         (match_operand:VD_BHSI 1 "register_operand" ""))
3242                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3243   "TARGET_SIMD"
3244 {
3245   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3246   DONE;
3247 })
3248
3249 (define_expand "widen_usum<mode>3"
3250   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251         (plus:<VDBLW> (zero_extend:<VDBLW>
3252                         (match_operand:VQW 1 "register_operand" ""))
3253                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3254   "TARGET_SIMD"
3255   {
3256     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3258
3259     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3260                                                  operands[1], p));
3261     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3262     DONE;
3263   }
3264 )
3265
3266 (define_expand "widen_usum<mode>3"
3267   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268         (plus:<VWIDE> (zero_extend:<VWIDE>
3269                         (match_operand:VD_BHSI 1 "register_operand" ""))
3270                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3271   "TARGET_SIMD"
3272 {
3273   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3274   DONE;
3275 })
3276
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3280                         (ANY_EXTEND:<VWIDE>
3281                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3282   "TARGET_SIMD"
3283   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3285 )
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3290                         (ANY_EXTEND:<VWIDE>
3291                           (vec_select:<VHALF>
3292                            (match_operand:VQW 2 "register_operand" "w")
3293                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3294   "TARGET_SIMD"
3295   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3297 )
3298
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3302                         (ANY_EXTEND:<VWIDE>
3303                           (vec_select:<VHALF>
3304                            (match_operand:VQW 2 "register_operand" "w")
3305                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3306   "TARGET_SIMD"
3307   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3309 )
3310
3311 (define_expand "aarch64_saddw2<mode>"
3312   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313    (match_operand:<VWIDE> 1 "register_operand" "w")
3314    (match_operand:VQW 2 "register_operand" "w")]
3315   "TARGET_SIMD"
3316 {
3317   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3319                                                 operands[2], p));
3320   DONE;
3321 })
3322
3323 (define_expand "aarch64_uaddw2<mode>"
3324   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325    (match_operand:<VWIDE> 1 "register_operand" "w")
3326    (match_operand:VQW 2 "register_operand" "w")]
3327   "TARGET_SIMD"
3328 {
3329   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3331                                                 operands[2], p));
3332   DONE;
3333 })
3334
3335
3336 (define_expand "aarch64_ssubw2<mode>"
3337   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338    (match_operand:<VWIDE> 1 "register_operand" "w")
3339    (match_operand:VQW 2 "register_operand" "w")]
3340   "TARGET_SIMD"
3341 {
3342   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3344                                                 operands[2], p));
3345   DONE;
3346 })
3347
3348 (define_expand "aarch64_usubw2<mode>"
3349   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350    (match_operand:<VWIDE> 1 "register_operand" "w")
3351    (match_operand:VQW 2 "register_operand" "w")]
3352   "TARGET_SIMD"
3353 {
3354   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3356                                                 operands[2], p));
3357   DONE;
3358 })
3359
3360 ;; <su><r>h<addsub>.
3361
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3366                      HADDSUB))]
3367   "TARGET_SIMD"
3368   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369   [(set_attr "type" "neon_<addsub>_halve<q>")]
3370 )
3371
3372 ;; <r><addsub>hn<q>.
3373
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377                             (match_operand:VQN 2 "register_operand" "w")]
3378                            ADDSUBHN))]
3379   "TARGET_SIMD"
3380   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3382 )
3383
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387                              (match_operand:VQN 2 "register_operand" "w")
3388                              (match_operand:VQN 3 "register_operand" "w")]
3389                             ADDSUBHN2))]
3390   "TARGET_SIMD"
3391   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3393 )
3394
3395 ;; pmul.
3396
3397 (define_insn "aarch64_pmul<mode>"
3398   [(set (match_operand:VB 0 "register_operand" "=w")
3399         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400                     (match_operand:VB 2 "register_operand" "w")]
3401                    UNSPEC_PMUL))]
3402  "TARGET_SIMD"
3403  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404   [(set_attr "type" "neon_mul_<Vetype><q>")]
3405 )
3406
3407 ;; fmulx.
3408
3409 (define_insn "aarch64_fmulx<mode>"
3410   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3411         (unspec:VHSDF_HSDF
3412           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3414            UNSPEC_FMULX))]
3415  "TARGET_SIMD"
3416  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417  [(set_attr "type" "neon_fp_mul_<stype>")]
3418 )
3419
3420 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3421
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3424         (unspec:VDQSF
3425          [(match_operand:VDQSF 1 "register_operand" "w")
3426           (vec_duplicate:VDQSF
3427            (vec_select:<VEL>
3428             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3430          UNSPEC_FMULX))]
3431   "TARGET_SIMD"
3432   {
3433     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3435   }
3436   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3437 )
3438
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3440
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442   [(set (match_operand:VDQF 0 "register_operand" "=w")
3443         (unspec:VDQF
3444          [(match_operand:VDQF 1 "register_operand" "w")
3445           (vec_duplicate:VDQF
3446            (vec_select:<VEL>
3447             (match_operand:VDQF 2 "register_operand" "w")
3448             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3449          UNSPEC_FMULX))]
3450   "TARGET_SIMD"
3451   {
3452     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3454   }
3455   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3456 )
3457
3458 ;; vmulxq_lane
3459
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3462         (unspec:VHSDF
3463          [(match_operand:VHSDF 1 "register_operand" "w")
3464           (vec_duplicate:VHSDF
3465             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3466          UNSPEC_FMULX))]
3467   "TARGET_SIMD"
3468   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3470 )
3471
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3475
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3478         (unspec:<VEL>
3479          [(match_operand:<VEL> 1 "register_operand" "w")
3480           (vec_select:<VEL>
3481            (match_operand:VDQF 2 "register_operand" "w")
3482             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3483          UNSPEC_FMULX))]
3484   "TARGET_SIMD"
3485   {
3486     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3488   }
3489   [(set_attr "type" "fmul<Vetype>")]
3490 )
3491 ;; <su>q<addsub>
3492
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3497   "TARGET_SIMD"
3498   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499   [(set_attr "type" "neon_<optab><q>")]
3500 )
3501
3502 ;; suqadd and usqadd
3503
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3508                        USSUQADD))]
3509   "TARGET_SIMD"
3510   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511   [(set_attr "type" "neon_qadd<q>")]
3512 )
3513
3514 ;; sqmovun
3515
3516 (define_insn "aarch64_sqmovun<mode>"
3517   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3519                             UNSPEC_SQXTUN))]
3520    "TARGET_SIMD"
3521    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3523 )
3524
3525 ;; sqmovn and uqmovn
3526
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3530                             SUQMOVN))]
3531   "TARGET_SIMD"
3532   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3534 )
3535
3536 ;; <su>q<absneg>
3537
3538 (define_insn "aarch64_s<optab><mode>"
3539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540         (UNQOPS:VSDQ_I
3541           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3542   "TARGET_SIMD"
3543   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544   [(set_attr "type" "neon_<optab><q>")]
3545 )
3546
3547 ;; sq<r>dmulh.
3548
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3551         (unspec:VSDQ_HSI
3552           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3554          VQDMULH))]
3555   "TARGET_SIMD"
3556   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3558 )
3559
3560 ;; sq<r>dmulh_lane
3561
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3564         (unspec:VDQHS
3565           [(match_operand:VDQHS 1 "register_operand" "w")
3566            (vec_select:<VEL>
3567              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569          VQDMULH))]
3570   "TARGET_SIMD"
3571   "*
3572    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3575 )
3576
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3579         (unspec:VDQHS
3580           [(match_operand:VDQHS 1 "register_operand" "w")
3581            (vec_select:<VEL>
3582              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584          VQDMULH))]
3585   "TARGET_SIMD"
3586   "*
3587    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3590 )
3591
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3594         (unspec:SD_HSI
3595           [(match_operand:SD_HSI 1 "register_operand" "w")
3596            (vec_select:<VEL>
3597              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599          VQDMULH))]
3600   "TARGET_SIMD"
3601   "*
3602    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3604   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3605 )
3606
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3609         (unspec:SD_HSI
3610           [(match_operand:SD_HSI 1 "register_operand" "w")
3611            (vec_select:<VEL>
3612              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614          VQDMULH))]
3615   "TARGET_SIMD"
3616   "*
3617    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3619   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3620 )
3621
3622 ;; sqrdml[as]h.
3623
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3626         (unspec:VSDQ_HSI
3627           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3630           SQRDMLH_AS))]
3631    "TARGET_SIMD_RDMA"
3632    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3634 )
3635
3636 ;; sqrdml[as]h_lane.
3637
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640         (unspec:VDQHS
3641           [(match_operand:VDQHS 1 "register_operand" "0")
3642            (match_operand:VDQHS 2 "register_operand" "w")
3643            (vec_select:<VEL>
3644              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646           SQRDMLH_AS))]
3647    "TARGET_SIMD_RDMA"
3648    {
3649      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650      return
3651       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3652    }
3653    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3654 )
3655
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3658         (unspec:SD_HSI
3659           [(match_operand:SD_HSI 1 "register_operand" "0")
3660            (match_operand:SD_HSI 2 "register_operand" "w")
3661            (vec_select:<VEL>
3662              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3664           SQRDMLH_AS))]
3665    "TARGET_SIMD_RDMA"
3666    {
3667      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3668      return
3669       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3670    }
3671    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3672 )
3673
3674 ;; sqrdml[as]h_laneq.
3675
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3678         (unspec:VDQHS
3679           [(match_operand:VDQHS 1 "register_operand" "0")
3680            (match_operand:VDQHS 2 "register_operand" "w")
3681            (vec_select:<VEL>
3682              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684           SQRDMLH_AS))]
3685    "TARGET_SIMD_RDMA"
3686    {
3687      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688      return
3689       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3690    }
3691    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3692 )
3693
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3696         (unspec:SD_HSI
3697           [(match_operand:SD_HSI 1 "register_operand" "0")
3698            (match_operand:SD_HSI 2 "register_operand" "w")
3699            (vec_select:<VEL>
3700              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3702           SQRDMLH_AS))]
3703    "TARGET_SIMD_RDMA"
3704    {
3705      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3706      return
3707       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3708    }
3709    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3710 )
3711
3712 ;; vqdml[sa]l
3713
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716         (SBINQOPS:<VWIDE>
3717           (match_operand:<VWIDE> 1 "register_operand" "0")
3718           (ss_ashift:<VWIDE>
3719               (mult:<VWIDE>
3720                 (sign_extend:<VWIDE>
3721                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3722                 (sign_extend:<VWIDE>
3723                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3724               (const_int 1))))]
3725   "TARGET_SIMD"
3726   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3728 )
3729
3730 ;; vqdml[sa]l_lane
3731
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3734         (SBINQOPS:<VWIDE>
3735           (match_operand:<VWIDE> 1 "register_operand" "0")
3736           (ss_ashift:<VWIDE>
3737             (mult:<VWIDE>
3738               (sign_extend:<VWIDE>
3739                 (match_operand:VD_HSI 2 "register_operand" "w"))
3740               (sign_extend:<VWIDE>
3741                 (vec_duplicate:VD_HSI
3742                   (vec_select:<VEL>
3743                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3745               ))
3746             (const_int 1))))]
3747   "TARGET_SIMD"
3748   {
3749     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750     return
3751       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3752   }
3753   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3754 )
3755
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3758         (SBINQOPS:<VWIDE>
3759           (match_operand:<VWIDE> 1 "register_operand" "0")
3760           (ss_ashift:<VWIDE>
3761             (mult:<VWIDE>
3762               (sign_extend:<VWIDE>
3763                 (match_operand:VD_HSI 2 "register_operand" "w"))
3764               (sign_extend:<VWIDE>
3765                 (vec_duplicate:VD_HSI
3766                   (vec_select:<VEL>
3767                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3769               ))
3770             (const_int 1))))]
3771   "TARGET_SIMD"
3772   {
3773     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3774     return
3775       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3776   }
3777   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3778 )
3779
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782         (SBINQOPS:<VWIDE>
3783           (match_operand:<VWIDE> 1 "register_operand" "0")
3784           (ss_ashift:<VWIDE>
3785             (mult:<VWIDE>
3786               (sign_extend:<VWIDE>
3787                 (match_operand:SD_HSI 2 "register_operand" "w"))
3788               (sign_extend:<VWIDE>
3789                 (vec_select:<VEL>
3790                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3792               )
3793             (const_int 1))))]
3794   "TARGET_SIMD"
3795   {
3796     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3797     return
3798       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3799   }
3800   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3801 )
3802
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805         (SBINQOPS:<VWIDE>
3806           (match_operand:<VWIDE> 1 "register_operand" "0")
3807           (ss_ashift:<VWIDE>
3808             (mult:<VWIDE>
3809               (sign_extend:<VWIDE>
3810                 (match_operand:SD_HSI 2 "register_operand" "w"))
3811               (sign_extend:<VWIDE>
3812                 (vec_select:<VEL>
3813                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3815               )
3816             (const_int 1))))]
3817   "TARGET_SIMD"
3818   {
3819     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820     return
3821       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822   }
3823   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3824 )
3825
3826 ;; vqdml[sa]l_n
3827
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3830         (SBINQOPS:<VWIDE>
3831           (match_operand:<VWIDE> 1 "register_operand" "0")
3832           (ss_ashift:<VWIDE>
3833               (mult:<VWIDE>
3834                 (sign_extend:<VWIDE>
3835                       (match_operand:VD_HSI 2 "register_operand" "w"))
3836                 (sign_extend:<VWIDE>
3837                   (vec_duplicate:VD_HSI
3838                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3839               (const_int 1))))]
3840   "TARGET_SIMD"
3841   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3843 )
3844
3845 ;; sqdml[as]l2
3846
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3849         (SBINQOPS:<VWIDE>
3850          (match_operand:<VWIDE> 1 "register_operand" "0")
3851          (ss_ashift:<VWIDE>
3852              (mult:<VWIDE>
3853                (sign_extend:<VWIDE>
3854                  (vec_select:<VHALF>
3855                      (match_operand:VQ_HSI 2 "register_operand" "w")
3856                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857                (sign_extend:<VWIDE>
3858                  (vec_select:<VHALF>
3859                      (match_operand:VQ_HSI 3 "register_operand" "w")
3860                      (match_dup 4))))
3861              (const_int 1))))]
3862   "TARGET_SIMD"
3863   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3865 )
3866
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869    (match_operand:<VWIDE> 1 "register_operand" "w")
3870    (match_operand:VQ_HSI 2 "register_operand" "w")
3871    (match_operand:VQ_HSI 3 "register_operand" "w")]
3872   "TARGET_SIMD"
3873 {
3874   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876                                                   operands[2], operands[3], p));
3877   DONE;
3878 })
3879
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882    (match_operand:<VWIDE> 1 "register_operand" "w")
3883    (match_operand:VQ_HSI 2 "register_operand" "w")
3884    (match_operand:VQ_HSI 3 "register_operand" "w")]
3885   "TARGET_SIMD"
3886 {
3887   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889                                                   operands[2], operands[3], p));
3890   DONE;
3891 })
3892
3893 ;; vqdml[sa]l2_lane
3894
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3897         (SBINQOPS:<VWIDE>
3898           (match_operand:<VWIDE> 1 "register_operand" "0")
3899           (ss_ashift:<VWIDE>
3900               (mult:<VWIDE>
3901                 (sign_extend:<VWIDE>
3902                   (vec_select:<VHALF>
3903                     (match_operand:VQ_HSI 2 "register_operand" "w")
3904                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905                 (sign_extend:<VWIDE>
3906                   (vec_duplicate:<VHALF>
3907                     (vec_select:<VEL>
3908                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3910                     ))))
3911               (const_int 1))))]
3912   "TARGET_SIMD"
3913   {
3914     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3915     return
3916      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3917   }
3918   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3919 )
3920
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923         (SBINQOPS:<VWIDE>
3924           (match_operand:<VWIDE> 1 "register_operand" "0")
3925           (ss_ashift:<VWIDE>
3926               (mult:<VWIDE>
3927                 (sign_extend:<VWIDE>
3928                   (vec_select:<VHALF>
3929                     (match_operand:VQ_HSI 2 "register_operand" "w")
3930                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931                 (sign_extend:<VWIDE>
3932                   (vec_duplicate:<VHALF>
3933                     (vec_select:<VEL>
3934                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3936                     ))))
3937               (const_int 1))))]
3938   "TARGET_SIMD"
3939   {
3940     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3941     return
3942      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3943   }
3944   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3945 )
3946
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949    (match_operand:<VWIDE> 1 "register_operand" "w")
3950    (match_operand:VQ_HSI 2 "register_operand" "w")
3951    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952    (match_operand:SI 4 "immediate_operand" "i")]
3953   "TARGET_SIMD"
3954 {
3955   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957                                                        operands[2], operands[3],
3958                                                        operands[4], p));
3959   DONE;
3960 })
3961
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964    (match_operand:<VWIDE> 1 "register_operand" "w")
3965    (match_operand:VQ_HSI 2 "register_operand" "w")
3966    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967    (match_operand:SI 4 "immediate_operand" "i")]
3968   "TARGET_SIMD"
3969 {
3970   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972                                                        operands[2], operands[3],
3973                                                        operands[4], p));
3974   DONE;
3975 })
3976
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979    (match_operand:<VWIDE> 1 "register_operand" "w")
3980    (match_operand:VQ_HSI 2 "register_operand" "w")
3981    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982    (match_operand:SI 4 "immediate_operand" "i")]
3983   "TARGET_SIMD"
3984 {
3985   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987                                                        operands[2], operands[3],
3988                                                        operands[4], p));
3989   DONE;
3990 })
3991
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994    (match_operand:<VWIDE> 1 "register_operand" "w")
3995    (match_operand:VQ_HSI 2 "register_operand" "w")
3996    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997    (match_operand:SI 4 "immediate_operand" "i")]
3998   "TARGET_SIMD"
3999 {
4000   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002                                                        operands[2], operands[3],
4003                                                        operands[4], p));
4004   DONE;
4005 })
4006
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009         (SBINQOPS:<VWIDE>
4010           (match_operand:<VWIDE> 1 "register_operand" "0")
4011           (ss_ashift:<VWIDE>
4012             (mult:<VWIDE>
4013               (sign_extend:<VWIDE>
4014                 (vec_select:<VHALF>
4015                   (match_operand:VQ_HSI 2 "register_operand" "w")
4016                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017               (sign_extend:<VWIDE>
4018                 (vec_duplicate:<VHALF>
4019                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4020             (const_int 1))))]
4021   "TARGET_SIMD"
4022   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4024 )
4025
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028    (match_operand:<VWIDE> 1 "register_operand" "w")
4029    (match_operand:VQ_HSI 2 "register_operand" "w")
4030    (match_operand:<VEL> 3 "register_operand" "w")]
4031   "TARGET_SIMD"
4032 {
4033   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035                                                     operands[2], operands[3],
4036                                                     p));
4037   DONE;
4038 })
4039
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042    (match_operand:<VWIDE> 1 "register_operand" "w")
4043    (match_operand:VQ_HSI 2 "register_operand" "w")
4044    (match_operand:<VEL> 3 "register_operand" "w")]
4045   "TARGET_SIMD"
4046 {
4047   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049                                                     operands[2], operands[3],
4050                                                     p));
4051   DONE;
4052 })
4053
4054 ;; vqdmull
4055
4056 (define_insn "aarch64_sqdmull<mode>"
4057   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058         (ss_ashift:<VWIDE>
4059              (mult:<VWIDE>
4060                (sign_extend:<VWIDE>
4061                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4062                (sign_extend:<VWIDE>
4063                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4064              (const_int 1)))]
4065   "TARGET_SIMD"
4066   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4068 )
4069
4070 ;; vqdmull_lane
4071
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074         (ss_ashift:<VWIDE>
4075              (mult:<VWIDE>
4076                (sign_extend:<VWIDE>
4077                  (match_operand:VD_HSI 1 "register_operand" "w"))
4078                (sign_extend:<VWIDE>
4079                  (vec_duplicate:VD_HSI
4080                    (vec_select:<VEL>
4081                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4083                ))
4084              (const_int 1)))]
4085   "TARGET_SIMD"
4086   {
4087     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4089   }
4090   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4091 )
4092
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4095         (ss_ashift:<VWIDE>
4096              (mult:<VWIDE>
4097                (sign_extend:<VWIDE>
4098                  (match_operand:VD_HSI 1 "register_operand" "w"))
4099                (sign_extend:<VWIDE>
4100                  (vec_duplicate:VD_HSI
4101                    (vec_select:<VEL>
4102                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4104                ))
4105              (const_int 1)))]
4106   "TARGET_SIMD"
4107   {
4108     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4110   }
4111   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4112 )
4113
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116         (ss_ashift:<VWIDE>
4117              (mult:<VWIDE>
4118                (sign_extend:<VWIDE>
4119                  (match_operand:SD_HSI 1 "register_operand" "w"))
4120                (sign_extend:<VWIDE>
4121                  (vec_select:<VEL>
4122                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4124                ))
4125              (const_int 1)))]
4126   "TARGET_SIMD"
4127   {
4128     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4130   }
4131   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4132 )
4133
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136         (ss_ashift:<VWIDE>
4137              (mult:<VWIDE>
4138                (sign_extend:<VWIDE>
4139                  (match_operand:SD_HSI 1 "register_operand" "w"))
4140                (sign_extend:<VWIDE>
4141                  (vec_select:<VEL>
4142                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4144                ))
4145              (const_int 1)))]
4146   "TARGET_SIMD"
4147   {
4148     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4150   }
4151   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4152 )
4153
4154 ;; vqdmull_n
4155
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158         (ss_ashift:<VWIDE>
4159              (mult:<VWIDE>
4160                (sign_extend:<VWIDE>
4161                  (match_operand:VD_HSI 1 "register_operand" "w"))
4162                (sign_extend:<VWIDE>
4163                  (vec_duplicate:VD_HSI
4164                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4165                )
4166              (const_int 1)))]
4167   "TARGET_SIMD"
4168   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4170 )
4171
4172 ;; vqdmull2
4173
4174
4175
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4178         (ss_ashift:<VWIDE>
4179              (mult:<VWIDE>
4180                (sign_extend:<VWIDE>
4181                  (vec_select:<VHALF>
4182                    (match_operand:VQ_HSI 1 "register_operand" "w")
4183                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184                (sign_extend:<VWIDE>
4185                  (vec_select:<VHALF>
4186                    (match_operand:VQ_HSI 2 "register_operand" "w")
4187                    (match_dup 3)))
4188                )
4189              (const_int 1)))]
4190   "TARGET_SIMD"
4191   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4193 )
4194
4195 (define_expand "aarch64_sqdmull2<mode>"
4196   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197    (match_operand:VQ_HSI 1 "register_operand" "w")
4198    (match_operand:VQ_HSI 2 "register_operand" "w")]
4199   "TARGET_SIMD"
4200 {
4201   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4203                                                   operands[2], p));
4204   DONE;
4205 })
4206
4207 ;; vqdmull2_lane
4208
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211         (ss_ashift:<VWIDE>
4212              (mult:<VWIDE>
4213                (sign_extend:<VWIDE>
4214                  (vec_select:<VHALF>
4215                    (match_operand:VQ_HSI 1 "register_operand" "w")
4216                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217                (sign_extend:<VWIDE>
4218                  (vec_duplicate:<VHALF>
4219                    (vec_select:<VEL>
4220                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4222                ))
4223              (const_int 1)))]
4224   "TARGET_SIMD"
4225   {
4226     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4228   }
4229   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234         (ss_ashift:<VWIDE>
4235              (mult:<VWIDE>
4236                (sign_extend:<VWIDE>
4237                  (vec_select:<VHALF>
4238                    (match_operand:VQ_HSI 1 "register_operand" "w")
4239                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240                (sign_extend:<VWIDE>
4241                  (vec_duplicate:<VHALF>
4242                    (vec_select:<VEL>
4243                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4245                ))
4246              (const_int 1)))]
4247   "TARGET_SIMD"
4248   {
4249     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4251   }
4252   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4253 )
4254
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257    (match_operand:VQ_HSI 1 "register_operand" "w")
4258    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259    (match_operand:SI 3 "immediate_operand" "i")]
4260   "TARGET_SIMD"
4261 {
4262   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264                                                        operands[2], operands[3],
4265                                                        p));
4266   DONE;
4267 })
4268
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271    (match_operand:VQ_HSI 1 "register_operand" "w")
4272    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273    (match_operand:SI 3 "immediate_operand" "i")]
4274   "TARGET_SIMD"
4275 {
4276   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278                                                        operands[2], operands[3],
4279                                                        p));
4280   DONE;
4281 })
4282
4283 ;; vqdmull2_n
4284
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287         (ss_ashift:<VWIDE>
4288              (mult:<VWIDE>
4289                (sign_extend:<VWIDE>
4290                  (vec_select:<VHALF>
4291                    (match_operand:VQ_HSI 1 "register_operand" "w")
4292                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293                (sign_extend:<VWIDE>
4294                  (vec_duplicate:<VHALF>
4295                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4296                )
4297              (const_int 1)))]
4298   "TARGET_SIMD"
4299   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4301 )
4302
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305    (match_operand:VQ_HSI 1 "register_operand" "w")
4306    (match_operand:<VEL> 2 "register_operand" "w")]
4307   "TARGET_SIMD"
4308 {
4309   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4311                                                     operands[2], p));
4312   DONE;
4313 })
4314
4315 ;; vshl
4316
4317 (define_insn "aarch64_<sur>shl<mode>"
4318   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4319         (unspec:VSDQ_I_DI
4320           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4322          VSHL))]
4323   "TARGET_SIMD"
4324   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325   [(set_attr "type" "neon_shift_reg<q>")]
4326 )
4327
4328
4329 ;; vqshl
4330
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4333         (unspec:VSDQ_I
4334           [(match_operand:VSDQ_I 1 "register_operand" "w")
4335            (match_operand:VSDQ_I 2 "register_operand" "w")]
4336          VQSHL))]
4337   "TARGET_SIMD"
4338   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339   [(set_attr "type" "neon_sat_shift_reg<q>")]
4340 )
4341
4342 ;; vshll_n
4343
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4347                          (match_operand:SI 2
4348                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4349                          VSHLL))]
4350   "TARGET_SIMD"
4351   {
4352     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354     else
4355       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356   }
4357   [(set_attr "type" "neon_shift_imm_long")]
4358 )
4359
4360 ;; vshll_high_n
4361
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365                          (match_operand:SI 2 "immediate_operand" "i")]
4366                          VSHLL))]
4367   "TARGET_SIMD"
4368   {
4369     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4371     else
4372       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4373   }
4374   [(set_attr "type" "neon_shift_imm_long")]
4375 )
4376
4377 ;; vrshr_n
4378
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4382                            (match_operand:SI 2
4383                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4384                           VRSHR_N))]
4385   "TARGET_SIMD"
4386   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387   [(set_attr "type" "neon_sat_shift_imm<q>")]
4388 )
4389
4390 ;; v(r)sra_n
4391
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4396                        (match_operand:SI 3
4397                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4398                       VSRA))]
4399   "TARGET_SIMD"
4400   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401   [(set_attr "type" "neon_shift_acc<q>")]
4402 )
4403
4404 ;; vs<lr>i_n
4405
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4410                        (match_operand:SI 3
4411                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4412                       VSLRI))]
4413   "TARGET_SIMD"
4414   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415   [(set_attr "type" "neon_shift_imm<q>")]
4416 )
4417
4418 ;; vqshl(u)
4419
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4423                        (match_operand:SI 2
4424                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4425                       VQSHL_N))]
4426   "TARGET_SIMD"
4427   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428   [(set_attr "type" "neon_sat_shift_imm<q>")]
4429 )
4430
4431
4432 ;; vq(r)shr(u)n_n
4433
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4437                             (match_operand:SI 2
4438                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4439                            VQSHRN_N))]
4440   "TARGET_SIMD"
4441   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4443 )
4444
4445
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note, we have constraints for Dz and Z as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
4449
4450 (define_insn "aarch64_cm<optab><mode>"
4451   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4452         (neg:<V_INT_EQUIV>
4453           (COMPARISONS:<V_INT_EQUIV>
4454             (match_operand:VDQ_I 1 "register_operand" "w,w")
4455             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4456           )))]
4457   "TARGET_SIMD"
4458   "@
4459   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4462 )
4463
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4466         (neg:DI
4467           (COMPARISONS:DI
4468             (match_operand:DI 1 "register_operand" "w,w,r")
4469             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4470           )))
4471      (clobber (reg:CC CC_REGNUM))]
4472   "TARGET_SIMD"
4473   "#"
4474   "&& reload_completed"
4475   [(set (match_operand:DI 0 "register_operand")
4476         (neg:DI
4477           (COMPARISONS:DI
4478             (match_operand:DI 1 "register_operand")
4479             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4480           )))]
4481   {
4482     /* If we are in the general purpose register file,
4483        we split to a sequence of comparison and store.  */
4484     if (GP_REGNUM_P (REGNO (operands[0]))
4485         && GP_REGNUM_P (REGNO (operands[1])))
4486       {
4487         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4491         DONE;
4492       }
4493     /* Otherwise, we expand to a similar pattern which does not
4494        clobber CC_REGNUM.  */
4495   }
4496   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4497 )
4498
4499 (define_insn "*aarch64_cm<optab>di"
4500   [(set (match_operand:DI 0 "register_operand" "=w,w")
4501         (neg:DI
4502           (COMPARISONS:DI
4503             (match_operand:DI 1 "register_operand" "w,w")
4504             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4505           )))]
4506   "TARGET_SIMD && reload_completed"
4507   "@
4508   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509   cm<optab>\t%d0, %d1, #0"
4510   [(set_attr "type" "neon_compare, neon_compare_zero")]
4511 )
4512
4513 ;; cm(hs|hi)
4514
4515 (define_insn "aarch64_cm<optab><mode>"
4516   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4517         (neg:<V_INT_EQUIV>
4518           (UCOMPARISONS:<V_INT_EQUIV>
4519             (match_operand:VDQ_I 1 "register_operand" "w")
4520             (match_operand:VDQ_I 2 "register_operand" "w")
4521           )))]
4522   "TARGET_SIMD"
4523   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524   [(set_attr "type" "neon_compare<q>")]
4525 )
4526
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528   [(set (match_operand:DI 0 "register_operand" "=w,r")
4529         (neg:DI
4530           (UCOMPARISONS:DI
4531             (match_operand:DI 1 "register_operand" "w,r")
4532             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4533           )))
4534     (clobber (reg:CC CC_REGNUM))]
4535   "TARGET_SIMD"
4536   "#"
4537   "&& reload_completed"
4538   [(set (match_operand:DI 0 "register_operand")
4539         (neg:DI
4540           (UCOMPARISONS:DI
4541             (match_operand:DI 1 "register_operand")
4542             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4543           )))]
4544   {
4545     /* If we are in the general purpose register file,
4546        we split to a sequence of comparison and store.  */
4547     if (GP_REGNUM_P (REGNO (operands[0]))
4548         && GP_REGNUM_P (REGNO (operands[1])))
4549       {
4550         machine_mode mode = CCmode;
4551         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4554         DONE;
4555       }
4556     /* Otherwise, we expand to a similar pattern which does not
4557        clobber CC_REGNUM.  */
4558   }
4559   [(set_attr "type" "neon_compare,multiple")]
4560 )
4561
4562 (define_insn "*aarch64_cm<optab>di"
4563   [(set (match_operand:DI 0 "register_operand" "=w")
4564         (neg:DI
4565           (UCOMPARISONS:DI
4566             (match_operand:DI 1 "register_operand" "w")
4567             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4568           )))]
4569   "TARGET_SIMD && reload_completed"
4570   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571   [(set_attr "type" "neon_compare")]
4572 )
4573
4574 ;; cmtst
4575
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
4581
4582 (define_insn "aarch64_cmtst<mode>"
4583   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4584         (plus:<V_INT_EQUIV>
4585           (eq:<V_INT_EQUIV>
4586             (and:VDQ_I
4587               (match_operand:VDQ_I 1 "register_operand" "w")
4588               (match_operand:VDQ_I 2 "register_operand" "w"))
4589             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4591   ]
4592   "TARGET_SIMD"
4593   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594   [(set_attr "type" "neon_tst<q>")]
4595 )
4596
4597 (define_insn_and_split "aarch64_cmtstdi"
4598   [(set (match_operand:DI 0 "register_operand" "=w,r")
4599         (neg:DI
4600           (ne:DI
4601             (and:DI
4602               (match_operand:DI 1 "register_operand" "w,r")
4603               (match_operand:DI 2 "register_operand" "w,r"))
4604             (const_int 0))))
4605     (clobber (reg:CC CC_REGNUM))]
4606   "TARGET_SIMD"
4607   "#"
4608   "&& reload_completed"
4609   [(set (match_operand:DI 0 "register_operand")
4610         (neg:DI
4611           (ne:DI
4612             (and:DI
4613               (match_operand:DI 1 "register_operand")
4614               (match_operand:DI 2 "register_operand"))
4615             (const_int 0))))]
4616   {
4617     /* If we are in the general purpose register file,
4618        we split to a sequence of comparison and store.  */
4619     if (GP_REGNUM_P (REGNO (operands[0]))
4620         && GP_REGNUM_P (REGNO (operands[1])))
4621       {
4622         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4627         DONE;
4628       }
4629     /* Otherwise, we expand to a similar pattern which does not
4630        clobber CC_REGNUM.  */
4631   }
4632   [(set_attr "type" "neon_tst,multiple")]
4633 )
4634
4635 (define_insn "*aarch64_cmtstdi"
4636   [(set (match_operand:DI 0 "register_operand" "=w")
4637         (neg:DI
4638           (ne:DI
4639             (and:DI
4640               (match_operand:DI 1 "register_operand" "w")
4641               (match_operand:DI 2 "register_operand" "w"))
4642             (const_int 0))))]
4643   "TARGET_SIMD"
4644   "cmtst\t%d0, %d1, %d2"
4645   [(set_attr "type" "neon_tst")]
4646 )
4647
4648 ;; fcm(eq|ge|gt|le|lt)
4649
4650 (define_insn "aarch64_cm<optab><mode>"
4651   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4652         (neg:<V_INT_EQUIV>
4653           (COMPARISONS:<V_INT_EQUIV>
4654             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4656           )))]
4657   "TARGET_SIMD"
4658   "@
4659   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4662 )
4663
4664 ;; fac(ge|gt)
4665 ;; Note we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt).
4667
4668 (define_insn "aarch64_fac<optab><mode>"
4669   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4670         (neg:<V_INT_EQUIV>
4671           (FAC_COMPARISONS:<V_INT_EQUIV>
4672             (abs:VHSDF_HSDF
4673               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4674             (abs:VHSDF_HSDF
4675               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4676   )))]
4677   "TARGET_SIMD"
4678   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4680 )
4681
4682 ;; addp
4683
4684 (define_insn "aarch64_addp<mode>"
4685   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4686         (unspec:VD_BHSI
4687           [(match_operand:VD_BHSI 1 "register_operand" "w")
4688            (match_operand:VD_BHSI 2 "register_operand" "w")]
4689           UNSPEC_ADDP))]
4690   "TARGET_SIMD"
4691   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692   [(set_attr "type" "neon_reduc_add<q>")]
4693 )
4694
4695 (define_insn "aarch64_addpdi"
4696   [(set (match_operand:DI 0 "register_operand" "=w")
4697         (unspec:DI
4698           [(match_operand:V2DI 1 "register_operand" "w")]
4699           UNSPEC_ADDP))]
4700   "TARGET_SIMD"
4701   "addp\t%d0, %1.2d"
4702   [(set_attr "type" "neon_reduc_add")]
4703 )
4704
4705 ;; sqrt
4706
4707 (define_expand "sqrt<mode>2"
4708   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4710   "TARGET_SIMD"
4711 {
4712   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4713     DONE;
4714 })
4715
4716 (define_insn "*sqrt<mode>2"
4717   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4719   "TARGET_SIMD"
4720   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4722 )
4723
4724 ;; Patterns for vector struct loads and stores.
4725
4726 (define_insn "aarch64_simd_ld2<mode>"
4727   [(set (match_operand:OI 0 "register_operand" "=w")
4728         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730                    UNSPEC_LD2))]
4731   "TARGET_SIMD"
4732   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733   [(set_attr "type" "neon_load2_2reg<q>")]
4734 )
4735
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737   [(set (match_operand:OI 0 "register_operand" "=w")
4738        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4740                   UNSPEC_LD2_DUP))]
4741   "TARGET_SIMD"
4742   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743   [(set_attr "type" "neon_load2_all_lanes<q>")]
4744 )
4745
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747   [(set (match_operand:OI 0 "register_operand" "=w")
4748         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749                     (match_operand:OI 2 "register_operand" "0")
4750                     (match_operand:SI 3 "immediate_operand" "i")
4751                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4752                    UNSPEC_LD2_LANE))]
4753   "TARGET_SIMD"
4754   {
4755     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4757   }
4758   [(set_attr "type" "neon_load2_one_lane")]
4759 )
4760
4761 (define_expand "vec_load_lanesoi<mode>"
4762   [(set (match_operand:OI 0 "register_operand" "=w")
4763         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765                    UNSPEC_LD2))]
4766   "TARGET_SIMD"
4767 {
4768   if (BYTES_BIG_ENDIAN)
4769     {
4770       rtx tmp = gen_reg_rtx (OImode);
4771       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4774     }
4775   else
4776     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4777   DONE;
4778 })
4779
4780 (define_insn "aarch64_simd_st2<mode>"
4781   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4784                    UNSPEC_ST2))]
4785   "TARGET_SIMD"
4786   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787   [(set_attr "type" "neon_store2_2reg<q>")]
4788 )
4789
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795                     (match_operand:SI 2 "immediate_operand" "i")]
4796                    UNSPEC_ST2_LANE))]
4797   "TARGET_SIMD"
4798   {
4799     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4801   }
4802   [(set_attr "type" "neon_store2_one_lane<q>")]
4803 )
4804
4805 (define_expand "vec_store_lanesoi<mode>"
4806   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4809                    UNSPEC_ST2))]
4810   "TARGET_SIMD"
4811 {
4812   if (BYTES_BIG_ENDIAN)
4813     {
4814       rtx tmp = gen_reg_rtx (OImode);
4815       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4818     }
4819   else
4820     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4821   DONE;
4822 })
4823
4824 (define_insn "aarch64_simd_ld3<mode>"
4825   [(set (match_operand:CI 0 "register_operand" "=w")
4826         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4828                    UNSPEC_LD3))]
4829   "TARGET_SIMD"
4830   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831   [(set_attr "type" "neon_load3_3reg<q>")]
4832 )
4833
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835   [(set (match_operand:CI 0 "register_operand" "=w")
4836        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4838                   UNSPEC_LD3_DUP))]
4839   "TARGET_SIMD"
4840   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841   [(set_attr "type" "neon_load3_all_lanes<q>")]
4842 )
4843
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845   [(set (match_operand:CI 0 "register_operand" "=w")
4846         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847                     (match_operand:CI 2 "register_operand" "0")
4848                     (match_operand:SI 3 "immediate_operand" "i")
4849                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850                    UNSPEC_LD3_LANE))]
4851   "TARGET_SIMD"
4852 {
4853     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4855 }
4856   [(set_attr "type" "neon_load3_one_lane")]
4857 )
4858
4859 (define_expand "vec_load_lanesci<mode>"
4860   [(set (match_operand:CI 0 "register_operand" "=w")
4861         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863                    UNSPEC_LD3))]
4864   "TARGET_SIMD"
4865 {
4866   if (BYTES_BIG_ENDIAN)
4867     {
4868       rtx tmp = gen_reg_rtx (CImode);
4869       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4872     }
4873   else
4874     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4875   DONE;
4876 })
4877
4878 (define_insn "aarch64_simd_st3<mode>"
4879   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882                    UNSPEC_ST3))]
4883   "TARGET_SIMD"
4884   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885   [(set_attr "type" "neon_store3_3reg<q>")]
4886 )
4887
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893                      (match_operand:SI 2 "immediate_operand" "i")]
4894                     UNSPEC_ST3_LANE))]
4895   "TARGET_SIMD"
4896   {
4897     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4899   }
4900   [(set_attr "type" "neon_store3_one_lane<q>")]
4901 )
4902
4903 (define_expand "vec_store_lanesci<mode>"
4904   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907                    UNSPEC_ST3))]
4908   "TARGET_SIMD"
4909 {
4910   if (BYTES_BIG_ENDIAN)
4911     {
4912       rtx tmp = gen_reg_rtx (CImode);
4913       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4916     }
4917   else
4918     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4919   DONE;
4920 })
4921
4922 (define_insn "aarch64_simd_ld4<mode>"
4923   [(set (match_operand:XI 0 "register_operand" "=w")
4924         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926                    UNSPEC_LD4))]
4927   "TARGET_SIMD"
4928   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929   [(set_attr "type" "neon_load4_4reg<q>")]
4930 )
4931
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933   [(set (match_operand:XI 0 "register_operand" "=w")
4934        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4936                   UNSPEC_LD4_DUP))]
4937   "TARGET_SIMD"
4938   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939   [(set_attr "type" "neon_load4_all_lanes<q>")]
4940 )
4941
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943   [(set (match_operand:XI 0 "register_operand" "=w")
4944         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945                     (match_operand:XI 2 "register_operand" "0")
4946                     (match_operand:SI 3 "immediate_operand" "i")
4947                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948                    UNSPEC_LD4_LANE))]
4949   "TARGET_SIMD"
4950 {
4951     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4953 }
4954   [(set_attr "type" "neon_load4_one_lane")]
4955 )
4956
4957 (define_expand "vec_load_lanesxi<mode>"
4958   [(set (match_operand:XI 0 "register_operand" "=w")
4959         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961                    UNSPEC_LD4))]
4962   "TARGET_SIMD"
4963 {
4964   if (BYTES_BIG_ENDIAN)
4965     {
4966       rtx tmp = gen_reg_rtx (XImode);
4967       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4970     }
4971   else
4972     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4973   DONE;
4974 })
4975
4976 (define_insn "aarch64_simd_st4<mode>"
4977   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980                    UNSPEC_ST4))]
4981   "TARGET_SIMD"
4982   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983   [(set_attr "type" "neon_store4_4reg<q>")]
4984 )
4985
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991                      (match_operand:SI 2 "immediate_operand" "i")]
4992                     UNSPEC_ST4_LANE))]
4993   "TARGET_SIMD"
4994   {
4995     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4997   }
4998   [(set_attr "type" "neon_store4_one_lane<q>")]
4999 )
5000
5001 (define_expand "vec_store_lanesxi<mode>"
5002   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005                    UNSPEC_ST4))]
5006   "TARGET_SIMD"
5007 {
5008   if (BYTES_BIG_ENDIAN)
5009     {
5010       rtx tmp = gen_reg_rtx (XImode);
5011       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5014     }
5015   else
5016     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5017   DONE;
5018 })
5019
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5022         (unspec:VSTRUCT
5023                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5024                     (match_operand:V16QI 2 "register_operand" "w")]
5025                    UNSPEC_REV_REGLIST))]
5026   "TARGET_SIMD"
5027   "#"
5028   "&& reload_completed"
5029   [(const_int 0)]
5030 {
5031   int i;
5032   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033   for (i = 0; i < nregs; i++)
5034     {
5035       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5038     }
5039   DONE;
5040 }
5041   [(set_attr "type" "neon_tbl1_q")
5042    (set_attr "length" "<insn_count>")]
5043 )
5044
5045 ;; Reload patterns for AdvSIMD register list operands.
5046
5047 (define_expand "mov<mode>"
5048   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049         (match_operand:VSTRUCT 1 "general_operand" ""))]
5050   "TARGET_SIMD"
5051 {
5052   if (can_create_pseudo_p ())
5053     {
5054       if (GET_CODE (operands[0]) != REG)
5055         operands[1] = force_reg (<MODE>mode, operands[1]);
5056     }
5057 })
5058
5059
5060 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5061   [(match_operand:CI 0 "register_operand" "=w")
5062    (match_operand:DI 1 "register_operand" "r")
5063    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064   "TARGET_SIMD"
5065 {
5066   rtx mem = gen_rtx_MEM (CImode, operands[1]);
5067   emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5068   DONE;
5069 })
5070
5071 (define_insn "aarch64_ld1_x3_<mode>"
5072   [(set (match_operand:CI 0 "register_operand" "=w")
5073         (unspec:CI
5074           [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5075            (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5076   "TARGET_SIMD"
5077   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5078   [(set_attr "type" "neon_load1_3reg<q>")]
5079 )
5080
5081 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5082   [(match_operand:DI 0 "register_operand" "")
5083    (match_operand:OI 1 "register_operand" "")
5084    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085   "TARGET_SIMD"
5086 {
5087   rtx mem = gen_rtx_MEM (OImode, operands[0]);
5088   emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5089   DONE;
5090 })
5091
5092 (define_insn "aarch64_st1_x2_<mode>"
5093    [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5094          (unspec:OI
5095           [(match_operand:OI 1 "register_operand" "w")
5096           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5097   "TARGET_SIMD"
5098   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5099   [(set_attr "type" "neon_store1_2reg<q>")]
5100 )
5101
5102 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5103   [(match_operand:DI 0 "register_operand" "")
5104    (match_operand:CI 1 "register_operand" "")
5105    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5106   "TARGET_SIMD"
5107 {
5108   rtx mem = gen_rtx_MEM (CImode, operands[0]);
5109   emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5110   DONE;
5111 })
5112
5113 (define_insn "aarch64_st1_x3_<mode>"
5114    [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5115         (unspec:CI
5116          [(match_operand:CI 1 "register_operand" "w")
5117           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5118   "TARGET_SIMD"
5119   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5120   [(set_attr "type" "neon_store1_3reg<q>")]
5121 )
5122
5123 (define_insn "*aarch64_mov<mode>"
5124   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5125         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5126   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5127    && (register_operand (operands[0], <MODE>mode)
5128        || register_operand (operands[1], <MODE>mode))"
5129   "@
5130    #
5131    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5132    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5133   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5134                      neon_load<nregs>_<nregs>reg_q")
5135    (set_attr "length" "<insn_count>,4,4")]
5136 )
5137
5138 (define_insn "aarch64_be_ld1<mode>"
5139   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5140         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5141                              "aarch64_simd_struct_operand" "Utv")]
5142         UNSPEC_LD1))]
5143   "TARGET_SIMD"
5144   "ld1\\t{%0<Vmtype>}, %1"
5145   [(set_attr "type" "neon_load1_1reg<q>")]
5146 )
5147
5148 (define_insn "aarch64_be_st1<mode>"
5149   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5150         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5151         UNSPEC_ST1))]
5152   "TARGET_SIMD"
5153   "st1\\t{%1<Vmtype>}, %0"
5154   [(set_attr "type" "neon_store1_1reg<q>")]
5155 )
5156
5157 (define_insn "*aarch64_be_movoi"
5158   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5159         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5160   "TARGET_SIMD && BYTES_BIG_ENDIAN
5161    && (register_operand (operands[0], OImode)
5162        || register_operand (operands[1], OImode))"
5163   "@
5164    #
5165    stp\\t%q1, %R1, %0
5166    ldp\\t%q0, %R0, %1"
5167   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5168    (set_attr "length" "8,4,4")]
5169 )
5170
5171 (define_insn "*aarch64_be_movci"
5172   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5173         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5174   "TARGET_SIMD && BYTES_BIG_ENDIAN
5175    && (register_operand (operands[0], CImode)
5176        || register_operand (operands[1], CImode))"
5177   "#"
5178   [(set_attr "type" "multiple")
5179    (set_attr "length" "12,4,4")]
5180 )
5181
5182 (define_insn "*aarch64_be_movxi"
5183   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5184         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5185   "TARGET_SIMD && BYTES_BIG_ENDIAN
5186    && (register_operand (operands[0], XImode)
5187        || register_operand (operands[1], XImode))"
5188   "#"
5189   [(set_attr "type" "multiple")
5190    (set_attr "length" "16,4,4")]
5191 )
5192
5193 (define_split
5194   [(set (match_operand:OI 0 "register_operand")
5195         (match_operand:OI 1 "register_operand"))]
5196   "TARGET_SIMD && reload_completed"
5197   [(const_int 0)]
5198 {
5199   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5200   DONE;
5201 })
5202
5203 (define_split
5204   [(set (match_operand:CI 0 "nonimmediate_operand")
5205         (match_operand:CI 1 "general_operand"))]
5206   "TARGET_SIMD && reload_completed"
5207   [(const_int 0)]
5208 {
5209   if (register_operand (operands[0], CImode)
5210       && register_operand (operands[1], CImode))
5211     {
5212       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5213       DONE;
5214     }
5215   else if (BYTES_BIG_ENDIAN)
5216     {
5217       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5218                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5219       emit_move_insn (gen_lowpart (V16QImode,
5220                                    simplify_gen_subreg (TImode, operands[0],
5221                                                         CImode, 32)),
5222                       gen_lowpart (V16QImode,
5223                                    simplify_gen_subreg (TImode, operands[1],
5224                                                         CImode, 32)));
5225       DONE;
5226     }
5227   else
5228     FAIL;
5229 })
5230
5231 (define_split
5232   [(set (match_operand:XI 0 "nonimmediate_operand")
5233         (match_operand:XI 1 "general_operand"))]
5234   "TARGET_SIMD && reload_completed"
5235   [(const_int 0)]
5236 {
5237   if (register_operand (operands[0], XImode)
5238       && register_operand (operands[1], XImode))
5239     {
5240       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5241       DONE;
5242     }
5243   else if (BYTES_BIG_ENDIAN)
5244     {
5245       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5246                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5247       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5248                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5249       DONE;
5250     }
5251   else
5252     FAIL;
5253 })
5254
5255 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5256   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5257    (match_operand:DI 1 "register_operand" "w")
5258    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259   "TARGET_SIMD"
5260 {
5261   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5262   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5263                      * <VSTRUCT:nregs>);
5264
5265   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5266                                                                 mem));
5267   DONE;
5268 })
5269
5270 (define_insn "aarch64_ld2<mode>_dreg"
5271   [(set (match_operand:OI 0 "register_operand" "=w")
5272         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5273                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274                    UNSPEC_LD2_DREG))]
5275   "TARGET_SIMD"
5276   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5277   [(set_attr "type" "neon_load2_2reg<q>")]
5278 )
5279
5280 (define_insn "aarch64_ld2<mode>_dreg"
5281   [(set (match_operand:OI 0 "register_operand" "=w")
5282         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5283                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5284                    UNSPEC_LD2_DREG))]
5285   "TARGET_SIMD"
5286   "ld1\\t{%S0.1d - %T0.1d}, %1"
5287   [(set_attr "type" "neon_load1_2reg<q>")]
5288 )
5289
5290 (define_insn "aarch64_ld3<mode>_dreg"
5291   [(set (match_operand:CI 0 "register_operand" "=w")
5292         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5293                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5294                    UNSPEC_LD3_DREG))]
5295   "TARGET_SIMD"
5296   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5297   [(set_attr "type" "neon_load3_3reg<q>")]
5298 )
5299
5300 (define_insn "aarch64_ld3<mode>_dreg"
5301   [(set (match_operand:CI 0 "register_operand" "=w")
5302         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5303                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5304                    UNSPEC_LD3_DREG))]
5305   "TARGET_SIMD"
5306   "ld1\\t{%S0.1d - %U0.1d}, %1"
5307   [(set_attr "type" "neon_load1_3reg<q>")]
5308 )
5309
5310 (define_insn "aarch64_ld4<mode>_dreg"
5311   [(set (match_operand:XI 0 "register_operand" "=w")
5312         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5313                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5314                    UNSPEC_LD4_DREG))]
5315   "TARGET_SIMD"
5316   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5317   [(set_attr "type" "neon_load4_4reg<q>")]
5318 )
5319
5320 (define_insn "aarch64_ld4<mode>_dreg"
5321   [(set (match_operand:XI 0 "register_operand" "=w")
5322         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5323                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324                    UNSPEC_LD4_DREG))]
5325   "TARGET_SIMD"
5326   "ld1\\t{%S0.1d - %V0.1d}, %1"
5327   [(set_attr "type" "neon_load1_4reg<q>")]
5328 )
5329
5330 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5331  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5332   (match_operand:DI 1 "register_operand" "r")
5333   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5334   "TARGET_SIMD"
5335 {
5336   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5337   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5338
5339   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5340   DONE;
5341 })
5342
5343 (define_expand "aarch64_ld1<VALL_F16:mode>"
5344  [(match_operand:VALL_F16 0 "register_operand")
5345   (match_operand:DI 1 "register_operand")]
5346   "TARGET_SIMD"
5347 {
5348   machine_mode mode = <VALL_F16:MODE>mode;
5349   rtx mem = gen_rtx_MEM (mode, operands[1]);
5350
5351   if (BYTES_BIG_ENDIAN)
5352     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5353   else
5354     emit_move_insn (operands[0], mem);
5355   DONE;
5356 })
5357
5358 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5359  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5360   (match_operand:DI 1 "register_operand" "r")
5361   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5362   "TARGET_SIMD"
5363 {
5364   machine_mode mode = <VSTRUCT:MODE>mode;
5365   rtx mem = gen_rtx_MEM (mode, operands[1]);
5366
5367   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5368   DONE;
5369 })
5370
5371 (define_expand "aarch64_ld1x2<VQ:mode>"
5372  [(match_operand:OI 0 "register_operand" "=w")
5373   (match_operand:DI 1 "register_operand" "r")
5374   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5375   "TARGET_SIMD"
5376 {
5377   machine_mode mode = OImode;
5378   rtx mem = gen_rtx_MEM (mode, operands[1]);
5379
5380   emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5381   DONE;
5382 })
5383
5384 (define_expand "aarch64_ld1x2<VDC:mode>"
5385  [(match_operand:OI 0 "register_operand" "=w")
5386   (match_operand:DI 1 "register_operand" "r")
5387   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5388   "TARGET_SIMD"
5389 {
5390   machine_mode mode = OImode;
5391   rtx mem = gen_rtx_MEM (mode, operands[1]);
5392
5393   emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5394   DONE;
5395 })
5396
5397
5398 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5399   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5400         (match_operand:DI 1 "register_operand" "w")
5401         (match_operand:VSTRUCT 2 "register_operand" "0")
5402         (match_operand:SI 3 "immediate_operand" "i")
5403         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5404   "TARGET_SIMD"
5405 {
5406   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5407   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5408                      * <VSTRUCT:nregs>);
5409
5410   aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5411   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5412         operands[0], mem, operands[2], operands[3]));
5413   DONE;
5414 })
5415
5416 ;; Expanders for builtins to extract vector registers from large
5417 ;; opaque integer modes.
5418
5419 ;; D-register list.
5420
5421 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5422  [(match_operand:VDC 0 "register_operand" "=w")
5423   (match_operand:VSTRUCT 1 "register_operand" "w")
5424   (match_operand:SI 2 "immediate_operand" "i")]
5425   "TARGET_SIMD"
5426 {
5427   int part = INTVAL (operands[2]);
5428   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5429   int offset = part * 16;
5430
5431   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5432   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5433   DONE;
5434 })
5435
5436 ;; Q-register list.
5437
5438 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5439  [(match_operand:VQ 0 "register_operand" "=w")
5440   (match_operand:VSTRUCT 1 "register_operand" "w")
5441   (match_operand:SI 2 "immediate_operand" "i")]
5442   "TARGET_SIMD"
5443 {
5444   int part = INTVAL (operands[2]);
5445   int offset = part * 16;
5446
5447   emit_move_insn (operands[0],
5448                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5449   DONE;
5450 })
5451
5452 ;; Permuted-store expanders for neon intrinsics.
5453
5454 ;; Permute instructions
5455
5456 ;; vec_perm support
5457
5458 (define_expand "vec_perm<mode>"
5459   [(match_operand:VB 0 "register_operand")
5460    (match_operand:VB 1 "register_operand")
5461    (match_operand:VB 2 "register_operand")
5462    (match_operand:VB 3 "register_operand")]
5463   "TARGET_SIMD"
5464 {
5465   aarch64_expand_vec_perm (operands[0], operands[1],
5466                            operands[2], operands[3], <nunits>);
5467   DONE;
5468 })
5469
5470 (define_insn "aarch64_tbl1<mode>"
5471   [(set (match_operand:VB 0 "register_operand" "=w")
5472         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5473                     (match_operand:VB 2 "register_operand" "w")]
5474                    UNSPEC_TBL))]
5475   "TARGET_SIMD"
5476   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5477   [(set_attr "type" "neon_tbl1<q>")]
5478 )
5479
5480 ;; Two source registers.
5481
5482 (define_insn "aarch64_tbl2v16qi"
5483   [(set (match_operand:V16QI 0 "register_operand" "=w")
5484         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5485                        (match_operand:V16QI 2 "register_operand" "w")]
5486                       UNSPEC_TBL))]
5487   "TARGET_SIMD"
5488   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5489   [(set_attr "type" "neon_tbl2_q")]
5490 )
5491
5492 (define_insn "aarch64_tbl3<mode>"
5493   [(set (match_operand:VB 0 "register_operand" "=w")
5494         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5495                       (match_operand:VB 2 "register_operand" "w")]
5496                       UNSPEC_TBL))]
5497   "TARGET_SIMD"
5498   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5499   [(set_attr "type" "neon_tbl3")]
5500 )
5501
5502 (define_insn "aarch64_tbx4<mode>"
5503   [(set (match_operand:VB 0 "register_operand" "=w")
5504         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5505                       (match_operand:OI 2 "register_operand" "w")
5506                       (match_operand:VB 3 "register_operand" "w")]
5507                       UNSPEC_TBX))]
5508   "TARGET_SIMD"
5509   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5510   [(set_attr "type" "neon_tbl4")]
5511 )
5512
5513 ;; Three source registers.
5514
5515 (define_insn "aarch64_qtbl3<mode>"
5516   [(set (match_operand:VB 0 "register_operand" "=w")
5517         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5518                       (match_operand:VB 2 "register_operand" "w")]
5519                       UNSPEC_TBL))]
5520   "TARGET_SIMD"
5521   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5522   [(set_attr "type" "neon_tbl3")]
5523 )
5524
5525 (define_insn "aarch64_qtbx3<mode>"
5526   [(set (match_operand:VB 0 "register_operand" "=w")
5527         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5528                       (match_operand:CI 2 "register_operand" "w")
5529                       (match_operand:VB 3 "register_operand" "w")]
5530                       UNSPEC_TBX))]
5531   "TARGET_SIMD"
5532   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5533   [(set_attr "type" "neon_tbl3")]
5534 )
5535
5536 ;; Four source registers.
5537
5538 (define_insn "aarch64_qtbl4<mode>"
5539   [(set (match_operand:VB 0 "register_operand" "=w")
5540         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5541                       (match_operand:VB 2 "register_operand" "w")]
5542                       UNSPEC_TBL))]
5543   "TARGET_SIMD"
5544   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5545   [(set_attr "type" "neon_tbl4")]
5546 )
5547
5548 (define_insn "aarch64_qtbx4<mode>"
5549   [(set (match_operand:VB 0 "register_operand" "=w")
5550         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5551                       (match_operand:XI 2 "register_operand" "w")
5552                       (match_operand:VB 3 "register_operand" "w")]
5553                       UNSPEC_TBX))]
5554   "TARGET_SIMD"
5555   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5556   [(set_attr "type" "neon_tbl4")]
5557 )
5558
5559 (define_insn_and_split "aarch64_combinev16qi"
5560   [(set (match_operand:OI 0 "register_operand" "=w")
5561         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5562                     (match_operand:V16QI 2 "register_operand" "w")]
5563                    UNSPEC_CONCAT))]
5564   "TARGET_SIMD"
5565   "#"
5566   "&& reload_completed"
5567   [(const_int 0)]
5568 {
5569   aarch64_split_combinev16qi (operands);
5570   DONE;
5571 }
5572 [(set_attr "type" "multiple")]
5573 )
5574
5575 ;; This instruction's pattern is generated directly by
5576 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5577 ;; need corresponding changes there.
5578 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5579   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5580         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5581                           (match_operand:VALL_F16 2 "register_operand" "w")]
5582          PERMUTE))]
5583   "TARGET_SIMD"
5584   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5585   [(set_attr "type" "neon_permute<q>")]
5586 )
5587
5588 ;; This instruction's pattern is generated directly by
5589 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5590 ;; need corresponding changes there.  Note that the immediate (third)
5591 ;; operand is a lane index not a byte index.
5592 (define_insn "aarch64_ext<mode>"
5593   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5594         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5595                           (match_operand:VALL_F16 2 "register_operand" "w")
5596                           (match_operand:SI 3 "immediate_operand" "i")]
5597          UNSPEC_EXT))]
5598   "TARGET_SIMD"
5599 {
5600   operands[3] = GEN_INT (INTVAL (operands[3])
5601       * GET_MODE_UNIT_SIZE (<MODE>mode));
5602   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5603 }
5604   [(set_attr "type" "neon_ext<q>")]
5605 )
5606
5607 ;; This instruction's pattern is generated directly by
5608 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5609 ;; need corresponding changes there.
5610 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5611   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5612         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5613                     REVERSE))]
5614   "TARGET_SIMD"
5615   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5616   [(set_attr "type" "neon_rev<q>")]
5617 )
5618
5619 (define_insn "aarch64_st2<mode>_dreg"
5620   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5621         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5622                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5623                    UNSPEC_ST2))]
5624   "TARGET_SIMD"
5625   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5626   [(set_attr "type" "neon_store2_2reg")]
5627 )
5628
5629 (define_insn "aarch64_st2<mode>_dreg"
5630   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5631         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5632                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5633                    UNSPEC_ST2))]
5634   "TARGET_SIMD"
5635   "st1\\t{%S1.1d - %T1.1d}, %0"
5636   [(set_attr "type" "neon_store1_2reg")]
5637 )
5638
5639 (define_insn "aarch64_st3<mode>_dreg"
5640   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5641         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5642                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5643                    UNSPEC_ST3))]
5644   "TARGET_SIMD"
5645   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5646   [(set_attr "type" "neon_store3_3reg")]
5647 )
5648
5649 (define_insn "aarch64_st3<mode>_dreg"
5650   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5651         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5652                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5653                    UNSPEC_ST3))]
5654   "TARGET_SIMD"
5655   "st1\\t{%S1.1d - %U1.1d}, %0"
5656   [(set_attr "type" "neon_store1_3reg")]
5657 )
5658
5659 (define_insn "aarch64_st4<mode>_dreg"
5660   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5663                    UNSPEC_ST4))]
5664   "TARGET_SIMD"
5665   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5666   [(set_attr "type" "neon_store4_4reg")]
5667 )
5668
5669 (define_insn "aarch64_st4<mode>_dreg"
5670   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5671         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5672                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5673                    UNSPEC_ST4))]
5674   "TARGET_SIMD"
5675   "st1\\t{%S1.1d - %V1.1d}, %0"
5676   [(set_attr "type" "neon_store1_4reg")]
5677 )
5678
5679 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5680  [(match_operand:DI 0 "register_operand" "r")
5681   (match_operand:VSTRUCT 1 "register_operand" "w")
5682   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5683   "TARGET_SIMD"
5684 {
5685   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5686   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5687
5688   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5689   DONE;
5690 })
5691
5692 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5693  [(match_operand:DI 0 "register_operand" "r")
5694   (match_operand:VSTRUCT 1 "register_operand" "w")
5695   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5696   "TARGET_SIMD"
5697 {
5698   machine_mode mode = <VSTRUCT:MODE>mode;
5699   rtx mem = gen_rtx_MEM (mode, operands[0]);
5700
5701   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5702   DONE;
5703 })
5704
5705 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5706  [(match_operand:DI 0 "register_operand" "r")
5707   (match_operand:VSTRUCT 1 "register_operand" "w")
5708   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5709   (match_operand:SI 2 "immediate_operand")]
5710   "TARGET_SIMD"
5711 {
5712   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5713   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5714                      * <VSTRUCT:nregs>);
5715
5716   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5717                 mem, operands[1], operands[2]));
5718   DONE;
5719 })
5720
5721 (define_expand "aarch64_st1<VALL_F16:mode>"
5722  [(match_operand:DI 0 "register_operand")
5723   (match_operand:VALL_F16 1 "register_operand")]
5724   "TARGET_SIMD"
5725 {
5726   machine_mode mode = <VALL_F16:MODE>mode;
5727   rtx mem = gen_rtx_MEM (mode, operands[0]);
5728
5729   if (BYTES_BIG_ENDIAN)
5730     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5731   else
5732     emit_move_insn (mem, operands[1]);
5733   DONE;
5734 })
5735
5736 ;; Expander for builtins to insert vector registers into large
5737 ;; opaque integer modes.
5738
5739 ;; Q-register list.  We don't need a D-reg inserter as we zero
5740 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5741
5742 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5743  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5744   (match_operand:VSTRUCT 1 "register_operand" "0")
5745   (match_operand:VQ 2 "register_operand" "w")
5746   (match_operand:SI 3 "immediate_operand" "i")]
5747   "TARGET_SIMD"
5748 {
5749   int part = INTVAL (operands[3]);
5750   int offset = part * 16;
5751
5752   emit_move_insn (operands[0], operands[1]);
5753   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5754                   operands[2]);
5755   DONE;
5756 })
5757
5758 ;; Standard pattern name vec_init<mode><Vel>.
5759
5760 (define_expand "vec_init<mode><Vel>"
5761   [(match_operand:VALL_F16 0 "register_operand" "")
5762    (match_operand 1 "" "")]
5763   "TARGET_SIMD"
5764 {
5765   aarch64_expand_vector_init (operands[0], operands[1]);
5766   DONE;
5767 })
5768
5769 (define_insn "*aarch64_simd_ld1r<mode>"
5770   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5771         (vec_duplicate:VALL_F16
5772           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5773   "TARGET_SIMD"
5774   "ld1r\\t{%0.<Vtype>}, %1"
5775   [(set_attr "type" "neon_load1_all_lanes")]
5776 )
5777
5778 (define_insn "aarch64_simd_ld1<mode>_x2"
5779   [(set (match_operand:OI 0 "register_operand" "=w")
5780         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5781                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5782                    UNSPEC_LD1))]
5783   "TARGET_SIMD"
5784   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5785   [(set_attr "type" "neon_load1_2reg<q>")]
5786 )
5787
5788 (define_insn "aarch64_simd_ld1<mode>_x2"
5789   [(set (match_operand:OI 0 "register_operand" "=w")
5790         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5791                     (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5792                    UNSPEC_LD1))]
5793   "TARGET_SIMD"
5794   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5795   [(set_attr "type" "neon_load1_2reg<q>")]
5796 )
5797
5798
5799 (define_insn "aarch64_frecpe<mode>"
5800   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5801         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5802          UNSPEC_FRECPE))]
5803   "TARGET_SIMD"
5804   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5805   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5806 )
5807
5808 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5809   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5810         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5811          FRECP))]
5812   "TARGET_SIMD"
5813   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5814   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5815 )
5816
5817 (define_insn "aarch64_frecps<mode>"
5818   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5819         (unspec:VHSDF_HSDF
5820           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5821           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5822           UNSPEC_FRECPS))]
5823   "TARGET_SIMD"
5824   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5825   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5826 )
5827
5828 (define_insn "aarch64_urecpe<mode>"
5829   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5830         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5831                 UNSPEC_URECPE))]
5832  "TARGET_SIMD"
5833  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5834   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5835
5836 ;; Standard pattern name vec_extract<mode><Vel>.
5837
5838 (define_expand "vec_extract<mode><Vel>"
5839   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5840    (match_operand:VALL_F16 1 "register_operand" "")
5841    (match_operand:SI 2 "immediate_operand" "")]
5842   "TARGET_SIMD"
5843 {
5844     emit_insn
5845       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5846     DONE;
5847 })
5848
5849 ;; aes
5850
5851 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5852   [(set (match_operand:V16QI 0 "register_operand" "=w")
5853         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5854                        (match_operand:V16QI 2 "register_operand" "w")]
5855          CRYPTO_AES))]
5856   "TARGET_SIMD && TARGET_AES"
5857   "aes<aes_op>\\t%0.16b, %2.16b"
5858   [(set_attr "type" "crypto_aese")]
5859 )
5860
5861 ;; When AES/AESMC fusion is enabled we want the register allocation to
5862 ;; look like:
5863 ;;    AESE Vn, _
5864 ;;    AESMC Vn, Vn
5865 ;; So prefer to tie operand 1 to operand 0 when fusing.
5866
5867 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5868   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5869         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5870          CRYPTO_AESMC))]
5871   "TARGET_SIMD && TARGET_AES"
5872   "aes<aesmc_op>\\t%0.16b, %1.16b"
5873   [(set_attr "type" "crypto_aesmc")
5874    (set_attr_alternative "enabled"
5875      [(if_then_else (match_test
5876                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5877                      (const_string "yes" )
5878                      (const_string "no"))
5879       (const_string "yes")])]
5880 )
5881
5882 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5883 ;; and enforce the register dependency without scheduling or register
5884 ;; allocation messing up the order or introducing moves inbetween.
5885 ;;  Mash the two together during combine.
5886
5887 (define_insn "*aarch64_crypto_aese_fused"
5888   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5889         (unspec:V16QI
5890           [(unspec:V16QI
5891             [(match_operand:V16QI 1 "register_operand" "0")
5892              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5893           ] UNSPEC_AESMC))]
5894   "TARGET_SIMD && TARGET_AES
5895    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5896   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5897   [(set_attr "type" "crypto_aese")
5898    (set_attr "length" "8")]
5899 )
5900
5901 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5902 ;; and enforce the register dependency without scheduling or register
5903 ;; allocation messing up the order or introducing moves inbetween.
5904 ;;  Mash the two together during combine.
5905
5906 (define_insn "*aarch64_crypto_aesd_fused"
5907   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5908         (unspec:V16QI
5909           [(unspec:V16QI
5910             [(match_operand:V16QI 1 "register_operand" "0")
5911              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5912           ] UNSPEC_AESIMC))]
5913   "TARGET_SIMD && TARGET_AES
5914    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5915   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5916   [(set_attr "type" "crypto_aese")
5917    (set_attr "length" "8")]
5918 )
5919
5920 ;; sha1
5921
5922 (define_insn "aarch64_crypto_sha1hsi"
5923   [(set (match_operand:SI 0 "register_operand" "=w")
5924         (unspec:SI [(match_operand:SI 1
5925                        "register_operand" "w")]
5926          UNSPEC_SHA1H))]
5927   "TARGET_SIMD && TARGET_SHA2"
5928   "sha1h\\t%s0, %s1"
5929   [(set_attr "type" "crypto_sha1_fast")]
5930 )
5931
5932 (define_insn "aarch64_crypto_sha1hv4si"
5933   [(set (match_operand:SI 0 "register_operand" "=w")
5934         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5935                      (parallel [(const_int 0)]))]
5936          UNSPEC_SHA1H))]
5937   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5938   "sha1h\\t%s0, %s1"
5939   [(set_attr "type" "crypto_sha1_fast")]
5940 )
5941
5942 (define_insn "aarch64_be_crypto_sha1hv4si"
5943   [(set (match_operand:SI 0 "register_operand" "=w")
5944         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5945                      (parallel [(const_int 3)]))]
5946          UNSPEC_SHA1H))]
5947   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5948   "sha1h\\t%s0, %s1"
5949   [(set_attr "type" "crypto_sha1_fast")]
5950 )
5951
5952 (define_insn "aarch64_crypto_sha1su1v4si"
5953   [(set (match_operand:V4SI 0 "register_operand" "=w")
5954         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5955                       (match_operand:V4SI 2 "register_operand" "w")]
5956          UNSPEC_SHA1SU1))]
5957   "TARGET_SIMD && TARGET_SHA2"
5958   "sha1su1\\t%0.4s, %2.4s"
5959   [(set_attr "type" "crypto_sha1_fast")]
5960 )
5961
5962 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5963   [(set (match_operand:V4SI 0 "register_operand" "=w")
5964         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5965                       (match_operand:SI 2 "register_operand" "w")
5966                       (match_operand:V4SI 3 "register_operand" "w")]
5967          CRYPTO_SHA1))]
5968   "TARGET_SIMD && TARGET_SHA2"
5969   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5970   [(set_attr "type" "crypto_sha1_slow")]
5971 )
5972
5973 (define_insn "aarch64_crypto_sha1su0v4si"
5974   [(set (match_operand:V4SI 0 "register_operand" "=w")
5975         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5976                       (match_operand:V4SI 2 "register_operand" "w")
5977                       (match_operand:V4SI 3 "register_operand" "w")]
5978          UNSPEC_SHA1SU0))]
5979   "TARGET_SIMD && TARGET_SHA2"
5980   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5981   [(set_attr "type" "crypto_sha1_xor")]
5982 )
5983
5984 ;; sha256
5985
5986 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5987   [(set (match_operand:V4SI 0 "register_operand" "=w")
5988         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5989                       (match_operand:V4SI 2 "register_operand" "w")
5990                       (match_operand:V4SI 3 "register_operand" "w")]
5991          CRYPTO_SHA256))]
5992   "TARGET_SIMD && TARGET_SHA2"
5993   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5994   [(set_attr "type" "crypto_sha256_slow")]
5995 )
5996
5997 (define_insn "aarch64_crypto_sha256su0v4si"
5998   [(set (match_operand:V4SI 0 "register_operand" "=w")
5999         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6000                       (match_operand:V4SI 2 "register_operand" "w")]
6001          UNSPEC_SHA256SU0))]
6002   "TARGET_SIMD && TARGET_SHA2"
6003   "sha256su0\\t%0.4s, %2.4s"
6004   [(set_attr "type" "crypto_sha256_fast")]
6005 )
6006
6007 (define_insn "aarch64_crypto_sha256su1v4si"
6008   [(set (match_operand:V4SI 0 "register_operand" "=w")
6009         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6010                       (match_operand:V4SI 2 "register_operand" "w")
6011                       (match_operand:V4SI 3 "register_operand" "w")]
6012          UNSPEC_SHA256SU1))]
6013   "TARGET_SIMD && TARGET_SHA2"
6014   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6015   [(set_attr "type" "crypto_sha256_slow")]
6016 )
6017
6018 ;; sha512
6019
6020 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6021   [(set (match_operand:V2DI 0 "register_operand" "=w")
6022         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6023                       (match_operand:V2DI 2 "register_operand" "w")
6024                       (match_operand:V2DI 3 "register_operand" "w")]
6025          CRYPTO_SHA512))]
6026   "TARGET_SIMD && TARGET_SHA3"
6027   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6028   [(set_attr "type" "crypto_sha512")]
6029 )
6030
6031 (define_insn "aarch64_crypto_sha512su0qv2di"
6032   [(set (match_operand:V2DI 0 "register_operand" "=w")
6033         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6034                       (match_operand:V2DI 2 "register_operand" "w")]
6035          UNSPEC_SHA512SU0))]
6036   "TARGET_SIMD && TARGET_SHA3"
6037   "sha512su0\\t%0.2d, %2.2d"
6038   [(set_attr "type" "crypto_sha512")]
6039 )
6040
6041 (define_insn "aarch64_crypto_sha512su1qv2di"
6042   [(set (match_operand:V2DI 0 "register_operand" "=w")
6043         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6044                       (match_operand:V2DI 2 "register_operand" "w")
6045                       (match_operand:V2DI 3 "register_operand" "w")]
6046          UNSPEC_SHA512SU1))]
6047   "TARGET_SIMD && TARGET_SHA3"
6048   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6049   [(set_attr "type" "crypto_sha512")]
6050 )
6051
6052 ;; sha3
6053
6054 (define_insn "eor3q<mode>4"
6055   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6056         (xor:VQ_I
6057          (xor:VQ_I
6058           (match_operand:VQ_I 2 "register_operand" "w")
6059           (match_operand:VQ_I 3 "register_operand" "w"))
6060          (match_operand:VQ_I 1 "register_operand" "w")))]
6061   "TARGET_SIMD && TARGET_SHA3"
6062   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6063   [(set_attr "type" "crypto_sha3")]
6064 )
6065
6066 (define_insn "aarch64_rax1qv2di"
6067   [(set (match_operand:V2DI 0 "register_operand" "=w")
6068         (xor:V2DI
6069          (rotate:V2DI
6070           (match_operand:V2DI 2 "register_operand" "w")
6071           (const_int 1))
6072          (match_operand:V2DI 1 "register_operand" "w")))]
6073   "TARGET_SIMD && TARGET_SHA3"
6074   "rax1\\t%0.2d, %1.2d, %2.2d"
6075   [(set_attr "type" "crypto_sha3")]
6076 )
6077
6078 (define_insn "aarch64_xarqv2di"
6079   [(set (match_operand:V2DI 0 "register_operand" "=w")
6080         (rotatert:V2DI
6081          (xor:V2DI
6082           (match_operand:V2DI 1 "register_operand" "%w")
6083           (match_operand:V2DI 2 "register_operand" "w"))
6084          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6085   "TARGET_SIMD && TARGET_SHA3"
6086   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6087   [(set_attr "type" "crypto_sha3")]
6088 )
6089
6090 (define_insn "bcaxq<mode>4"
6091   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6092         (xor:VQ_I
6093          (and:VQ_I
6094           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6095           (match_operand:VQ_I 2 "register_operand" "w"))
6096          (match_operand:VQ_I 1 "register_operand" "w")))]
6097   "TARGET_SIMD && TARGET_SHA3"
6098   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6099   [(set_attr "type" "crypto_sha3")]
6100 )
6101
6102 ;; SM3
6103
6104 (define_insn "aarch64_sm3ss1qv4si"
6105   [(set (match_operand:V4SI 0 "register_operand" "=w")
6106         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6107                       (match_operand:V4SI 2 "register_operand" "w")
6108                       (match_operand:V4SI 3 "register_operand" "w")]
6109          UNSPEC_SM3SS1))]
6110   "TARGET_SIMD && TARGET_SM4"
6111   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6112   [(set_attr "type" "crypto_sm3")]
6113 )
6114
6115
6116 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6117   [(set (match_operand:V4SI 0 "register_operand" "=w")
6118         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6119                       (match_operand:V4SI 2 "register_operand" "w")
6120                       (match_operand:V4SI 3 "register_operand" "w")
6121                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6122          CRYPTO_SM3TT))]
6123   "TARGET_SIMD && TARGET_SM4"
6124   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6125   [(set_attr "type" "crypto_sm3")]
6126 )
6127
6128 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6129   [(set (match_operand:V4SI 0 "register_operand" "=w")
6130         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6131                       (match_operand:V4SI 2 "register_operand" "w")
6132                       (match_operand:V4SI 3 "register_operand" "w")]
6133          CRYPTO_SM3PART))]
6134   "TARGET_SIMD && TARGET_SM4"
6135   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6136   [(set_attr "type" "crypto_sm3")]
6137 )
6138
6139 ;; SM4
6140
6141 (define_insn "aarch64_sm4eqv4si"
6142   [(set (match_operand:V4SI 0 "register_operand" "=w")
6143         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6144                       (match_operand:V4SI 2 "register_operand" "w")]
6145          UNSPEC_SM4E))]
6146   "TARGET_SIMD && TARGET_SM4"
6147   "sm4e\\t%0.4s, %2.4s"
6148   [(set_attr "type" "crypto_sm4")]
6149 )
6150
6151 (define_insn "aarch64_sm4ekeyqv4si"
6152   [(set (match_operand:V4SI 0 "register_operand" "=w")
6153         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6154                       (match_operand:V4SI 2 "register_operand" "w")]
6155          UNSPEC_SM4EKEY))]
6156   "TARGET_SIMD && TARGET_SM4"
6157   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6158   [(set_attr "type" "crypto_sm4")]
6159 )
6160
6161 ;; fp16fml
6162
6163 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6164   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6165         (unspec:VDQSF
6166          [(match_operand:VDQSF 1 "register_operand" "0")
6167           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6168           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6169          VFMLA16_LOW))]
6170   "TARGET_F16FML"
6171 {
6172   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6173                                             <nunits> * 2, false);
6174   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6175                                             <nunits> * 2, false);
6176
6177   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6178                                                                 operands[1],
6179                                                                 operands[2],
6180                                                                 operands[3],
6181                                                                 p1, p2));
6182   DONE;
6183
6184 })
6185
6186 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6187   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6188         (unspec:VDQSF
6189          [(match_operand:VDQSF 1 "register_operand" "0")
6190           (match_operand:<VFMLA_W> 2 "register_operand" "w")
6191           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6192          VFMLA16_HIGH))]
6193   "TARGET_F16FML"
6194 {
6195   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6196   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6197
6198   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6199                                                                  operands[1],
6200                                                                  operands[2],
6201                                                                  operands[3],
6202                                                                  p1, p2));
6203   DONE;
6204 })
6205
6206 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6207   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6208         (fma:VDQSF
6209          (float_extend:VDQSF
6210           (vec_select:<VFMLA_SEL_W>
6211            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6212            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6213          (float_extend:VDQSF
6214           (vec_select:<VFMLA_SEL_W>
6215            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6216            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6217          (match_operand:VDQSF 1 "register_operand" "0")))]
6218   "TARGET_F16FML"
6219   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6220   [(set_attr "type" "neon_fp_mul_s")]
6221 )
6222
6223 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6224   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6225         (fma:VDQSF
6226          (float_extend:VDQSF
6227           (neg:<VFMLA_SEL_W>
6228            (vec_select:<VFMLA_SEL_W>
6229             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6230             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6231          (float_extend:VDQSF
6232           (vec_select:<VFMLA_SEL_W>
6233            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6234            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6235          (match_operand:VDQSF 1 "register_operand" "0")))]
6236   "TARGET_F16FML"
6237   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6238   [(set_attr "type" "neon_fp_mul_s")]
6239 )
6240
6241 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6242   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6243         (fma:VDQSF
6244          (float_extend:VDQSF
6245           (vec_select:<VFMLA_SEL_W>
6246            (match_operand:<VFMLA_W> 2 "register_operand" "w")
6247            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6248          (float_extend:VDQSF
6249           (vec_select:<VFMLA_SEL_W>
6250            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6251            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6252          (match_operand:VDQSF 1 "register_operand" "0")))]
6253   "TARGET_F16FML"
6254   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6255   [(set_attr "type" "neon_fp_mul_s")]
6256 )
6257
6258 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6259   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6260         (fma:VDQSF
6261          (float_extend:VDQSF
6262           (neg:<VFMLA_SEL_W>
6263            (vec_select:<VFMLA_SEL_W>
6264             (match_operand:<VFMLA_W> 2 "register_operand" "w")
6265             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6266          (float_extend:VDQSF
6267           (vec_select:<VFMLA_SEL_W>
6268            (match_operand:<VFMLA_W> 3 "register_operand" "w")
6269            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6270          (match_operand:VDQSF 1 "register_operand" "0")))]
6271   "TARGET_F16FML"
6272   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6273   [(set_attr "type" "neon_fp_mul_s")]
6274 )
6275
6276 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6277   [(set (match_operand:V2SF 0 "register_operand" "")
6278         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6279                            (match_operand:V4HF 2 "register_operand" "")
6280                            (match_operand:V4HF 3 "register_operand" "")
6281                            (match_operand:SI 4 "aarch64_imm2" "")]
6282          VFMLA16_LOW))]
6283   "TARGET_F16FML"
6284 {
6285     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6286     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6287
6288     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6289                                                             operands[1],
6290                                                             operands[2],
6291                                                             operands[3],
6292                                                             p1, lane));
6293     DONE;
6294 }
6295 )
6296
6297 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6298   [(set (match_operand:V2SF 0 "register_operand" "")
6299         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6300                            (match_operand:V4HF 2 "register_operand" "")
6301                            (match_operand:V4HF 3 "register_operand" "")
6302                            (match_operand:SI 4 "aarch64_imm2" "")]
6303          VFMLA16_HIGH))]
6304   "TARGET_F16FML"
6305 {
6306     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6307     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6308
6309     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6310                                                              operands[1],
6311                                                              operands[2],
6312                                                              operands[3],
6313                                                              p1, lane));
6314     DONE;
6315 })
6316
6317 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6318   [(set (match_operand:V2SF 0 "register_operand" "=w")
6319         (fma:V2SF
6320          (float_extend:V2SF
6321            (vec_select:V2HF
6322             (match_operand:V4HF 2 "register_operand" "w")
6323             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6324          (float_extend:V2SF
6325            (vec_duplicate:V2HF
6326             (vec_select:HF
6327              (match_operand:V4HF 3 "register_operand" "x")
6328              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6329          (match_operand:V2SF 1 "register_operand" "0")))]
6330   "TARGET_F16FML"
6331   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6332   [(set_attr "type" "neon_fp_mul_s")]
6333 )
6334
6335 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6336   [(set (match_operand:V2SF 0 "register_operand" "=w")
6337         (fma:V2SF
6338          (float_extend:V2SF
6339           (neg:V2HF
6340            (vec_select:V2HF
6341             (match_operand:V4HF 2 "register_operand" "w")
6342             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6343          (float_extend:V2SF
6344           (vec_duplicate:V2HF
6345            (vec_select:HF
6346             (match_operand:V4HF 3 "register_operand" "x")
6347             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6348          (match_operand:V2SF 1 "register_operand" "0")))]
6349   "TARGET_F16FML"
6350   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6351   [(set_attr "type" "neon_fp_mul_s")]
6352 )
6353
6354 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6355   [(set (match_operand:V2SF 0 "register_operand" "=w")
6356         (fma:V2SF
6357          (float_extend:V2SF
6358            (vec_select:V2HF
6359             (match_operand:V4HF 2 "register_operand" "w")
6360             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6361          (float_extend:V2SF
6362            (vec_duplicate:V2HF
6363             (vec_select:HF
6364              (match_operand:V4HF 3 "register_operand" "x")
6365              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6366          (match_operand:V2SF 1 "register_operand" "0")))]
6367   "TARGET_F16FML"
6368   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6369   [(set_attr "type" "neon_fp_mul_s")]
6370 )
6371
6372 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6373   [(set (match_operand:V2SF 0 "register_operand" "=w")
6374         (fma:V2SF
6375          (float_extend:V2SF
6376            (neg:V2HF
6377             (vec_select:V2HF
6378              (match_operand:V4HF 2 "register_operand" "w")
6379              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6380          (float_extend:V2SF
6381            (vec_duplicate:V2HF
6382             (vec_select:HF
6383              (match_operand:V4HF 3 "register_operand" "x")
6384              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6385          (match_operand:V2SF 1 "register_operand" "0")))]
6386   "TARGET_F16FML"
6387   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6388   [(set_attr "type" "neon_fp_mul_s")]
6389 )
6390
6391 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6392   [(set (match_operand:V4SF 0 "register_operand" "")
6393         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6394                            (match_operand:V8HF 2 "register_operand" "")
6395                            (match_operand:V8HF 3 "register_operand" "")
6396                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6397          VFMLA16_LOW))]
6398   "TARGET_F16FML"
6399 {
6400     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6401     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6402
6403     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6404                                                               operands[1],
6405                                                               operands[2],
6406                                                               operands[3],
6407                                                               p1, lane));
6408     DONE;
6409 })
6410
6411 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6412   [(set (match_operand:V4SF 0 "register_operand" "")
6413         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6414                            (match_operand:V8HF 2 "register_operand" "")
6415                            (match_operand:V8HF 3 "register_operand" "")
6416                            (match_operand:SI 4 "aarch64_lane_imm3" "")]
6417          VFMLA16_HIGH))]
6418   "TARGET_F16FML"
6419 {
6420     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6421     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6422
6423     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6424                                                                operands[1],
6425                                                                operands[2],
6426                                                                operands[3],
6427                                                                p1, lane));
6428     DONE;
6429 })
6430
6431 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6432   [(set (match_operand:V4SF 0 "register_operand" "=w")
6433         (fma:V4SF
6434          (float_extend:V4SF
6435           (vec_select:V4HF
6436             (match_operand:V8HF 2 "register_operand" "w")
6437             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6438          (float_extend:V4SF
6439           (vec_duplicate:V4HF
6440            (vec_select:HF
6441             (match_operand:V8HF 3 "register_operand" "x")
6442             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6443          (match_operand:V4SF 1 "register_operand" "0")))]
6444   "TARGET_F16FML"
6445   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6446   [(set_attr "type" "neon_fp_mul_s")]
6447 )
6448
6449 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6450   [(set (match_operand:V4SF 0 "register_operand" "=w")
6451         (fma:V4SF
6452           (float_extend:V4SF
6453            (neg:V4HF
6454             (vec_select:V4HF
6455              (match_operand:V8HF 2 "register_operand" "w")
6456              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6457          (float_extend:V4SF
6458           (vec_duplicate:V4HF
6459            (vec_select:HF
6460             (match_operand:V8HF 3 "register_operand" "x")
6461             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6462          (match_operand:V4SF 1 "register_operand" "0")))]
6463   "TARGET_F16FML"
6464   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6465   [(set_attr "type" "neon_fp_mul_s")]
6466 )
6467
6468 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6469   [(set (match_operand:V4SF 0 "register_operand" "=w")
6470         (fma:V4SF
6471          (float_extend:V4SF
6472           (vec_select:V4HF
6473             (match_operand:V8HF 2 "register_operand" "w")
6474             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6475          (float_extend:V4SF
6476           (vec_duplicate:V4HF
6477            (vec_select:HF
6478             (match_operand:V8HF 3 "register_operand" "x")
6479             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6480          (match_operand:V4SF 1 "register_operand" "0")))]
6481   "TARGET_F16FML"
6482   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6483   [(set_attr "type" "neon_fp_mul_s")]
6484 )
6485
6486 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6487   [(set (match_operand:V4SF 0 "register_operand" "=w")
6488         (fma:V4SF
6489          (float_extend:V4SF
6490           (neg:V4HF
6491            (vec_select:V4HF
6492             (match_operand:V8HF 2 "register_operand" "w")
6493             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6494          (float_extend:V4SF
6495           (vec_duplicate:V4HF
6496            (vec_select:HF
6497             (match_operand:V8HF 3 "register_operand" "x")
6498             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6499          (match_operand:V4SF 1 "register_operand" "0")))]
6500   "TARGET_F16FML"
6501   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6502   [(set_attr "type" "neon_fp_mul_s")]
6503 )
6504
6505 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6506   [(set (match_operand:V2SF 0 "register_operand" "")
6507         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6508                       (match_operand:V4HF 2 "register_operand" "")
6509                       (match_operand:V8HF 3 "register_operand" "")
6510                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6511          VFMLA16_LOW))]
6512   "TARGET_F16FML"
6513 {
6514     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6515     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6516
6517     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6518                                                              operands[1],
6519                                                              operands[2],
6520                                                              operands[3],
6521                                                              p1, lane));
6522     DONE;
6523
6524 })
6525
6526 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6527   [(set (match_operand:V2SF 0 "register_operand" "")
6528         (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6529                       (match_operand:V4HF 2 "register_operand" "")
6530                       (match_operand:V8HF 3 "register_operand" "")
6531                       (match_operand:SI 4 "aarch64_lane_imm3" "")]
6532          VFMLA16_HIGH))]
6533   "TARGET_F16FML"
6534 {
6535     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6536     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6537
6538     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6539                                                               operands[1],
6540                                                               operands[2],
6541                                                               operands[3],
6542                                                               p1, lane));
6543     DONE;
6544
6545 })
6546
6547 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6548   [(set (match_operand:V2SF 0 "register_operand" "=w")
6549         (fma:V2SF
6550          (float_extend:V2SF
6551            (vec_select:V2HF
6552             (match_operand:V4HF 2 "register_operand" "w")
6553             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6554          (float_extend:V2SF
6555           (vec_duplicate:V2HF
6556            (vec_select:HF
6557             (match_operand:V8HF 3 "register_operand" "x")
6558             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6559          (match_operand:V2SF 1 "register_operand" "0")))]
6560   "TARGET_F16FML"
6561   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6562   [(set_attr "type" "neon_fp_mul_s")]
6563 )
6564
6565 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6566   [(set (match_operand:V2SF 0 "register_operand" "=w")
6567         (fma:V2SF
6568          (float_extend:V2SF
6569           (neg:V2HF
6570            (vec_select:V2HF
6571             (match_operand:V4HF 2 "register_operand" "w")
6572             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6573          (float_extend:V2SF
6574           (vec_duplicate:V2HF
6575            (vec_select:HF
6576             (match_operand:V8HF 3 "register_operand" "x")
6577             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6578          (match_operand:V2SF 1 "register_operand" "0")))]
6579   "TARGET_F16FML"
6580   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6581   [(set_attr "type" "neon_fp_mul_s")]
6582 )
6583
6584 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6585   [(set (match_operand:V2SF 0 "register_operand" "=w")
6586         (fma:V2SF
6587          (float_extend:V2SF
6588            (vec_select:V2HF
6589             (match_operand:V4HF 2 "register_operand" "w")
6590             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6591          (float_extend:V2SF
6592           (vec_duplicate:V2HF
6593            (vec_select:HF
6594             (match_operand:V8HF 3 "register_operand" "x")
6595             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6596          (match_operand:V2SF 1 "register_operand" "0")))]
6597   "TARGET_F16FML"
6598   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6599   [(set_attr "type" "neon_fp_mul_s")]
6600 )
6601
6602 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6603   [(set (match_operand:V2SF 0 "register_operand" "=w")
6604         (fma:V2SF
6605          (float_extend:V2SF
6606           (neg:V2HF
6607            (vec_select:V2HF
6608             (match_operand:V4HF 2 "register_operand" "w")
6609             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6610          (float_extend:V2SF
6611           (vec_duplicate:V2HF
6612            (vec_select:HF
6613             (match_operand:V8HF 3 "register_operand" "x")
6614             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6615          (match_operand:V2SF 1 "register_operand" "0")))]
6616   "TARGET_F16FML"
6617   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6618   [(set_attr "type" "neon_fp_mul_s")]
6619 )
6620
6621 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6622   [(set (match_operand:V4SF 0 "register_operand" "")
6623         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6624                       (match_operand:V8HF 2 "register_operand" "")
6625                       (match_operand:V4HF 3 "register_operand" "")
6626                       (match_operand:SI 4 "aarch64_imm2" "")]
6627          VFMLA16_LOW))]
6628   "TARGET_F16FML"
6629 {
6630     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6631     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6632
6633     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6634                                                              operands[1],
6635                                                              operands[2],
6636                                                              operands[3],
6637                                                              p1, lane));
6638     DONE;
6639 })
6640
6641 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6642   [(set (match_operand:V4SF 0 "register_operand" "")
6643         (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6644                       (match_operand:V8HF 2 "register_operand" "")
6645                       (match_operand:V4HF 3 "register_operand" "")
6646                       (match_operand:SI 4 "aarch64_imm2" "")]
6647          VFMLA16_HIGH))]
6648   "TARGET_F16FML"
6649 {
6650     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6651     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6652
6653     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6654                                                               operands[1],
6655                                                               operands[2],
6656                                                               operands[3],
6657                                                               p1, lane));
6658     DONE;
6659 })
6660
6661 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6662   [(set (match_operand:V4SF 0 "register_operand" "=w")
6663         (fma:V4SF
6664          (float_extend:V4SF
6665           (vec_select:V4HF
6666            (match_operand:V8HF 2 "register_operand" "w")
6667            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6668          (float_extend:V4SF
6669           (vec_duplicate:V4HF
6670            (vec_select:HF
6671             (match_operand:V4HF 3 "register_operand" "x")
6672             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6673          (match_operand:V4SF 1 "register_operand" "0")))]
6674   "TARGET_F16FML"
6675   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6676   [(set_attr "type" "neon_fp_mul_s")]
6677 )
6678
6679 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6680   [(set (match_operand:V4SF 0 "register_operand" "=w")
6681         (fma:V4SF
6682          (float_extend:V4SF
6683           (neg:V4HF
6684            (vec_select:V4HF
6685             (match_operand:V8HF 2 "register_operand" "w")
6686             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6687          (float_extend:V4SF
6688           (vec_duplicate:V4HF
6689            (vec_select:HF
6690             (match_operand:V4HF 3 "register_operand" "x")
6691             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6692          (match_operand:V4SF 1 "register_operand" "0")))]
6693   "TARGET_F16FML"
6694   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6695   [(set_attr "type" "neon_fp_mul_s")]
6696 )
6697
6698 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6699   [(set (match_operand:V4SF 0 "register_operand" "=w")
6700         (fma:V4SF
6701          (float_extend:V4SF
6702           (vec_select:V4HF
6703            (match_operand:V8HF 2 "register_operand" "w")
6704            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6705          (float_extend:V4SF
6706           (vec_duplicate:V4HF
6707            (vec_select:HF
6708             (match_operand:V4HF 3 "register_operand" "x")
6709             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6710          (match_operand:V4SF 1 "register_operand" "0")))]
6711   "TARGET_F16FML"
6712   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6713   [(set_attr "type" "neon_fp_mul_s")]
6714 )
6715
6716 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6717   [(set (match_operand:V4SF 0 "register_operand" "=w")
6718         (fma:V4SF
6719          (float_extend:V4SF
6720           (neg:V4HF
6721            (vec_select:V4HF
6722             (match_operand:V8HF 2 "register_operand" "w")
6723             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6724          (float_extend:V4SF
6725           (vec_duplicate:V4HF
6726            (vec_select:HF
6727             (match_operand:V4HF 3 "register_operand" "x")
6728             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6729          (match_operand:V4SF 1 "register_operand" "0")))]
6730   "TARGET_F16FML"
6731   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6732   [(set_attr "type" "neon_fp_mul_s")]
6733 )
6734
6735 ;; pmull
6736
6737 (define_insn "aarch64_crypto_pmulldi"
6738   [(set (match_operand:TI 0 "register_operand" "=w")
6739         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
6740                      (match_operand:DI 2 "register_operand" "w")]
6741                     UNSPEC_PMULL))]
6742  "TARGET_SIMD && TARGET_AES"
6743  "pmull\\t%0.1q, %1.1d, %2.1d"
6744   [(set_attr "type" "crypto_pmull")]
6745 )
6746
6747 (define_insn "aarch64_crypto_pmullv2di"
6748  [(set (match_operand:TI 0 "register_operand" "=w")
6749        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6750                    (match_operand:V2DI 2 "register_operand" "w")]
6751                   UNSPEC_PMULL2))]
6752   "TARGET_SIMD && TARGET_AES"
6753   "pmull2\\t%0.1q, %1.2d, %2.2d"
6754   [(set_attr "type" "crypto_pmull")]
6755 )