;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((GET_MODE_SIZE (<MODE>mode) == 16
                && aarch64_mem_pair_operand (operands[0], DImode))
               || GET_MODE_SIZE (<MODE>mode) == 8)))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

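;; A minimal sketch of what the xzr check above buys us: storing a zero
;; vector needs no SIMD register at all.  The intrinsics and the expected
;; instructions below are illustrative assumptions, not compiler output.
;;
;;   #include <arm_neon.h>
;;   void clear16 (uint32_t *p)
;;   {
;;     vst1q_u32 (p, vdupq_n_u32 (0));   /* 16 bytes: stp xzr, xzr, [x0] */
;;   }
;;   void clear8 (uint32_t *p)
;;   {
;;     vst1_u32 (p, vdup_n_u32 (0));     /* 8 bytes: str xzr, [x0] */
;;   }
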
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
         "=w, m, m, w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
         "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0: return "ldr\t%d0, %1";
    case 1: return "str\txzr, %0";
    case 2: return "str\t%d1, %0";
    case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4: return "umov\t%0, %1.d[0]";
    case 5: return "fmov\t%d0, %1";
    case 6: return "mov\t%0, %1";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1],
                                                <MODE>mode, 64);
    default: gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
         "=w, Umq, m, w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
         "m, Dz, w, w, w, r, r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VD 2 "register_operand" "=w")
        (match_operand:VD 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "store_pair<mode>"
  [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VD 1 "register_operand" "w"))
   (set (match_operand:VD 2 "memory_operand" "=m")
        (match_operand:VD 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

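;; Why bswap + rbit + clz computes ctz above: reversing all the bits of a
;; lane turns its lowest set bit into its highest set bit, so
;; ctz (x) == clz (bitrev (x)).  AdvSIMD has no lane-wide bit reverse, but
;; the byte reverse (bswap) followed by RBIT (bit reverse within each byte)
;; composes into one.  A one-lane worked example (a sketch, not output):
;;
;;   x = 0x00000008             /* ctz (x) == 3 */
;;   bitrev (x) == 0x10000000   /* clz == 3 == ctz (x) */
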
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

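;; The identity used above: xorsign (a, b) = a * copysign (1.0, b) can be
;; computed without a multiply as a ^ (b & sign_mask) on the IEEE bit
;; patterns.  A scalar C sketch of the same trick (names and includes are
;; illustrative, not taken from this file):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float xorsign (float a, float b)
;;   {
;;     uint32_t ia, ib;
;;     memcpy (&ia, &a, 4);
;;     memcpy (&ib, &b, 4);
;;     ia ^= ib & 0x80000000u;   /* flip a's sign where b is negative */
;;     memcpy (&a, &ia, 4);
;;     return a;
;;   }
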
;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                            DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

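;; For reference, a loop of the shape described in the comment above.  The
;; element types and flags are assumptions for illustration; with them the
;; vectorizer can select the expand above and emit [us]dot.
;;
;;   /* Assumed flags: -O3 -march=armv8.2-a+dotprod.  */
;;   int dot (signed char *a, signed char *b, int len)
;;   {
;;     int r = 0;
;;     for (int i = 0; i < len; i++)
;;       r += a[i] * b[i];   /* per 16-byte chunk: sdot v0.4s, v1.16b, v2.16b */
;;     return r;
;;   }
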
;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)

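;; The BSL above implements copysign as a bitwise select against a sign-bit
;; mask: each result bit comes from operand 2 where the mask is set and from
;; operand 1 elsewhere.  A scalar sketch of the selection (illustrative):
;;
;;   uint32_t copysign_bits (uint32_t mag, uint32_t sgn)
;;   {
;;     uint32_t mask = 0x80000000u;           /* HOST_WIDE_INT_M1U << 31 */
;;     return (mask & sgn) | (~mask & mag);   /* what the BSL computes */
;;   }
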
(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

(define_insn "abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (abs:VDQ_BHSI (minus:VDQ_BHSI
                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2],
                   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2],
                   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
        (vec_merge:VDQ_BHSI
          (vec_duplicate:VDQ_BHSI
            (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
          (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.<Vetype>[%p2], %w1";
      case 1:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
      case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

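;; How the register-amount case above works: AdvSIMD has no right shift by
;; register, only USHL/SSHL, which shift left for positive lane values and
;; right for negative ones.  So `v >> n' becomes USHL by the negated amount,
;; duplicated across all lanes.  A one-lane C sketch of the USHL semantics
;; being relied on (illustrative):
;;
;;   uint32_t lshr_via_ushl (uint32_t x, int n)
;;   {
;;     int shift = -n;   /* the gen_negsi2 above */
;;     return shift >= 0 ? x << shift : x >> -shift;
;;   }
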
(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like an ASR by 63; however, the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

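;; A worked check of the clamp above, assuming arithmetic-shift semantics:
;; for x = -5, x >> 63 == -1 (all sign bits), exactly what an infinitely
;; wide shift by 64 would produce; for x = 5, x >> 63 == 0.  The two shift
;; amounts therefore agree for every input (illustration, not from the
;; source).
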
(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_insn "aarch64_simd_vec_setv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w,w")
        (vec_merge:V2DI
          (vec_duplicate:V2DI
            (match_operand:DI 1 "register_operand" "r,w"))
          (match_operand:V2DI 3 "register_operand" "0,0")
          (match_operand:SI 2 "immediate_operand" "i,i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.d[%p2], %1";
      case 1:
        return "ins\\t%0.d[%p2], %1.d[0]";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_from_gp, neon_ins_q")]
)

(define_expand "vec_setv2di"
  [(match_operand:V2DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
                                             GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_merge:VDQF_F16
          (vec_duplicate:VDQF_F16
            (match_operand:<VEL> 1 "register_operand" "w"))
          (match_operand:VDQF_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));

    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VDQF_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (vec_duplicate:<VHALF> (const_int 0))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")
          (const_int 0)))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_NO2E
          (vec_duplicate:<VHALF> (const_int 0))
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "simd" "yes,*,yes")
   (set_attr "fp" "*,yes,*")
   (set_attr "length" "4")]
)

(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VQ 1 "register_operand")]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
          (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQ 0 "register_operand" "+w,w")
        (vec_concat:VQ
          (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
            (match_dup 0)
            (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQ 0 "register_operand" "")
   (match_operand:<VHALF> 1 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
                                                        operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
                                                     operands[1], p));
  DONE;
})

;; Narrowing operations.

;; For doubles.
(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand" "")
   (match_operand:VDN 1 "register_operand" "")
   (match_operand:VDN 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx tempreg = gen_reg_rtx (<VDBL>mode);
  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
  int hi = BYTES_BIG_ENDIAN ? 1 : 2;

  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
  DONE;
})

;; For quads.

(define_insn "vec_pack_trunc_<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
        (vec_concat:<VNARROWQ2>
          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
    else
      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
  }
  [(set_attr "type" "multiple")
   (set_attr "length" "8")]
)

;; Widening operations.

(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                              (match_operand:VQW 1 "register_operand" "w")
                              (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
                            )))]
  "TARGET_SIMD"
  "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_expand "vec_unpack<su>_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

(define_expand "vec_unpack<su>_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
                                                          operands[1], p));
    DONE;
  }
)

;; Widening arithmetic.

(define_insn "*aarch64_<su>mlal_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_lo<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl_hi<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
              (match_operand:VQW 4 "register_operand" "w")
              (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 1 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w")))
          (match_operand:<VWIDE> 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "*aarch64_<su>mlsl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (mult:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (ANY_EXTEND:<VWIDE>
              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
  "TARGET_SIMD"
  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype>_long")]
)

(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 1 "register_operand" "w")
                        (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                        (match_operand:VQW 2 "register_operand" "w")
                        (match_dup 3)))))]
  "TARGET_SIMD"
  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype>_long")]
)

(define_expand "vec_widen_<su>mult_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
                                                        operands[1],
                                                        operands[2], p));
    DONE;
  }
)

1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1695 ;;
1696 ;; Floating-point operations can raise an exception.  Vectorizing such
1697 ;; operations is safe for the reasons explained below.
1698 ;;
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling; however, this is an optional feature.  In the
1701 ;; event of a floating-point exception being raised by vectorised
1702 ;; code then:
1703 ;; 1. If trapped floating-point exceptions are available, a trap
1704 ;; will be taken when any lane raises an enabled exception.  A trap
1705 ;; handler may determine which lane raised the exception.
1706 ;; 2. Alternatively, a sticky exception flag is set in the
1707 ;; floating-point status register (FPSR).  Software may explicitly
1708 ;; test the exception flags, in which case the tests either prevent
1709 ;; vectorisation (allowing precise identification of the failing
1710 ;; operation), or, being outside of vectorisable regions, imply that
1711 ;; the specific operation and lane are not of interest.
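;;
;; For illustration only (a hedged sketch, not part of the machine
;; description): the upshot is that a plain C loop such as
;;
;;   void vadd (float *restrict c, const float *restrict a,
;;              const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       c[i] = a[i] + b[i];
;;   }
;;
;; may be auto-vectorised to the "add<mode>3" pattern below (FADD on
;; .4s lanes) without -ffast-math or -funsafe-math-optimizations.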
1712
1713 ;; FP arithmetic operations.
1714
1715 (define_insn "add<mode>3"
1716 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718 (match_operand:VHSDF 2 "register_operand" "w")))]
1719 "TARGET_SIMD"
1720 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1722 )
1723
1724 (define_insn "sub<mode>3"
1725 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727 (match_operand:VHSDF 2 "register_operand" "w")))]
1728 "TARGET_SIMD"
1729 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1731 )
1732
1733 (define_insn "mul<mode>3"
1734 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736 (match_operand:VHSDF 2 "register_operand" "w")))]
1737 "TARGET_SIMD"
1738 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1740 )
1741
1742 (define_expand "div<mode>3"
1743 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744 (div:VHSDF (match_operand:VHSDF 1 "general_operand" "w")
1745 (match_operand:VHSDF 2 "register_operand" "w")))]
1746 "TARGET_SIMD"
1747 {
1748 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1749 DONE;
1750
1751 operands[1] = force_reg (<MODE>mode, operands[1]);
1752 })
1753
1754 (define_insn "*div<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1758 "TARGET_SIMD"
1759 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_div_<stype><q>")]
1761 )
1762
1763 (define_insn "neg<mode>2"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1766 "TARGET_SIMD"
1767 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1769 )
1770
1771 (define_insn "abs<mode>2"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1774 "TARGET_SIMD"
1775 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1777 )
1778
1779 (define_insn "fma<mode>4"
1780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782 (match_operand:VHSDF 2 "register_operand" "w")
1783 (match_operand:VHSDF 3 "register_operand" "0")))]
1784 "TARGET_SIMD"
1785 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1787 )
1788
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790 [(set (match_operand:VDQF 0 "register_operand" "=w")
1791 (fma:VDQF
1792 (vec_duplicate:VDQF
1793 (vec_select:<VEL>
1794 (match_operand:VDQF 1 "register_operand" "<h_con>")
1795 (parallel [(match_operand:SI 2 "immediate_operand")])))
1796 (match_operand:VDQF 3 "register_operand" "w")
1797 (match_operand:VDQF 4 "register_operand" "0")))]
1798 "TARGET_SIMD"
1799 {
1800 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1802 }
1803 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1804 )
1805
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1808 (fma:VDQSF
1809 (vec_duplicate:VDQSF
1810 (vec_select:<VEL>
1811 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQSF 3 "register_operand" "w")
1814 (match_operand:VDQSF 4 "register_operand" "0")))]
1815 "TARGET_SIMD"
1816 {
1817 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819 }
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824 [(set (match_operand:VMUL 0 "register_operand" "=w")
1825 (fma:VMUL
1826 (vec_duplicate:VMUL
1827 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828 (match_operand:VMUL 2 "register_operand" "w")
1829 (match_operand:VMUL 3 "register_operand" "0")))]
1830 "TARGET_SIMD"
1831 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1833 )
1834
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836 [(set (match_operand:DF 0 "register_operand" "=w")
1837 (fma:DF
1838 (vec_select:DF
1839 (match_operand:V2DF 1 "register_operand" "w")
1840 (parallel [(match_operand:SI 2 "immediate_operand")]))
1841 (match_operand:DF 3 "register_operand" "w")
1842 (match_operand:DF 4 "register_operand" "0")))]
1843 "TARGET_SIMD"
1844 {
1845 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1847 }
1848 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1849 )
1850
1851 (define_insn "fnma<mode>4"
1852 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1853 (fma:VHSDF
1854 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855 (match_operand:VHSDF 2 "register_operand" "w")
1856 (match_operand:VHSDF 3 "register_operand" "0")))]
1857 "TARGET_SIMD"
1858 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1860 )
1861
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863 [(set (match_operand:VDQF 0 "register_operand" "=w")
1864 (fma:VDQF
1865 (neg:VDQF
1866 (match_operand:VDQF 3 "register_operand" "w"))
1867 (vec_duplicate:VDQF
1868 (vec_select:<VEL>
1869 (match_operand:VDQF 1 "register_operand" "<h_con>")
1870 (parallel [(match_operand:SI 2 "immediate_operand")])))
1871 (match_operand:VDQF 4 "register_operand" "0")))]
1872 "TARGET_SIMD"
1873 {
1874 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1876 }
1877 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1878 )
1879
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1882 (fma:VDQSF
1883 (neg:VDQSF
1884 (match_operand:VDQSF 3 "register_operand" "w"))
1885 (vec_duplicate:VDQSF
1886 (vec_select:<VEL>
1887 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888 (parallel [(match_operand:SI 2 "immediate_operand")])))
1889 (match_operand:VDQSF 4 "register_operand" "0")))]
1890 "TARGET_SIMD"
1891 {
1892 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894 }
1895 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1896 )
1897
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899 [(set (match_operand:VMUL 0 "register_operand" "=w")
1900 (fma:VMUL
1901 (neg:VMUL
1902 (match_operand:VMUL 2 "register_operand" "w"))
1903 (vec_duplicate:VMUL
1904 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905 (match_operand:VMUL 3 "register_operand" "0")))]
1906 "TARGET_SIMD"
1907 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1909 )
1910
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912 [(set (match_operand:DF 0 "register_operand" "=w")
1913 (fma:DF
1914 (vec_select:DF
1915 (match_operand:V2DF 1 "register_operand" "w")
1916 (parallel [(match_operand:SI 2 "immediate_operand")]))
1917 (neg:DF
1918 (match_operand:DF 3 "register_operand" "w"))
1919 (match_operand:DF 4 "register_operand" "0")))]
1920 "TARGET_SIMD"
1921 {
1922 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1924 }
1925 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1926 )
1927
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
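;; For illustration (a sketch; the mapping noted below is standard
;; AArch64 behaviour rather than anything specific to this file): each
;; rounding built-in selects its own FRINT variant, e.g.
;;
;;   void vtrunc (float *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = __builtin_truncf (s[i]);   /* btrunc -> frintz */
;;   }
;;
;; with floor mapping to frintm, ceil to frintp and round to frinta.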
1930 (define_insn "<frint_pattern><mode>2"
1931 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1933 FRINT))]
1934 "TARGET_SIMD"
1935 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936 [(set_attr "type" "neon_fp_round_<stype><q>")]
1937 )
1938
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor.
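;; For illustration (a sketch): lbtrunc corresponds to the plain C
;; float-to-integer conversion,
;;
;;   void vconv (int *restrict d, const float *restrict s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (int) s[i];   /* fcvtzs on vector lanes */
;;   }
;;
;; while lround, lceil and lfloor select fcvtas, fcvtps and fcvtms.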
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944 [(match_operand:VHSDF 1 "register_operand" "w")]
1945 FCVT)))]
1946 "TARGET_SIMD"
1947 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1949 )
1950
1951 ;; HF Scalar variants of related SIMD instructions.
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953 [(set (match_operand:HI 0 "register_operand" "=w")
1954 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1955 FCVT)))]
1956 "TARGET_SIMD_F16INST"
1957 "fcvt<frint_suffix><su>\t%h0, %h1"
1958 [(set_attr "type" "neon_fp_to_int_s")]
1959 )
1960
1961 (define_insn "<optab>_trunchfhi2"
1962 [(set (match_operand:HI 0 "register_operand" "=w")
1963 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964 "TARGET_SIMD_F16INST"
1965 "fcvtz<su>\t%h0, %h1"
1966 [(set_attr "type" "neon_fp_to_int_s")]
1967 )
1968
1969 (define_insn "<optab>hihf2"
1970 [(set (match_operand:HF 0 "register_operand" "=w")
1971 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972 "TARGET_SIMD_F16INST"
1973 "<su_optab>cvtf\t%h0, %h1"
1974 [(set_attr "type" "neon_int_to_fp_s")]
1975 )
1976
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1980 [(mult:VDQF
1981 (match_operand:VDQF 1 "register_operand" "w")
1982 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1983 UNSPEC_FRINTZ)))]
1984 "TARGET_SIMD
1985 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1987 {
1988 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1989 char buf[64];
1990 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991 output_asm_insn (buf, operands);
1992 return "";
1993 }
1994 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1995 )
1996
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000 [(match_operand:VHSDF 1 "register_operand")]
2001 UNSPEC_FRINTZ)))]
2002 "TARGET_SIMD"
2003 {})
2004
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008 [(match_operand:VHSDF 1 "register_operand")]
2009 UNSPEC_FRINTZ)))]
2010 "TARGET_SIMD"
2011 {})
2012
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand")
2015 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2016 UNSPEC_FRINTZ))]
2017 "TARGET_SIMD"
2018 {})
2019
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2022 (FLOATUORS:VHSDF
2023 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2024 "TARGET_SIMD"
2025 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2027 )
2028
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2032
2033 ;; Float widening operations.
2034
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037 (float_extend:<VWIDE> (vec_select:<VHALF>
2038 (match_operand:VQ_HSF 1 "register_operand" "w")
2039 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2040 )))]
2041 "TARGET_SIMD"
2042 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043 [(set_attr "type" "neon_fp_cvt_widen_s")]
2044 )
2045
2046 ;; Convert between fixed-point and floating-point (vector modes)
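;;
;; For illustration (a hedged sketch): with #fbits = 16 these implement
;; a Q15.16 fixed-point representation, lane-wise equivalent to
;;
;;   #include <stdint.h>
;;   float   to_float (int32_t q) { return (float) q / 65536.0f; }    /* scvtf, #16 */
;;   int32_t to_fixed (float f) { return (int32_t) (f * 65536.0f); }  /* fcvtzs, #16 */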
2047
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050 (unspec:<VHSDF:FCVT_TARGET>
2051 [(match_operand:VHSDF 1 "register_operand" "w")
2052 (match_operand:SI 2 "immediate_operand" "i")]
2053 FCVT_F2FIXED))]
2054 "TARGET_SIMD"
2055 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2057 )
2058
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061 (unspec:<VDQ_HSDI:FCVT_TARGET>
2062 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063 (match_operand:SI 2 "immediate_operand" "i")]
2064 FCVT_FIXED2F))]
2065 "TARGET_SIMD"
2066 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2068 )
2069
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns, their behavior is as required.
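;;
;; For illustration (a sketch in arm_neon.h terms, one consumer of
;; these semantics): the "hi" unpack always widens the higher
;; architectural lanes, matching
;;
;;   #include <arm_neon.h>
;;   float64x2_t unpack_hi (float32x4_t x)
;;   {
;;     return vcvt_high_f64_f32 (x);   /* fcvtl2: lanes 2 and 3 */
;;   }
;;
;; regardless of the target endianness.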
2078
2079 (define_expand "vec_unpacks_lo_<mode>"
2080 [(match_operand:<VWIDE> 0 "register_operand" "")
2081 (match_operand:VQ_HSF 1 "register_operand" "")]
2082 "TARGET_SIMD"
2083 {
2084 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2086 operands[1], p));
2087 DONE;
2088 }
2089 )
2090
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (float_extend:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQ_HSF 1 "register_operand" "w")
2095 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2096 )))]
2097 "TARGET_SIMD"
2098 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099 [(set_attr "type" "neon_fp_cvt_widen_s")]
2100 )
2101
2102 (define_expand "vec_unpacks_hi_<mode>"
2103 [(match_operand:<VWIDE> 0 "register_operand" "")
2104 (match_operand:VQ_HSF 1 "register_operand" "")]
2105 "TARGET_SIMD"
2106 {
2107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2109 operands[1], p));
2110 DONE;
2111 }
2112 )
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115 (float_extend:<VWIDE>
2116 (match_operand:VDF 1 "register_operand" "w")))]
2117 "TARGET_SIMD"
2118 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119 [(set_attr "type" "neon_fp_cvt_widen_s")]
2120 )
2121
2122 ;; Float narrowing operations.
2123
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125 [(set (match_operand:VDF 0 "register_operand" "=w")
2126 (float_truncate:VDF
2127 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2128 "TARGET_SIMD"
2129 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2131 )
2132
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2135 (vec_concat:<VDBL>
2136 (match_operand:VDF 1 "register_operand" "0")
2137 (float_truncate:VDF
2138 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2142 )
2143
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2146 (vec_concat:<VDBL>
2147 (float_truncate:VDF
2148 (match_operand:<VWIDE> 2 "register_operand" "w"))
2149 (match_operand:VDF 1 "register_operand" "0")))]
2150 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2153 )
2154
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156 [(match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2158 (match_operand:<VWIDE> 2 "register_operand" "w")]
2159 "TARGET_SIMD"
2160 {
2161 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164 emit_insn (gen (operands[0], operands[1], operands[2]));
2165 DONE;
2166 }
2167 )
2168
2169 (define_expand "vec_pack_trunc_v2df"
2170 [(set (match_operand:V4SF 0 "register_operand")
2171 (vec_concat:V4SF
2172 (float_truncate:V2SF
2173 (match_operand:V2DF 1 "register_operand"))
2174 (float_truncate:V2SF
2175 (match_operand:V2DF 2 "register_operand"))
2176 ))]
2177 "TARGET_SIMD"
2178 {
2179 rtx tmp = gen_reg_rtx (V2SFmode);
2180 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2182
2183 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185 tmp, operands[hi]));
2186 DONE;
2187 }
2188 )
2189
2190 (define_expand "vec_pack_trunc_df"
2191 [(set (match_operand:V2SF 0 "register_operand")
2192 (vec_concat:V2SF
2193 (float_truncate:SF
2194 (match_operand:DF 1 "register_operand"))
2195 (float_truncate:SF
2196 (match_operand:DF 2 "register_operand"))
2197 ))]
2198 "TARGET_SIMD"
2199 {
2200 rtx tmp = gen_reg_rtx (V2DFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2207 DONE;
2208 }
2209 )
2210
2211 ;; FP Max/Min
2212 ;; Max/Min are introduced by GCC's mid-end idiom recognition.  An
2213 ;; expression like:
2214 ;; a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2217 ;;
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2223 ;; NaNs.
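;;
;; For illustration (a sketch): the idiom in question is
;;
;;   float fmin_idiom (float b, float c) { return (b < c) ? b : c; }
;;
;; which reaches the pattern below as MIN_EXPR only when compiled with
;; -ffinite-math-only (explicitly or via -ffast-math).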
2224
2225 (define_insn "<su><maxmin><mode>3"
2226 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228 (match_operand:VHSDF 2 "register_operand" "w")))]
2229 "TARGET_SIMD"
2230 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2232 )
2233
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
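;; For illustration (a sketch): a loop over the C library functions,
;;
;;   #include <math.h>
;;   void vmin (float *restrict d, const float *a, const float *b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = fminf (a[i], b[i]);   /* fminnm on .4s lanes */
;;   }
;;
;; can use fminnm directly, as FMINNM already provides the IEEE fmin ()
;; treatment of NaN operands.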
2237 (define_insn "<maxmin_uns><mode>3"
2238 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240 (match_operand:VHSDF 2 "register_operand" "w")]
2241 FMAXMIN_UNS))]
2242 "TARGET_SIMD"
2243 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2245 )
2246
2247 ;; 'across lanes' add.
2248
2249 (define_expand "reduc_plus_scal_<mode>"
2250 [(match_operand:<VEL> 0 "register_operand" "=w")
2251 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2252 UNSPEC_ADDV)]
2253 "TARGET_SIMD"
2254 {
2255 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256 rtx scratch = gen_reg_rtx (<MODE>mode);
2257 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2259 DONE;
2260 }
2261 )
2262
2263 (define_insn "aarch64_faddp<mode>"
2264 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266 (match_operand:VHSDF 2 "register_operand" "w")]
2267 UNSPEC_FADDV))]
2268 "TARGET_SIMD"
2269 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2271 )
2272
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274 [(set (match_operand:VDQV 0 "register_operand" "=w")
2275 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2276 UNSPEC_ADDV))]
2277 "TARGET_SIMD"
2278 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279 [(set_attr "type" "neon_reduc_add<q>")]
2280 )
2281
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283 [(set (match_operand:V2SI 0 "register_operand" "=w")
2284 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2285 UNSPEC_ADDV))]
2286 "TARGET_SIMD"
2287 "addp\\t%0.2s, %1.2s, %1.2s"
2288 [(set_attr "type" "neon_reduc_add")]
2289 )
2290
2291 (define_insn "reduc_plus_scal_<mode>"
2292 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2294 UNSPEC_FADDV))]
2295 "TARGET_SIMD"
2296 "faddp\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2298 )
2299
2300 (define_expand "reduc_plus_scal_v4sf"
2301 [(set (match_operand:SF 0 "register_operand")
2302 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2303 UNSPEC_FADDV))]
2304 "TARGET_SIMD"
2305 {
2306 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307 rtx scratch = gen_reg_rtx (V4SFmode);
2308 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2311 DONE;
2312 })
2313
2314 (define_insn "clrsb<mode>2"
2315 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2317 "TARGET_SIMD"
2318 "cls\\t%0.<Vtype>, %1.<Vtype>"
2319 [(set_attr "type" "neon_cls<q>")]
2320 )
2321
2322 (define_insn "clz<mode>2"
2323 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2325 "TARGET_SIMD"
2326 "clz\\t%0.<Vtype>, %1.<Vtype>"
2327 [(set_attr "type" "neon_cls<q>")]
2328 )
2329
2330 (define_insn "popcount<mode>2"
2331 [(set (match_operand:VB 0 "register_operand" "=w")
2332 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2333 "TARGET_SIMD"
2334 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335 [(set_attr "type" "neon_cnt<q>")]
2336 )
2337
2338 ;; 'across lanes' max and min ops.
2339
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343 [(match_operand:<VEL> 0 "register_operand")
2344 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2345 FMAXMINV)]
2346 "TARGET_SIMD"
2347 {
2348 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349 rtx scratch = gen_reg_rtx (<MODE>mode);
2350 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2351 operands[1]));
2352 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2353 DONE;
2354 }
2355 )
2356
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359 [(match_operand:<VEL> 0 "register_operand")
2360 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2361 MAXMINV)]
2362 "TARGET_SIMD"
2363 {
2364 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365 rtx scratch = gen_reg_rtx (<MODE>mode);
2366 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2367 operands[1]));
2368 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2369 DONE;
2370 }
2371 )
2372
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2376 MAXMINV))]
2377 "TARGET_SIMD"
2378 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379 [(set_attr "type" "neon_reduc_minmax<q>")]
2380 )
2381
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383 [(set (match_operand:V2SI 0 "register_operand" "=w")
2384 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2385 MAXMINV))]
2386 "TARGET_SIMD"
2387 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388 [(set_attr "type" "neon_reduc_minmax")]
2389 )
2390
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2394 FMAXMINV))]
2395 "TARGET_SIMD"
2396 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2398 )
2399
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; allocation.
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2403 ;; to select.
2404 ;;
2405 ;; Thus our BSL is of the form:
2406 ;; op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2408 ;;
2409 ;; if (op0 = mask)
2410 ;; bsl op0, op2, op3
2411 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2412 ;; bit op0, op2, mask
2413 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2414 ;; bif op0, op3, mask
2415 ;;
2416 ;; This pattern is what the aarch64_simd_bsl<mode> expander expands to.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
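;;
;; For illustration (a scalar sketch of the same identity): the
;; xor/and/xor form used in the RTL below is the standard bit-select
;; trick, equivalent to (mask & a) | (~mask & b):
;;
;;   #include <stdint.h>
;;   uint32_t bsl (uint32_t mask, uint32_t a, uint32_t b)
;;   {
;;     return ((a ^ b) & mask) ^ b;
;;   }
;;
;; so the three alternatives differ only in which input the destination
;; register aliases.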
2419
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2422 (xor:VDQ_I
2423 (and:VDQ_I
2424 (xor:VDQ_I
2425 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428 (match_dup:<V_INT_EQUIV> 3)
2429 ))]
2430 "TARGET_SIMD"
2431 "@
2432 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435 [(set_attr "type" "neon_bsl<q>")]
2436 )
2437
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first. The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
2443
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2446 (xor:VDQ_I
2447 (and:VDQ_I
2448 (xor:VDQ_I
2449 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452 (match_dup:<V_INT_EQUIV> 2)))]
2453 "TARGET_SIMD"
2454 "@
2455 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458 [(set_attr "type" "neon_bsl<q>")]
2459 )
2460
2461 ;; DImode is special: we want to avoid computing, in the vector registers,
2462 ;; operations which are more naturally computed in general purpose
2463 ;; registers. If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again. However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2467 ;;
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
2470
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2473 (xor:DI
2474 (and:DI
2475 (xor:DI
2476 (match_operand:DI 3 "register_operand" "w,0,w,r")
2477 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2479 (match_dup:DI 3)
2480 ))]
2481 "TARGET_SIMD"
2482 "@
2483 bsl\\t%0.8b, %2.8b, %3.8b
2484 bit\\t%0.8b, %2.8b, %1.8b
2485 bif\\t%0.8b, %3.8b, %1.8b
2486 #"
2487 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2488 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2489 {
2490 /* Split back to individual operations. If we're before reload, and
2491 able to create a temporary register, do so. If we're after reload,
2492 we've got an early-clobber destination register, so use that.
2493 Otherwise, we can't create pseudos and we can't yet guarantee that
2494 operands[0] is safe to write, so FAIL to split. */
2495
2496 rtx scratch;
2497 if (reload_completed)
2498 scratch = operands[0];
2499 else if (can_create_pseudo_p ())
2500 scratch = gen_reg_rtx (DImode);
2501 else
2502 FAIL;
2503
2504 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2507 DONE;
2508 }
2509 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510 (set_attr "length" "4,4,4,12")]
2511 )
2512
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2515 (xor:DI
2516 (and:DI
2517 (xor:DI
2518 (match_operand:DI 3 "register_operand" "w,w,0,r")
2519 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2521 (match_dup:DI 2)
2522 ))]
2523 "TARGET_SIMD"
2524 "@
2525 bsl\\t%0.8b, %3.8b, %2.8b
2526 bit\\t%0.8b, %3.8b, %1.8b
2527 bif\\t%0.8b, %2.8b, %1.8b
2528 #"
2529 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2530 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2531 {
2532 /* Split back to individual operations. If we're before reload, and
2533 able to create a temporary register, do so. If we're after reload,
2534 we've got an early-clobber destination register, so use that.
2535 Otherwise, we can't create pseudos and we can't yet guarantee that
2536 operands[0] is safe to write, so FAIL to split. */
2537
2538 rtx scratch;
2539 if (reload_completed)
2540 scratch = operands[0];
2541 else if (can_create_pseudo_p ())
2542 scratch = gen_reg_rtx (DImode);
2543 else
2544 FAIL;
2545
2546 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2549 DONE;
2550 }
2551 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552 (set_attr "length" "4,4,4,12")]
2553 )
2554
2555 (define_expand "aarch64_simd_bsl<mode>"
2556 [(match_operand:VALLDIF 0 "register_operand")
2557 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558 (match_operand:VALLDIF 2 "register_operand")
2559 (match_operand:VALLDIF 3 "register_operand")]
2560 "TARGET_SIMD"
2561 {
2562 /* We can't alias operands together if they have different modes. */
2563 rtx tmp = operands[0];
2564 if (FLOAT_MODE_P (<MODE>mode))
2565 {
2566 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2569 }
2570 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2572 operands[1],
2573 operands[2],
2574 operands[3]));
2575 if (tmp != operands[0])
2576 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2577
2578 DONE;
2579 })
2580
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582 [(match_operand:VALLDI 0 "register_operand")
2583 (match_operand:VALLDI 1 "nonmemory_operand")
2584 (match_operand:VALLDI 2 "nonmemory_operand")
2585 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2586 "TARGET_SIMD"
2587 {
2588 /* If we have (a = (P) ? -1 : 0),
2589 then we can simply move the generated mask (the result must be int). */
2590 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591 && operands[2] == CONST0_RTX (<MODE>mode))
2592 emit_move_insn (operands[0], operands[3]);
2593 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2594 else if (operands[1] == CONST0_RTX (<MODE>mode)
2595 && operands[2] == CONSTM1_RTX (<MODE>mode))
2596 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2597 else
2598 {
2599 if (!REG_P (operands[1]))
2600 operands[1] = force_reg (<MODE>mode, operands[1]);
2601 if (!REG_P (operands[2]))
2602 operands[2] = force_reg (<MODE>mode, operands[2]);
2603 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604 operands[1], operands[2]));
2605 }
2606
2607 DONE;
2608 })
2609
2610 ;; Patterns comparing two vectors to produce a mask.
2611
2612 (define_expand "vec_cmp<mode><mode>"
2613 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614 (match_operator 1 "comparison_operator"
2615 [(match_operand:VSDQ_I_DI 2 "register_operand")
2616 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2617 "TARGET_SIMD"
2618 {
2619 rtx mask = operands[0];
2620 enum rtx_code code = GET_CODE (operands[1]);
2621
2622 switch (code)
2623 {
2624 case NE:
2625 case LE:
2626 case LT:
2627 case GE:
2628 case GT:
2629 case EQ:
2630 if (operands[3] == CONST0_RTX (<MODE>mode))
2631 break;
2632
2633 /* Fall through. */
2634 default:
2635 if (!REG_P (operands[3]))
2636 operands[3] = force_reg (<MODE>mode, operands[3]);
2637
2638 break;
2639 }
2640
2641 switch (code)
2642 {
2643 case LT:
2644 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2645 break;
2646
2647 case GE:
2648 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2649 break;
2650
2651 case LE:
2652 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2653 break;
2654
2655 case GT:
2656 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2657 break;
2658
2659 case LTU:
2660 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2661 break;
2662
2663 case GEU:
2664 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2665 break;
2666
2667 case LEU:
2668 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2669 break;
2670
2671 case GTU:
2672 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2673 break;
2674
2675 case NE:
2676 /* Handle NE as !EQ. */
2677 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2679 break;
2680
2681 case EQ:
2682 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2683 break;
2684
2685 default:
2686 gcc_unreachable ();
2687 }
2688
2689 DONE;
2690 })
2691
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694 (match_operator 1 "comparison_operator"
2695 [(match_operand:VDQF 2 "register_operand")
2696 (match_operand:VDQF 3 "nonmemory_operand")]))]
2697 "TARGET_SIMD"
2698 {
2699 int use_zero_form = 0;
2700 enum rtx_code code = GET_CODE (operands[1]);
2701 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2702
2703 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2704
2705 switch (code)
2706 {
2707 case LE:
2708 case LT:
2709 case GE:
2710 case GT:
2711 case EQ:
2712 if (operands[3] == CONST0_RTX (<MODE>mode))
2713 {
2714 use_zero_form = 1;
2715 break;
2716 }
2717 /* Fall through. */
2718 default:
2719 if (!REG_P (operands[3]))
2720 operands[3] = force_reg (<MODE>mode, operands[3]);
2721
2722 break;
2723 }
2724
2725 switch (code)
2726 {
2727 case LT:
2728 if (use_zero_form)
2729 {
2730 comparison = gen_aarch64_cmlt<mode>;
2731 break;
2732 }
2733 /* Fall through. */
2734 case UNGE:
2735 std::swap (operands[2], operands[3]);
2736 /* Fall through. */
2737 case UNLE:
2738 case GT:
2739 comparison = gen_aarch64_cmgt<mode>;
2740 break;
2741 case LE:
2742 if (use_zero_form)
2743 {
2744 comparison = gen_aarch64_cmle<mode>;
2745 break;
2746 }
2747 /* Fall through. */
2748 case UNGT:
2749 std::swap (operands[2], operands[3]);
2750 /* Fall through. */
2751 case UNLT:
2752 case GE:
2753 comparison = gen_aarch64_cmge<mode>;
2754 break;
2755 case NE:
2756 case EQ:
2757 comparison = gen_aarch64_cmeq<mode>;
2758 break;
2759 case UNEQ:
2760 case ORDERED:
2761 case UNORDERED:
2762 case LTGT:
2763 break;
2764 default:
2765 gcc_unreachable ();
2766 }
2767
2768 switch (code)
2769 {
2770 case UNGE:
2771 case UNGT:
2772 case UNLE:
2773 case UNLT:
2774 case NE:
2775 /* FCM returns false for lanes which are unordered, so if we emit
2776 the inverse of the comparison we actually want and then
2777 invert the result, we will end up with the correct result.
2778 Note that a NE NaN and NaN NE b are true for all a, b.
2779
2780 Our transformations are:
2781 a UNGE b -> !(b GT a)
2782 a UNGT b -> !(b GE a)
2783 a UNLE b -> !(a GT b)
2784 a UNLT b -> !(a GE b)
2785 a NE b -> !(a EQ b) */
2786 gcc_assert (comparison != NULL);
2787 emit_insn (comparison (operands[0], operands[2], operands[3]));
2788 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2789 break;
2790
2791 case LT:
2792 case LE:
2793 case GT:
2794 case GE:
2795 case EQ:
2796 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2797 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2798 a GE b -> a GE b
2799 a GT b -> a GT b
2800 a LE b -> b GE a
2801 a LT b -> b GT a
2802 a EQ b -> a EQ b */
2803 gcc_assert (comparison != NULL);
2804 emit_insn (comparison (operands[0], operands[2], operands[3]));
2805 break;
2806
2807 case UNEQ:
2808 /* We first check (a > b || b > a), which is !UNEQ; inverting
2809 this result then gives us (a == b || a UNORDERED b). */
2810 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2811 operands[2], operands[3]));
2812 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2813 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2814 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2815 break;
2816
2817 case LTGT:
2818 /* LTGT is not guaranteed not to generate an FP exception, so take
2819 the faster route: ((a > b) || (b > a)). */
2820 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2821 operands[2], operands[3]));
2822 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2823 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2824 break;
2825
2826 case UNORDERED:
2827 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2828 UNORDERED as !ORDERED. */
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2830 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2831 operands[3], operands[2]));
2832 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2833 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2834 break;
2835
2836 case ORDERED:
2837 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2838 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2839 operands[3], operands[2]));
2840 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2841 break;
2842
2843 default:
2844 gcc_unreachable ();
2845 }
2846
2847 DONE;
2848 })
2849
2850 (define_expand "vec_cmpu<mode><mode>"
2851 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2852 (match_operator 1 "comparison_operator"
2853 [(match_operand:VSDQ_I_DI 2 "register_operand")
2854 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2855 "TARGET_SIMD"
2856 {
2857 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2858 operands[2], operands[3]));
2859 DONE;
2860 })
2861
2862 (define_expand "vcond<mode><mode>"
2863 [(set (match_operand:VALLDI 0 "register_operand")
2864 (if_then_else:VALLDI
2865 (match_operator 3 "comparison_operator"
2866 [(match_operand:VALLDI 4 "register_operand")
2867 (match_operand:VALLDI 5 "nonmemory_operand")])
2868 (match_operand:VALLDI 1 "nonmemory_operand")
2869 (match_operand:VALLDI 2 "nonmemory_operand")))]
2870 "TARGET_SIMD"
2871 {
2872 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2873 enum rtx_code code = GET_CODE (operands[3]);
2874
2875 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2876 as EQ and swap operands 1/2 in order to avoid the additional
2877 NOT instruction. */
2878 if (code == NE)
2879 {
2880 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2881 operands[4], operands[5]);
2882 std::swap (operands[1], operands[2]);
2883 }
2884 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2885 operands[4], operands[5]));
2886 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2887 operands[2], mask));
2888
2889 DONE;
2890 })
2891
2892 (define_expand "vcond<v_cmp_mixed><mode>"
2893 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2894 (if_then_else:<V_cmp_mixed>
2895 (match_operator 3 "comparison_operator"
2896 [(match_operand:VDQF_COND 4 "register_operand")
2897 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2898 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2899 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2900 "TARGET_SIMD"
2901 {
2902 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2903 enum rtx_code code = GET_CODE (operands[3]);
2904
2905 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2906 as EQ and swap operands 1/2 in order to avoid the additional
2907 NOT instruction. */
2908 if (code == NE)
2909 {
2910 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2911 operands[4], operands[5]);
2912 std::swap (operands[1], operands[2]);
2913 }
2914 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2915 operands[4], operands[5]));
2916 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2917 operands[0], operands[1],
2918 operands[2], mask));
2919
2920 DONE;
2921 })
2922
2923 (define_expand "vcondu<mode><mode>"
2924 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2925 (if_then_else:VSDQ_I_DI
2926 (match_operator 3 "comparison_operator"
2927 [(match_operand:VSDQ_I_DI 4 "register_operand")
2928 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2929 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2930 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2931 "TARGET_SIMD"
2932 {
2933 rtx mask = gen_reg_rtx (<MODE>mode);
2934 enum rtx_code code = GET_CODE (operands[3]);
2935
2936 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2937 as EQ and swap operands 1/2 in order to avoid the additional
2938 NOT instruction. */
2939 if (code == NE)
2940 {
2941 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2942 operands[4], operands[5]);
2943 std::swap (operands[1], operands[2]);
2944 }
2945 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2946 operands[4], operands[5]));
2947 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2948 operands[2], mask));
2949 DONE;
2950 })
2951
2952 (define_expand "vcondu<mode><v_cmp_mixed>"
2953 [(set (match_operand:VDQF 0 "register_operand")
2954 (if_then_else:VDQF
2955 (match_operator 3 "comparison_operator"
2956 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2957 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2958 (match_operand:VDQF 1 "nonmemory_operand")
2959 (match_operand:VDQF 2 "nonmemory_operand")))]
2960 "TARGET_SIMD"
2961 {
2962 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2963 enum rtx_code code = GET_CODE (operands[3]);
2964
2965 /* NE is handled as !EQ in vec_cmp patterns, so here we rewrite it
2966 as EQ and swap operands 1/2 in order to avoid the additional
2967 NOT instruction. */
2968 if (code == NE)
2969 {
2970 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2971 operands[4], operands[5]);
2972 std::swap (operands[1], operands[2]);
2973 }
2974 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2975 mask, operands[3],
2976 operands[4], operands[5]));
2977 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2978 operands[2], mask));
2979 DONE;
2980 })
2981
2982 ;; Patterns for AArch64 SIMD Intrinsics.
2983
2984 ;; Lane extraction with sign extension to general purpose register.
2985 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2986 [(set (match_operand:GPI 0 "register_operand" "=r")
2987 (sign_extend:GPI
2988 (vec_select:<VEL>
2989 (match_operand:VDQQH 1 "register_operand" "w")
2990 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2991 "TARGET_SIMD"
2992 {
2993 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
2994 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2995 }
2996 [(set_attr "type" "neon_to_gp<q>")]
2997 )
2998
2999 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3000 [(set (match_operand:SI 0 "register_operand" "=r")
3001 (zero_extend:SI
3002 (vec_select:<VEL>
3003 (match_operand:VDQQH 1 "register_operand" "w")
3004 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3005 "TARGET_SIMD"
3006 {
3007 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3008 return "umov\\t%w0, %1.<Vetype>[%2]";
3009 }
3010 [(set_attr "type" "neon_to_gp<q>")]
3011 )
3012
3013 ;; Lane extraction of a value: neither sign nor zero extension
3014 ;; is guaranteed, so the upper bits should be considered undefined.
3015 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
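;; For illustration (a sketch): when the source makes the extension
;; explicit, e.g.
;;
;;   #include <arm_neon.h>
;;   int32_t get2 (int16x4_t v) { return v[2]; }   /* smov w0, v0.h[2] */
;;
;; the sign_extend pattern above is used instead; the bare vec_select
;; below makes no promise about the upper bits.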
3016 (define_insn "aarch64_get_lane<mode>"
3017 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3018 (vec_select:<VEL>
3019 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3020 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3021 "TARGET_SIMD"
3022 {
3023 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3024 switch (which_alternative)
3025 {
3026 case 0:
3027 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3028 case 1:
3029 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3030 case 2:
3031 return "st1\\t{%1.<Vetype>}[%2], %0";
3032 default:
3033 gcc_unreachable ();
3034 }
3035 }
3036 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3037 )
3038
3039 (define_insn "load_pair_lanes<mode>"
3040 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3041 (vec_concat:<VDBL>
3042 (match_operand:VDC 1 "memory_operand" "Utq")
3043 (match_operand:VDC 2 "memory_operand" "m")))]
3044 "TARGET_SIMD && !STRICT_ALIGNMENT
3045 && rtx_equal_p (XEXP (operands[2], 0),
3046 plus_constant (Pmode,
3047 XEXP (operands[1], 0),
3048 GET_MODE_SIZE (<MODE>mode)))"
3049 "ldr\\t%q0, %1"
3050 [(set_attr "type" "neon_load1_1reg_q")]
3051 )
3052
3053 (define_insn "store_pair_lanes<mode>"
3054 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3055 (vec_concat:<VDBL>
3056 (match_operand:VDC 1 "register_operand" "w, r")
3057 (match_operand:VDC 2 "register_operand" "w, r")))]
3058 "TARGET_SIMD"
3059 "@
3060 stp\\t%d1, %d2, %y0
3061 stp\\t%x1, %x2, %y0"
3062 [(set_attr "type" "neon_stp, store_16")]
3063 )
3064
3065 ;; In this insn, operand 1 should be the low part, and operand 2 the high
3066 ;; part, of the dest vector.
3067
3068 (define_insn "*aarch64_combinez<mode>"
3069 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3070 (vec_concat:<VDBL>
3071 (match_operand:VDC 1 "general_operand" "w,?r,m")
3072 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3073 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3074 "@
3075 mov\\t%0.8b, %1.8b
3076 fmov\t%d0, %1
3077 ldr\\t%d0, %1"
3078 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3079 (set_attr "simd" "yes,*,yes")
3080 (set_attr "fp" "*,yes,*")]
3081 )
3082
3083 (define_insn "*aarch64_combinez_be<mode>"
3084 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3085 (vec_concat:<VDBL>
3086 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3087 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3088 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3089 "@
3090 mov\\t%0.8b, %1.8b
3091 fmov\t%d0, %1
3092 ldr\\t%d0, %1"
3093 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3094 (set_attr "simd" "yes,*,yes")
3095 (set_attr "fp" "*,yes,*")]
3096 )
3097
3098 (define_expand "aarch64_combine<mode>"
3099 [(match_operand:<VDBL> 0 "register_operand")
3100 (match_operand:VDC 1 "register_operand")
3101 (match_operand:VDC 2 "register_operand")]
3102 "TARGET_SIMD"
3103 {
3104 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3105
3106 DONE;
3107 }
3108 )
3109
3110 (define_expand "aarch64_simd_combine<mode>"
3111 [(match_operand:<VDBL> 0 "register_operand")
3112 (match_operand:VDC 1 "register_operand")
3113 (match_operand:VDC 2 "register_operand")]
3114 "TARGET_SIMD"
3115 {
3116 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3117 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3118 DONE;
3119 }
3120 [(set_attr "type" "multiple")]
3121 )
3122
3123 ;; <su><addsub>l<q>.
3124
3125 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3126 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3127 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3128 (match_operand:VQW 1 "register_operand" "w")
3129 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3130 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131 (match_operand:VQW 2 "register_operand" "w")
3132 (match_dup 3)))))]
3133 "TARGET_SIMD"
3134 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3135 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3136 )
3137
3138 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3140 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3141 (match_operand:VQW 1 "register_operand" "w")
3142 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3143 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3144 (match_operand:VQW 2 "register_operand" "w")
3145 (match_dup 3)))))]
3146 "TARGET_SIMD"
3147 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3148 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3149 )
3150
3151
3152 (define_expand "aarch64_saddl2<mode>"
3153 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (match_operand:VQW 1 "register_operand" "w")
3155 (match_operand:VQW 2 "register_operand" "w")]
3156 "TARGET_SIMD"
3157 {
3158 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3159 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3160 operands[2], p));
3161 DONE;
3162 })
3163
3164 (define_expand "aarch64_uaddl2<mode>"
3165 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3166 (match_operand:VQW 1 "register_operand" "w")
3167 (match_operand:VQW 2 "register_operand" "w")]
3168 "TARGET_SIMD"
3169 {
3170 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3171 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3172 operands[2], p));
3173 DONE;
3174 })
3175
3176 (define_expand "aarch64_ssubl2<mode>"
3177 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (match_operand:VQW 1 "register_operand" "w")
3179 (match_operand:VQW 2 "register_operand" "w")]
3180 "TARGET_SIMD"
3181 {
3182 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3184 operands[2], p));
3185 DONE;
3186 })
3187
3188 (define_expand "aarch64_usubl2<mode>"
3189 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190 (match_operand:VQW 1 "register_operand" "w")
3191 (match_operand:VQW 2 "register_operand" "w")]
3192 "TARGET_SIMD"
3193 {
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3196 operands[2], p));
3197 DONE;
3198 })
3199
3200 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3201 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3202 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3203 (match_operand:VD_BHSI 1 "register_operand" "w"))
3204 (ANY_EXTEND:<VWIDE>
3205 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3206 "TARGET_SIMD"
3207 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3208 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3209 )
3210
3211 ;; <su><addsub>w<q>.
3212
3213 (define_expand "widen_ssum<mode>3"
3214 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3215 (plus:<VDBLW> (sign_extend:<VDBLW>
3216 (match_operand:VQW 1 "register_operand" ""))
3217 (match_operand:<VDBLW> 2 "register_operand" "")))]
3218 "TARGET_SIMD"
3219 {
3220 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3221 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3222
3223 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3224 operands[1], p));
3225 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3226 DONE;
3227 }
3228 )
3229
3230 (define_expand "widen_ssum<mode>3"
3231 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3232 (plus:<VWIDE> (sign_extend:<VWIDE>
3233 (match_operand:VD_BHSI 1 "register_operand" ""))
3234 (match_operand:<VWIDE> 2 "register_operand" "")))]
3235 "TARGET_SIMD"
3236 {
3237 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3238 DONE;
3239 })
3240
3241 (define_expand "widen_usum<mode>3"
3242 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3243 (plus:<VDBLW> (zero_extend:<VDBLW>
3244 (match_operand:VQW 1 "register_operand" ""))
3245 (match_operand:<VDBLW> 2 "register_operand" "")))]
3246 "TARGET_SIMD"
3247 {
3248 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3249 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3250
3251 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3252 operands[1], p));
3253 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3254 DONE;
3255 }
3256 )
3257
3258 (define_expand "widen_usum<mode>3"
3259 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3260 (plus:<VWIDE> (zero_extend:<VWIDE>
3261 (match_operand:VD_BHSI 1 "register_operand" ""))
3262 (match_operand:<VWIDE> 2 "register_operand" "")))]
3263 "TARGET_SIMD"
3264 {
3265 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3266 DONE;
3267 })
3268
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3270 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3272 (ANY_EXTEND:<VWIDE>
3273 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3274 "TARGET_SIMD"
3275 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3276 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3277 )
3278
3279 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3280 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3281 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3282 (ANY_EXTEND:<VWIDE>
3283 (vec_select:<VHALF>
3284 (match_operand:VQW 2 "register_operand" "w")
3285 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3286 "TARGET_SIMD"
3287 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3288 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3289 )
3290
3291 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3293 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3294 (ANY_EXTEND:<VWIDE>
3295 (vec_select:<VHALF>
3296 (match_operand:VQW 2 "register_operand" "w")
3297 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3298 "TARGET_SIMD"
3299 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3300 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3301 )
3302
3303 (define_expand "aarch64_saddw2<mode>"
3304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3305 (match_operand:<VWIDE> 1 "register_operand" "w")
3306 (match_operand:VQW 2 "register_operand" "w")]
3307 "TARGET_SIMD"
3308 {
3309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3310 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3311 operands[2], p));
3312 DONE;
3313 })
3314
3315 (define_expand "aarch64_uaddw2<mode>"
3316 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3317 (match_operand:<VWIDE> 1 "register_operand" "w")
3318 (match_operand:VQW 2 "register_operand" "w")]
3319 "TARGET_SIMD"
3320 {
3321 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3322 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3323 operands[2], p));
3324 DONE;
3325 })
3326
3328 (define_expand "aarch64_ssubw2<mode>"
3329 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3330 (match_operand:<VWIDE> 1 "register_operand" "w")
3331 (match_operand:VQW 2 "register_operand" "w")]
3332 "TARGET_SIMD"
3333 {
3334 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3335 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3336 operands[2], p));
3337 DONE;
3338 })
3339
3340 (define_expand "aarch64_usubw2<mode>"
3341 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (match_operand:<VWIDE> 1 "register_operand" "w")
3343 (match_operand:VQW 2 "register_operand" "w")]
3344 "TARGET_SIMD"
3345 {
3346 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3348 operands[2], p));
3349 DONE;
3350 })
3351
3352 ;; <su><r>h<addsub>.
3353
3354 (define_insn "aarch64_<sur>h<addsub><mode>"
3355 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3356 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3357 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3358 HADDSUB))]
3359 "TARGET_SIMD"
3360 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3361 [(set_attr "type" "neon_<addsub>_halve<q>")]
3362 )
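
;; Per-lane halving operations, computed without intermediate overflow:
;;   shadd/uhadd:   d[i] = (a[i] + b[i]) >> 1
;;   srhadd/urhadd: d[i] = (a[i] + b[i] + 1) >> 1
;;   shsub/uhsub:   d[i] = (a[i] - b[i]) >> 1
;; (illustratively, vrhaddq_u8 averages two vectors with rounding).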
3363
3364 ;; <r><addsub>hn<q>.
3365
3366 (define_insn "aarch64_<sur><addsub>hn<mode>"
3367 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3368 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3369 (match_operand:VQN 2 "register_operand" "w")]
3370 ADDSUBHN))]
3371 "TARGET_SIMD"
3372 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3373 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3374 )
3375
3376 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3377 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3378 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3379 (match_operand:VQN 2 "register_operand" "w")
3380 (match_operand:VQN 3 "register_operand" "w")]
3381 ADDSUBHN2))]
3382 "TARGET_SIMD"
3383 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3384 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3385 )
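
;; Narrow-high semantics, where w is the input element width:
;;   addhn/subhn:   d[i] = (a[i] +/- b[i]) >> (w/2)                (truncate)
;;   raddhn/rsubhn: d[i] = (a[i] +/- b[i] + (1 << (w/2 - 1))) >> (w/2)
;; The hn2 forms write the upper half of the destination
;; (illustratively, vaddhn_high_s32).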
3386
3387 ;; pmul.
3388
3389 (define_insn "aarch64_pmul<mode>"
3390 [(set (match_operand:VB 0 "register_operand" "=w")
3391 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3392 (match_operand:VB 2 "register_operand" "w")]
3393 UNSPEC_PMUL))]
3394 "TARGET_SIMD"
3395 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3396 [(set_attr "type" "neon_mul_<Vetype><q>")]
3397 )
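
;; PMUL is a carry-less (polynomial, GF(2)) multiply: the partial products
;; are combined with XOR instead of addition (illustratively, vmul_p8 on
;; poly8x8_t values).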
3398
3399 ;; fmulx.
3400
3401 (define_insn "aarch64_fmulx<mode>"
3402 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3403 (unspec:VHSDF_HSDF
3404 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3405 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3406 UNSPEC_FMULX))]
3407 "TARGET_SIMD"
3408 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3409 [(set_attr "type" "neon_fp_mul_<stype>")]
3410 )
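
;; FMULX behaves as FMUL except that (+/-0) * (+/-Inf), a NaN under FMUL,
;; returns +/-2.0 with the XOR of the operands' signs; ACLE exposes it as
;; vmulx_f32 etc. (illustrative names).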
3411
3412 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3413
3414 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3415 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3416 (unspec:VDQSF
3417 [(match_operand:VDQSF 1 "register_operand" "w")
3418 (vec_duplicate:VDQSF
3419 (vec_select:<VEL>
3420 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3421 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3422 UNSPEC_FMULX))]
3423 "TARGET_SIMD"
3424 {
3425 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3426 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3427 }
3428 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3429 )
3430
3431 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3432
3433 (define_insn "*aarch64_mulx_elt<mode>"
3434 [(set (match_operand:VDQF 0 "register_operand" "=w")
3435 (unspec:VDQF
3436 [(match_operand:VDQF 1 "register_operand" "w")
3437 (vec_duplicate:VDQF
3438 (vec_select:<VEL>
3439 (match_operand:VDQF 2 "register_operand" "w")
3440 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3441 UNSPEC_FMULX))]
3442 "TARGET_SIMD"
3443 {
3444 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3445 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3446 }
3447 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3448 )
3449
3450 ;; vmulxq_lane
3451
3452 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3453 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3454 (unspec:VHSDF
3455 [(match_operand:VHSDF 1 "register_operand" "w")
3456 (vec_duplicate:VHSDF
3457 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3458 UNSPEC_FMULX))]
3459 "TARGET_SIMD"
3460 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3461 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3462 )
3463
3464 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3465 ;; vmulxd_lane_f64 == vmulx_lane_f64
3466 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3467
3468 (define_insn "*aarch64_vgetfmulx<mode>"
3469 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3470 (unspec:<VEL>
3471 [(match_operand:<VEL> 1 "register_operand" "w")
3472 (vec_select:<VEL>
3473 (match_operand:VDQF 2 "register_operand" "w")
3474 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3475 UNSPEC_FMULX))]
3476 "TARGET_SIMD"
3477 {
3478 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3479 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3480 }
3481 [(set_attr "type" "fmul<Vetype>")]
3482 )

3483 ;; <su>q<addsub>
3484
3485 (define_insn "aarch64_<su_optab><optab><mode>"
3486 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3487 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3488 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3489 "TARGET_SIMD"
3490 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3491 [(set_attr "type" "neon_<optab><q>")]
3492 )
3493
3494 ;; suqadd and usqadd
3495
3496 (define_insn "aarch64_<sur>qadd<mode>"
3497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3499 (match_operand:VSDQ_I 2 "register_operand" "w")]
3500 USSUQADD))]
3501 "TARGET_SIMD"
3502 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3503 [(set_attr "type" "neon_qadd<q>")]
3504 )
3505
3506 ;; sqmovun
3507
3508 (define_insn "aarch64_sqmovun<mode>"
3509 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3510 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3511 UNSPEC_SQXTUN))]
3512 "TARGET_SIMD"
3513 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3514 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3515 )
3516
3517 ;; sqmovn and uqmovn
3518
3519 (define_insn "aarch64_<sur>qmovn<mode>"
3520 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3521 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3522 SUQMOVN))]
3523 "TARGET_SIMD"
3524 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3525 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3526 )
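
;; Saturating narrows, for narrow element width w:
;;   sqxtn:  signed to signed, clamped to [-2^(w-1), 2^(w-1) - 1]
;;   uqxtn:  unsigned to unsigned, clamped to [0, 2^w - 1]
;;   sqxtun: signed to unsigned, clamped to [0, 2^w - 1]
;; (illustratively, vqmovn_s32 / vqmovn_u32 / vqmovun_s32).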
3527
3528 ;; <su>q<absneg>
3529
3530 (define_insn "aarch64_s<optab><mode>"
3531 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3532 (UNQOPS:VSDQ_I
3533 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3534 "TARGET_SIMD"
3535 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3536 [(set_attr "type" "neon_<optab><q>")]
3537 )
3538
3539 ;; sq<r>dmulh.
3540
3541 (define_insn "aarch64_sq<r>dmulh<mode>"
3542 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3543 (unspec:VSDQ_HSI
3544 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3545 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3546 VQDMULH))]
3547 "TARGET_SIMD"
3548 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3549 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3550 )
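
;; Per-lane, for element width w:
;;   sqdmulh:  d[i] = sat ((2 * a[i] * b[i]) >> w)
;;   sqrdmulh: d[i] = sat ((2 * a[i] * b[i] + (1 << (w - 1))) >> w)
;; (illustratively, vqdmulh_s16 / vqrdmulhq_s32).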
3551
3552 ;; sq<r>dmulh_lane
3553
3554 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3555 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3556 (unspec:VDQHS
3557 [(match_operand:VDQHS 1 "register_operand" "w")
3558 (vec_select:<VEL>
3559 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3560 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3561 VQDMULH))]
3562 "TARGET_SIMD"
3563 "*
3564 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3565 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3566 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3567 )
3568
3569 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3570 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3571 (unspec:VDQHS
3572 [(match_operand:VDQHS 1 "register_operand" "w")
3573 (vec_select:<VEL>
3574 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3575 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3576 VQDMULH))]
3577 "TARGET_SIMD"
3578 "*
3579 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3580 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3581 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3582 )
3583
3584 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3585 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3586 (unspec:SD_HSI
3587 [(match_operand:SD_HSI 1 "register_operand" "w")
3588 (vec_select:<VEL>
3589 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3590 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3591 VQDMULH))]
3592 "TARGET_SIMD"
3593 "*
3594 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3595 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3596 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3597 )
3598
3599 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3600 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3601 (unspec:SD_HSI
3602 [(match_operand:SD_HSI 1 "register_operand" "w")
3603 (vec_select:<VEL>
3604 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3605 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3606 VQDMULH))]
3607 "TARGET_SIMD"
3608 "*
3609 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3610 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3611 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3612 )
3613
3614 ;; sqrdml[as]h.
3615
3616 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3617 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3618 (unspec:VSDQ_HSI
3619 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3620 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3621 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3622 SQRDMLH_AS))]
3623 "TARGET_SIMD_RDMA"
3624 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3625 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3626 )
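
;; ARMv8.1-A only (TARGET_SIMD_RDMA).  Like sqrdmulh but fused with an
;; accumulate and a single final saturation, roughly
;;   sqrdmlah: d[i] = sat (d[i] + ((2 * a[i] * b[i] + (1 << (w - 1))) >> w))
;; and sqrdmlsh likewise with "-" (illustratively, vqrdmlahq_s16 under +rdm).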
3627
3628 ;; sqrdml[as]h_lane.
3629
3630 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3631 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3632 (unspec:VDQHS
3633 [(match_operand:VDQHS 1 "register_operand" "0")
3634 (match_operand:VDQHS 2 "register_operand" "w")
3635 (vec_select:<VEL>
3636 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3637 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3638 SQRDMLH_AS))]
3639 "TARGET_SIMD_RDMA"
3640 {
3641 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3642 return
3643 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3644 }
3645 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3646 )
3647
3648 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3649 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3650 (unspec:SD_HSI
3651 [(match_operand:SD_HSI 1 "register_operand" "0")
3652 (match_operand:SD_HSI 2 "register_operand" "w")
3653 (vec_select:<VEL>
3654 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3655 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3656 SQRDMLH_AS))]
3657 "TARGET_SIMD_RDMA"
3658 {
3659 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3660 return
3661 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3662 }
3663 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3664 )
3665
3666 ;; sqrdml[as]h_laneq.
3667
3668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3669 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3670 (unspec:VDQHS
3671 [(match_operand:VDQHS 1 "register_operand" "0")
3672 (match_operand:VDQHS 2 "register_operand" "w")
3673 (vec_select:<VEL>
3674 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3675 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3676 SQRDMLH_AS))]
3677 "TARGET_SIMD_RDMA"
3678 {
3679 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3680 return
3681 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3682 }
3683 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3684 )
3685
3686 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3687 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3688 (unspec:SD_HSI
3689 [(match_operand:SD_HSI 1 "register_operand" "0")
3690 (match_operand:SD_HSI 2 "register_operand" "w")
3691 (vec_select:<VEL>
3692 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3693 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3694 SQRDMLH_AS))]
3695 "TARGET_SIMD_RDMA"
3696 {
3697 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3698 return
3699 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3700 }
3701 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3702 )
3703
3704 ;; vqdml[sa]l
3705
3706 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3707 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3708 (SBINQOPS:<VWIDE>
3709 (match_operand:<VWIDE> 1 "register_operand" "0")
3710 (ss_ashift:<VWIDE>
3711 (mult:<VWIDE>
3712 (sign_extend:<VWIDE>
3713 (match_operand:VSD_HSI 2 "register_operand" "w"))
3714 (sign_extend:<VWIDE>
3715 (match_operand:VSD_HSI 3 "register_operand" "w")))
3716 (const_int 1))))]
3717 "TARGET_SIMD"
3718 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3719 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3720 )
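
;; Per-lane, accumulating into the wide destination:
;;   sqdmlal: d[i] = sat (d[i] + sat (2 * a[i] * b[i]))
;;   sqdmlsl: d[i] = sat (d[i] - sat (2 * a[i] * b[i]))
;; (illustratively, vqdmlal_s16 accumulates into an int32x4_t).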
3721
3722 ;; vqdml[sa]l_lane
3723
3724 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3725 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3726 (SBINQOPS:<VWIDE>
3727 (match_operand:<VWIDE> 1 "register_operand" "0")
3728 (ss_ashift:<VWIDE>
3729 (mult:<VWIDE>
3730 (sign_extend:<VWIDE>
3731 (match_operand:VD_HSI 2 "register_operand" "w"))
3732 (sign_extend:<VWIDE>
3733 (vec_duplicate:VD_HSI
3734 (vec_select:<VEL>
3735 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3736 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3737 ))
3738 (const_int 1))))]
3739 "TARGET_SIMD"
3740 {
3741 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3742 return
3743 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3744 }
3745 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3746 )
3747
3748 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3749 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3750 (SBINQOPS:<VWIDE>
3751 (match_operand:<VWIDE> 1 "register_operand" "0")
3752 (ss_ashift:<VWIDE>
3753 (mult:<VWIDE>
3754 (sign_extend:<VWIDE>
3755 (match_operand:VD_HSI 2 "register_operand" "w"))
3756 (sign_extend:<VWIDE>
3757 (vec_duplicate:VD_HSI
3758 (vec_select:<VEL>
3759 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3760 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3761 ))
3762 (const_int 1))))]
3763 "TARGET_SIMD"
3764 {
3765 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3766 return
3767 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3768 }
3769 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3770 )
3771
3772 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3774 (SBINQOPS:<VWIDE>
3775 (match_operand:<VWIDE> 1 "register_operand" "0")
3776 (ss_ashift:<VWIDE>
3777 (mult:<VWIDE>
3778 (sign_extend:<VWIDE>
3779 (match_operand:SD_HSI 2 "register_operand" "w"))
3780 (sign_extend:<VWIDE>
3781 (vec_select:<VEL>
3782 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3783 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3784 )
3785 (const_int 1))))]
3786 "TARGET_SIMD"
3787 {
3788 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3789 return
3790 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3791 }
3792 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3793 )
3794
3795 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3796 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3797 (SBINQOPS:<VWIDE>
3798 (match_operand:<VWIDE> 1 "register_operand" "0")
3799 (ss_ashift:<VWIDE>
3800 (mult:<VWIDE>
3801 (sign_extend:<VWIDE>
3802 (match_operand:SD_HSI 2 "register_operand" "w"))
3803 (sign_extend:<VWIDE>
3804 (vec_select:<VEL>
3805 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3806 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3807 )
3808 (const_int 1))))]
3809 "TARGET_SIMD"
3810 {
3811 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3812 return
3813 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3814 }
3815 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3816 )
3817
3818 ;; vqdml[sa]l_n
3819
3820 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3821 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3822 (SBINQOPS:<VWIDE>
3823 (match_operand:<VWIDE> 1 "register_operand" "0")
3824 (ss_ashift:<VWIDE>
3825 (mult:<VWIDE>
3826 (sign_extend:<VWIDE>
3827 (match_operand:VD_HSI 2 "register_operand" "w"))
3828 (sign_extend:<VWIDE>
3829 (vec_duplicate:VD_HSI
3830 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3831 (const_int 1))))]
3832 "TARGET_SIMD"
3833 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3834 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3835 )
3836
3837 ;; sqdml[as]l2
3838
3839 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3840 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3841 (SBINQOPS:<VWIDE>
3842 (match_operand:<VWIDE> 1 "register_operand" "0")
3843 (ss_ashift:<VWIDE>
3844 (mult:<VWIDE>
3845 (sign_extend:<VWIDE>
3846 (vec_select:<VHALF>
3847 (match_operand:VQ_HSI 2 "register_operand" "w")
3848 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3849 (sign_extend:<VWIDE>
3850 (vec_select:<VHALF>
3851 (match_operand:VQ_HSI 3 "register_operand" "w")
3852 (match_dup 4))))
3853 (const_int 1))))]
3854 "TARGET_SIMD"
3855 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3856 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3857 )
3858
3859 (define_expand "aarch64_sqdmlal2<mode>"
3860 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3861 (match_operand:<VWIDE> 1 "register_operand" "w")
3862 (match_operand:VQ_HSI 2 "register_operand" "w")
3863 (match_operand:VQ_HSI 3 "register_operand" "w")]
3864 "TARGET_SIMD"
3865 {
3866 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3867 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3868 operands[2], operands[3], p));
3869 DONE;
3870 })
3871
3872 (define_expand "aarch64_sqdmlsl2<mode>"
3873 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3874 (match_operand:<VWIDE> 1 "register_operand" "w")
3875 (match_operand:VQ_HSI 2 "register_operand" "w")
3876 (match_operand:VQ_HSI 3 "register_operand" "w")]
3877 "TARGET_SIMD"
3878 {
3879 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3880 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3881 operands[2], operands[3], p));
3882 DONE;
3883 })
3884
3885 ;; vqdml[sa]l2_lane
3886
3887 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3888 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3889 (SBINQOPS:<VWIDE>
3890 (match_operand:<VWIDE> 1 "register_operand" "0")
3891 (ss_ashift:<VWIDE>
3892 (mult:<VWIDE>
3893 (sign_extend:<VWIDE>
3894 (vec_select:<VHALF>
3895 (match_operand:VQ_HSI 2 "register_operand" "w")
3896 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3897 (sign_extend:<VWIDE>
3898 (vec_duplicate:<VHALF>
3899 (vec_select:<VEL>
3900 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3901 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3902 ))))
3903 (const_int 1))))]
3904 "TARGET_SIMD"
3905 {
3906 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3907 return
3908 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3909 }
3910 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3911 )
3912
3913 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3914 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3915 (SBINQOPS:<VWIDE>
3916 (match_operand:<VWIDE> 1 "register_operand" "0")
3917 (ss_ashift:<VWIDE>
3918 (mult:<VWIDE>
3919 (sign_extend:<VWIDE>
3920 (vec_select:<VHALF>
3921 (match_operand:VQ_HSI 2 "register_operand" "w")
3922 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3923 (sign_extend:<VWIDE>
3924 (vec_duplicate:<VHALF>
3925 (vec_select:<VEL>
3926 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3927 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3928 ))))
3929 (const_int 1))))]
3930 "TARGET_SIMD"
3931 {
3932 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3933 return
3934 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3935 }
3936 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3937 )
3938
3939 (define_expand "aarch64_sqdmlal2_lane<mode>"
3940 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3941 (match_operand:<VWIDE> 1 "register_operand" "w")
3942 (match_operand:VQ_HSI 2 "register_operand" "w")
3943 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3944 (match_operand:SI 4 "immediate_operand" "i")]
3945 "TARGET_SIMD"
3946 {
3947 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3948 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3949 operands[2], operands[3],
3950 operands[4], p));
3951 DONE;
3952 })
3953
3954 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3955 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3956 (match_operand:<VWIDE> 1 "register_operand" "w")
3957 (match_operand:VQ_HSI 2 "register_operand" "w")
3958 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3959 (match_operand:SI 4 "immediate_operand" "i")]
3960 "TARGET_SIMD"
3961 {
3962 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3963 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3964 operands[2], operands[3],
3965 operands[4], p));
3966 DONE;
3967 })
3968
3969 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3970 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3971 (match_operand:<VWIDE> 1 "register_operand" "w")
3972 (match_operand:VQ_HSI 2 "register_operand" "w")
3973 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3974 (match_operand:SI 4 "immediate_operand" "i")]
3975 "TARGET_SIMD"
3976 {
3977 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3978 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3979 operands[2], operands[3],
3980 operands[4], p));
3981 DONE;
3982 })
3983
3984 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3985 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3986 (match_operand:<VWIDE> 1 "register_operand" "w")
3987 (match_operand:VQ_HSI 2 "register_operand" "w")
3988 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3989 (match_operand:SI 4 "immediate_operand" "i")]
3990 "TARGET_SIMD"
3991 {
3992 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3993 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3994 operands[2], operands[3],
3995 operands[4], p));
3996 DONE;
3997 })
3998
3999 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4000 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4001 (SBINQOPS:<VWIDE>
4002 (match_operand:<VWIDE> 1 "register_operand" "0")
4003 (ss_ashift:<VWIDE>
4004 (mult:<VWIDE>
4005 (sign_extend:<VWIDE>
4006 (vec_select:<VHALF>
4007 (match_operand:VQ_HSI 2 "register_operand" "w")
4008 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4009 (sign_extend:<VWIDE>
4010 (vec_duplicate:<VHALF>
4011 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4012 (const_int 1))))]
4013 "TARGET_SIMD"
4014 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4015 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4016 )
4017
4018 (define_expand "aarch64_sqdmlal2_n<mode>"
4019 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4020 (match_operand:<VWIDE> 1 "register_operand" "w")
4021 (match_operand:VQ_HSI 2 "register_operand" "w")
4022 (match_operand:<VEL> 3 "register_operand" "w")]
4023 "TARGET_SIMD"
4024 {
4025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4026 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4027 operands[2], operands[3],
4028 p));
4029 DONE;
4030 })
4031
4032 (define_expand "aarch64_sqdmlsl2_n<mode>"
4033 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4034 (match_operand:<VWIDE> 1 "register_operand" "w")
4035 (match_operand:VQ_HSI 2 "register_operand" "w")
4036 (match_operand:<VEL> 3 "register_operand" "w")]
4037 "TARGET_SIMD"
4038 {
4039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4040 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4041 operands[2], operands[3],
4042 p));
4043 DONE;
4044 })
4045
4046 ;; vqdmull
4047
4048 (define_insn "aarch64_sqdmull<mode>"
4049 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4050 (ss_ashift:<VWIDE>
4051 (mult:<VWIDE>
4052 (sign_extend:<VWIDE>
4053 (match_operand:VSD_HSI 1 "register_operand" "w"))
4054 (sign_extend:<VWIDE>
4055 (match_operand:VSD_HSI 2 "register_operand" "w")))
4056 (const_int 1)))]
4057 "TARGET_SIMD"
4058 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4059 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4060 )
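
;; Per-lane: d[i] = sat (2 * (wide) a[i] * (wide) b[i]).  Only
;; INT_MIN * INT_MIN saturates; e.g. for vqdmull_s16 (illustrative),
;; 0x8000 * 0x8000 would double to 2^31 and so clamps to 0x7fffffff.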
4061
4062 ;; vqdmull_lane
4063
4064 (define_insn "aarch64_sqdmull_lane<mode>"
4065 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4066 (ss_ashift:<VWIDE>
4067 (mult:<VWIDE>
4068 (sign_extend:<VWIDE>
4069 (match_operand:VD_HSI 1 "register_operand" "w"))
4070 (sign_extend:<VWIDE>
4071 (vec_duplicate:VD_HSI
4072 (vec_select:<VEL>
4073 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4074 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4075 ))
4076 (const_int 1)))]
4077 "TARGET_SIMD"
4078 {
4079 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4080 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4081 }
4082 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4083 )
4084
4085 (define_insn "aarch64_sqdmull_laneq<mode>"
4086 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4087 (ss_ashift:<VWIDE>
4088 (mult:<VWIDE>
4089 (sign_extend:<VWIDE>
4090 (match_operand:VD_HSI 1 "register_operand" "w"))
4091 (sign_extend:<VWIDE>
4092 (vec_duplicate:VD_HSI
4093 (vec_select:<VEL>
4094 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4095 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4096 ))
4097 (const_int 1)))]
4098 "TARGET_SIMD"
4099 {
4100 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4101 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4102 }
4103 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4104 )
4105
4106 (define_insn "aarch64_sqdmull_lane<mode>"
4107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4108 (ss_ashift:<VWIDE>
4109 (mult:<VWIDE>
4110 (sign_extend:<VWIDE>
4111 (match_operand:SD_HSI 1 "register_operand" "w"))
4112 (sign_extend:<VWIDE>
4113 (vec_select:<VEL>
4114 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4116 ))
4117 (const_int 1)))]
4118 "TARGET_SIMD"
4119 {
4120 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4121 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4122 }
4123 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4124 )
4125
4126 (define_insn "aarch64_sqdmull_laneq<mode>"
4127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4128 (ss_ashift:<VWIDE>
4129 (mult:<VWIDE>
4130 (sign_extend:<VWIDE>
4131 (match_operand:SD_HSI 1 "register_operand" "w"))
4132 (sign_extend:<VWIDE>
4133 (vec_select:<VEL>
4134 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4135 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4136 ))
4137 (const_int 1)))]
4138 "TARGET_SIMD"
4139 {
4140 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4141 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4142 }
4143 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4144 )
4145
4146 ;; vqdmull_n
4147
4148 (define_insn "aarch64_sqdmull_n<mode>"
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4150 (ss_ashift:<VWIDE>
4151 (mult:<VWIDE>
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4156 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4157 )
4158 (const_int 1)))]
4159 "TARGET_SIMD"
4160 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4161 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4162 )
4163
4164 ;; vqdmull2
4165
4168 (define_insn "aarch64_sqdmull2<mode>_internal"
4169 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4170 (ss_ashift:<VWIDE>
4171 (mult:<VWIDE>
4172 (sign_extend:<VWIDE>
4173 (vec_select:<VHALF>
4174 (match_operand:VQ_HSI 1 "register_operand" "w")
4175 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4176 (sign_extend:<VWIDE>
4177 (vec_select:<VHALF>
4178 (match_operand:VQ_HSI 2 "register_operand" "w")
4179 (match_dup 3)))
4180 )
4181 (const_int 1)))]
4182 "TARGET_SIMD"
4183 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4184 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4185 )
4186
4187 (define_expand "aarch64_sqdmull2<mode>"
4188 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4189 (match_operand:VQ_HSI 1 "register_operand" "w")
4190 (match_operand:VQ_HSI 2 "register_operand" "w")]
4191 "TARGET_SIMD"
4192 {
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4195 operands[2], p));
4196 DONE;
4197 })
4198
4199 ;; vqdmull2_lane
4200
4201 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (ss_ashift:<VWIDE>
4204 (mult:<VWIDE>
4205 (sign_extend:<VWIDE>
4206 (vec_select:<VHALF>
4207 (match_operand:VQ_HSI 1 "register_operand" "w")
4208 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4209 (sign_extend:<VWIDE>
4210 (vec_duplicate:<VHALF>
4211 (vec_select:<VEL>
4212 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4213 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4214 ))
4215 (const_int 1)))]
4216 "TARGET_SIMD"
4217 {
4218 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4219 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4220 }
4221 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4222 )
4223
4224 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4225 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4226 (ss_ashift:<VWIDE>
4227 (mult:<VWIDE>
4228 (sign_extend:<VWIDE>
4229 (vec_select:<VHALF>
4230 (match_operand:VQ_HSI 1 "register_operand" "w")
4231 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4232 (sign_extend:<VWIDE>
4233 (vec_duplicate:<VHALF>
4234 (vec_select:<VEL>
4235 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4236 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4237 ))
4238 (const_int 1)))]
4239 "TARGET_SIMD"
4240 {
4241 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4242 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4243 }
4244 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4245 )
4246
4247 (define_expand "aarch64_sqdmull2_lane<mode>"
4248 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4249 (match_operand:VQ_HSI 1 "register_operand" "w")
4250 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4251 (match_operand:SI 3 "immediate_operand" "i")]
4252 "TARGET_SIMD"
4253 {
4254 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4255 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4256 operands[2], operands[3],
4257 p));
4258 DONE;
4259 })
4260
4261 (define_expand "aarch64_sqdmull2_laneq<mode>"
4262 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4263 (match_operand:VQ_HSI 1 "register_operand" "w")
4264 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4265 (match_operand:SI 3 "immediate_operand" "i")]
4266 "TARGET_SIMD"
4267 {
4268 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4269 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4270 operands[2], operands[3],
4271 p));
4272 DONE;
4273 })
4274
4275 ;; vqdmull2_n
4276
4277 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4279 (ss_ashift:<VWIDE>
4280 (mult:<VWIDE>
4281 (sign_extend:<VWIDE>
4282 (vec_select:<VHALF>
4283 (match_operand:VQ_HSI 1 "register_operand" "w")
4284 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4285 (sign_extend:<VWIDE>
4286 (vec_duplicate:<VHALF>
4287 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4288 )
4289 (const_int 1)))]
4290 "TARGET_SIMD"
4291 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4292 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4293 )
4294
4295 (define_expand "aarch64_sqdmull2_n<mode>"
4296 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:<VEL> 2 "register_operand" "w")]
4299 "TARGET_SIMD"
4300 {
4301 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4302 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4303 operands[2], p));
4304 DONE;
4305 })
4306
4307 ;; vshl
4308
4309 (define_insn "aarch64_<sur>shl<mode>"
4310 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4311 (unspec:VSDQ_I_DI
4312 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4313 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4314 VSHL))]
4315 "TARGET_SIMD"
4316 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4317 [(set_attr "type" "neon_shift_reg<q>")]
4318 )
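
;; The shift amount is a per-lane register value: positive shifts left,
;; negative shifts right (arithmetic for sshl, logical for ushl), with
;; srshl/urshl rounding right shifts (illustratively, vshl_s32 with a
;; negative lane acts as an arithmetic right shift).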
4319
4321 ;; vqshl
4322
4323 (define_insn "aarch64_<sur>q<r>shl<mode>"
4324 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4325 (unspec:VSDQ_I
4326 [(match_operand:VSDQ_I 1 "register_operand" "w")
4327 (match_operand:VSDQ_I 2 "register_operand" "w")]
4328 VQSHL))]
4329 "TARGET_SIMD"
4330 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4331 [(set_attr "type" "neon_sat_shift_reg<q>")]
4332 )
4333
4334 ;; vshll_n
4335
4336 (define_insn "aarch64_<sur>shll_n<mode>"
4337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4338 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4339 (match_operand:SI 2
4340 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4341 VSHLL))]
4342 "TARGET_SIMD"
4343 {
4344 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4345 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4346 else
4347 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4348 }
4349 [(set_attr "type" "neon_shift_imm_long")]
4350 )
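
;; shll handles the shift-by-element-width case because the (u|s)shll
;; encodings only reach w - 1, while shll shifts by exactly w; together
;; they cover the full range (e.g. vshll_n_s8 with a shift of 8 emits
;; shll - illustrative intrinsic spelling).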
4351
4352 ;; vshll_high_n
4353
4354 (define_insn "aarch64_<sur>shll2_n<mode>"
4355 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4356 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4357 (match_operand:SI 2 "immediate_operand" "i")]
4358 VSHLL))]
4359 "TARGET_SIMD"
4360 {
4361 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4362 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4363 else
4364 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4365 }
4366 [(set_attr "type" "neon_shift_imm_long")]
4367 )
4368
4369 ;; vrshr_n
4370
4371 (define_insn "aarch64_<sur>shr_n<mode>"
4372 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4373 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4374 (match_operand:SI 2
4375 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4376 VRSHR_N))]
4377 "TARGET_SIMD"
4378 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4379 [(set_attr "type" "neon_sat_shift_imm<q>")]
4380 )
4381
4382 ;; v(r)sra_n
4383
4384 (define_insn "aarch64_<sur>sra_n<mode>"
4385 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4386 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4387 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4388 (match_operand:SI 3
4389 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4390 VSRA))]
4391 "TARGET_SIMD"
4392 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4393 [(set_attr "type" "neon_shift_acc<q>")]
4394 )
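
;; Accumulating shifts: d[i] += a[i] >> n, arithmetic for ssra, logical
;; for usra; srsra/ursra round the shifted value before accumulating
;; (illustratively, vsra_n_s32 (acc, x, 3)).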
4395
4396 ;; vs<lr>i_n
4397
4398 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4399 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4400 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4401 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4402 (match_operand:SI 3
4403 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4404 VSLRI))]
4405 "TARGET_SIMD"
4406 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4407 [(set_attr "type" "neon_shift_imm<q>")]
4408 )
4409
4410 ;; vqshl(u)
4411
4412 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4413 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4414 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4415 (match_operand:SI 2
4416 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4417 VQSHL_N))]
4418 "TARGET_SIMD"
4419 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4420 [(set_attr "type" "neon_sat_shift_imm<q>")]
4421 )
4422
4424 ;; vq(r)shr(u)n_n
4425
4426 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4427 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4428 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4429 (match_operand:SI 2
4430 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4431 VQSHRN_N))]
4432 "TARGET_SIMD"
4433 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4434 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4435 )
4436
4438 ;; cm(eq|ge|gt|lt|le)
4439 ;; Note: we have constraints for both Dz and Z, as different expanders
4440 ;; have different ideas of what should be passed to this pattern.
4441
4442 (define_insn "aarch64_cm<optab><mode>"
4443 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4444 (neg:<V_INT_EQUIV>
4445 (COMPARISONS:<V_INT_EQUIV>
4446 (match_operand:VDQ_I 1 "register_operand" "w,w")
4447 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4448 )))]
4449 "TARGET_SIMD"
4450 "@
4451 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4452 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4453 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4454 )
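
;; These patterns model each lane's comparison as a 0/1 RTL result and
;; negate it, matching the all-ones (-1) lane mask the cm* instructions
;; actually produce; e.g. cmeq sets a lane to all ones when equal
;; (illustratively, vceqq_s32 returns uint32x4_t masks).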
4455
4456 (define_insn_and_split "aarch64_cm<optab>di"
4457 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4458 (neg:DI
4459 (COMPARISONS:DI
4460 (match_operand:DI 1 "register_operand" "w,w,r")
4461 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4462 )))
4463 (clobber (reg:CC CC_REGNUM))]
4464 "TARGET_SIMD"
4465 "#"
4466 "&& reload_completed"
4467 [(set (match_operand:DI 0 "register_operand")
4468 (neg:DI
4469 (COMPARISONS:DI
4470 (match_operand:DI 1 "register_operand")
4471 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4472 )))]
4473 {
4474 /* If we are in the general purpose register file,
4475 we split to a sequence of comparison and store. */
4476 if (GP_REGNUM_P (REGNO (operands[0]))
4477 && GP_REGNUM_P (REGNO (operands[1])))
4478 {
4479 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4480 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4481 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4482 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4483 DONE;
4484 }
4485 /* Otherwise, we expand to a similar pattern which does not
4486 clobber CC_REGNUM. */
4487 }
4488 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4489 )
4490
4491 (define_insn "*aarch64_cm<optab>di"
4492 [(set (match_operand:DI 0 "register_operand" "=w,w")
4493 (neg:DI
4494 (COMPARISONS:DI
4495 (match_operand:DI 1 "register_operand" "w,w")
4496 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4497 )))]
4498 "TARGET_SIMD && reload_completed"
4499 "@
4500 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4501 cm<optab>\t%d0, %d1, #0"
4502 [(set_attr "type" "neon_compare, neon_compare_zero")]
4503 )
4504
4505 ;; cm(hs|hi)
4506
4507 (define_insn "aarch64_cm<optab><mode>"
4508 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4509 (neg:<V_INT_EQUIV>
4510 (UCOMPARISONS:<V_INT_EQUIV>
4511 (match_operand:VDQ_I 1 "register_operand" "w")
4512 (match_operand:VDQ_I 2 "register_operand" "w")
4513 )))]
4514 "TARGET_SIMD"
4515 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4516 [(set_attr "type" "neon_compare<q>")]
4517 )
4518
4519 (define_insn_and_split "aarch64_cm<optab>di"
4520 [(set (match_operand:DI 0 "register_operand" "=w,r")
4521 (neg:DI
4522 (UCOMPARISONS:DI
4523 (match_operand:DI 1 "register_operand" "w,r")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4525 )))
4526 (clobber (reg:CC CC_REGNUM))]
4527 "TARGET_SIMD"
4528 "#"
4529 "&& reload_completed"
4530 [(set (match_operand:DI 0 "register_operand")
4531 (neg:DI
4532 (UCOMPARISONS:DI
4533 (match_operand:DI 1 "register_operand")
4534 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4535 )))]
4536 {
4537 /* If we are in the general purpose register file,
4538 we split to a sequence of comparison and store. */
4539 if (GP_REGNUM_P (REGNO (operands[0]))
4540 && GP_REGNUM_P (REGNO (operands[1])))
4541 {
4542 machine_mode mode = CCmode;
4543 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4544 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4545 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4546 DONE;
4547 }
4548 /* Otherwise, we expand to a similar pattern which does not
4549 clobber CC_REGNUM. */
4550 }
4551 [(set_attr "type" "neon_compare,multiple")]
4552 )
4553
4554 (define_insn "*aarch64_cm<optab>di"
4555 [(set (match_operand:DI 0 "register_operand" "=w")
4556 (neg:DI
4557 (UCOMPARISONS:DI
4558 (match_operand:DI 1 "register_operand" "w")
4559 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4560 )))]
4561 "TARGET_SIMD && reload_completed"
4562 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4563 [(set_attr "type" "neon_compare")]
4564 )
4565
4566 ;; cmtst
4567
4568 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4569 ;; we don't have any insns using ne, and aarch64_vcond outputs
4570 ;; not (neg (eq (and x y) 0))
4571 ;; which is rewritten by simplify_rtx as
4572 ;; plus (eq (and x y) 0) -1.
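;; The two are equivalent: in two's complement not (x) = -x - 1, so
;; not (neg (y)) = -(-y) - 1 = y - 1, i.e. plus (y, -1).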
4573
4574 (define_insn "aarch64_cmtst<mode>"
4575 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4576 (plus:<V_INT_EQUIV>
4577 (eq:<V_INT_EQUIV>
4578 (and:VDQ_I
4579 (match_operand:VDQ_I 1 "register_operand" "w")
4580 (match_operand:VDQ_I 2 "register_operand" "w"))
4581 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4582 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4583 ]
4584 "TARGET_SIMD"
4585 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4586 [(set_attr "type" "neon_tst<q>")]
4587 )
4588
4589 (define_insn_and_split "aarch64_cmtstdi"
4590 [(set (match_operand:DI 0 "register_operand" "=w,r")
4591 (neg:DI
4592 (ne:DI
4593 (and:DI
4594 (match_operand:DI 1 "register_operand" "w,r")
4595 (match_operand:DI 2 "register_operand" "w,r"))
4596 (const_int 0))))
4597 (clobber (reg:CC CC_REGNUM))]
4598 "TARGET_SIMD"
4599 "#"
4600 "&& reload_completed"
4601 [(set (match_operand:DI 0 "register_operand")
4602 (neg:DI
4603 (ne:DI
4604 (and:DI
4605 (match_operand:DI 1 "register_operand")
4606 (match_operand:DI 2 "register_operand"))
4607 (const_int 0))))]
4608 {
4609 /* If we are in the general purpose register file,
4610 we split to a sequence of comparison and store. */
4611 if (GP_REGNUM_P (REGNO (operands[0]))
4612 && GP_REGNUM_P (REGNO (operands[1])))
4613 {
4614 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4615 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4616 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4617 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4618 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4619 DONE;
4620 }
4621 /* Otherwise, we expand to a similar pattern which does not
4622 clobber CC_REGNUM. */
4623 }
4624 [(set_attr "type" "neon_tst,multiple")]
4625 )
4626
4627 (define_insn "*aarch64_cmtstdi"
4628 [(set (match_operand:DI 0 "register_operand" "=w")
4629 (neg:DI
4630 (ne:DI
4631 (and:DI
4632 (match_operand:DI 1 "register_operand" "w")
4633 (match_operand:DI 2 "register_operand" "w"))
4634 (const_int 0))))]
4635 "TARGET_SIMD"
4636 "cmtst\t%d0, %d1, %d2"
4637 [(set_attr "type" "neon_tst")]
4638 )
4639
4640 ;; fcm(eq|ge|gt|le|lt)
4641
4642 (define_insn "aarch64_cm<optab><mode>"
4643 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4644 (neg:<V_INT_EQUIV>
4645 (COMPARISONS:<V_INT_EQUIV>
4646 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4647 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4648 )))]
4649 "TARGET_SIMD"
4650 "@
4651 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4652 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4653 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4654 )
4655
4656 ;; fac(ge|gt)
4657 ;; Note we can also handle what would be fac(le|lt) by
4658 ;; generating fac(ge|gt).
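;; For example, facle (a, b), i.e. |a| <= |b|, is facge (b, a) with the
;; operands swapped; ACLE's vcale_f32 is commonly described this way
;; (illustrative mapping).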
4659
4660 (define_insn "aarch64_fac<optab><mode>"
4661 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4662 (neg:<V_INT_EQUIV>
4663 (FAC_COMPARISONS:<V_INT_EQUIV>
4664 (abs:VHSDF_HSDF
4665 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4666 (abs:VHSDF_HSDF
4667 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4668 )))]
4669 "TARGET_SIMD"
4670 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4671 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4672 )
4673
4674 ;; addp
4675
4676 (define_insn "aarch64_addp<mode>"
4677 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4678 (unspec:VD_BHSI
4679 [(match_operand:VD_BHSI 1 "register_operand" "w")
4680 (match_operand:VD_BHSI 2 "register_operand" "w")]
4681 UNSPEC_ADDP))]
4682 "TARGET_SIMD"
4683 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4684 [(set_attr "type" "neon_reduc_add<q>")]
4685 )
4686
4687 (define_insn "aarch64_addpdi"
4688 [(set (match_operand:DI 0 "register_operand" "=w")
4689 (unspec:DI
4690 [(match_operand:V2DI 1 "register_operand" "w")]
4691 UNSPEC_ADDP))]
4692 "TARGET_SIMD"
4693 "addp\t%d0, %1.2d"
4694 [(set_attr "type" "neon_reduc_add")]
4695 )
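
;; Pairwise add: adjacent element pairs of the concatenated inputs are
;; summed, so (illustratively) vpadd_s32 (a, b) returns
;; { a[0] + a[1], b[0] + b[1] }; the DI form sums the two halves of a
;; V2DI register.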
4696
4697 ;; sqrt
4698
4699 (define_expand "sqrt<mode>2"
4700 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4701 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4702 "TARGET_SIMD"
4703 {
4704 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4705 DONE;
4706 })
4707
4708 (define_insn "*sqrt<mode>2"
4709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4710 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4711 "TARGET_SIMD"
4712 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4713 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4714 )
4715
4716 ;; Patterns for vector struct loads and stores.
4717
4718 (define_insn "aarch64_simd_ld2<mode>"
4719 [(set (match_operand:OI 0 "register_operand" "=w")
4720 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4721 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4722 UNSPEC_LD2))]
4723 "TARGET_SIMD"
4724 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725 [(set_attr "type" "neon_load2_2reg<q>")]
4726 )
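
;; LD2 de-interleaves: even-indexed memory elements fill the first
;; register of the pair and odd-indexed elements the second, so
;; (illustratively) vld2q_s32 (p) yields { p[0], p[2], p[4], p[6] } and
;; { p[1], p[3], p[5], p[7] }.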
4727
4728 (define_insn "aarch64_simd_ld2r<mode>"
4729 [(set (match_operand:OI 0 "register_operand" "=w")
4730 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4732 UNSPEC_LD2_DUP))]
4733 "TARGET_SIMD"
4734 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4735 [(set_attr "type" "neon_load2_all_lanes<q>")]
4736 )
4737
4738 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4739 [(set (match_operand:OI 0 "register_operand" "=w")
4740 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4741 (match_operand:OI 2 "register_operand" "0")
4742 (match_operand:SI 3 "immediate_operand" "i")
4743 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4744 UNSPEC_LD2_LANE))]
4745 "TARGET_SIMD"
4746 {
4747 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4748 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4749 }
4750 [(set_attr "type" "neon_load2_one_lane")]
4751 )
4752
4753 (define_expand "vec_load_lanesoi<mode>"
4754 [(set (match_operand:OI 0 "register_operand" "=w")
4755 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4756 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4757 UNSPEC_LD2))]
4758 "TARGET_SIMD"
4759 {
4760 if (BYTES_BIG_ENDIAN)
4761 {
4762 rtx tmp = gen_reg_rtx (OImode);
4763 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4764 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4765 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4766 }
4767 else
4768 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4769 DONE;
4770 })
4771
4772 (define_insn "aarch64_simd_st2<mode>"
4773 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4774 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4775 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4776 UNSPEC_ST2))]
4777 "TARGET_SIMD"
4778 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4779 [(set_attr "type" "neon_store2_2reg<q>")]
4780 )
4781
4782 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4783 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4784 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4785 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4786 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4787 (match_operand:SI 2 "immediate_operand" "i")]
4788 UNSPEC_ST2_LANE))]
4789 "TARGET_SIMD"
4790 {
4791 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4792 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4793 }
4794 [(set_attr "type" "neon_store2_one_lane<q>")]
4795 )
4796
4797 (define_expand "vec_store_lanesoi<mode>"
4798 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4799 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4800 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4801 UNSPEC_ST2))]
4802 "TARGET_SIMD"
4803 {
4804 if (BYTES_BIG_ENDIAN)
4805 {
4806 rtx tmp = gen_reg_rtx (OImode);
4807 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4808 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4809 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4810 }
4811 else
4812 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4813 DONE;
4814 })
4815
4816 (define_insn "aarch64_simd_ld3<mode>"
4817 [(set (match_operand:CI 0 "register_operand" "=w")
4818 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4819 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4820 UNSPEC_LD3))]
4821 "TARGET_SIMD"
4822 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823 [(set_attr "type" "neon_load3_3reg<q>")]
4824 )
4825
4826 (define_insn "aarch64_simd_ld3r<mode>"
4827 [(set (match_operand:CI 0 "register_operand" "=w")
4828 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4830 UNSPEC_LD3_DUP))]
4831 "TARGET_SIMD"
4832 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4833 [(set_attr "type" "neon_load3_all_lanes<q>")]
4834 )
4835
4836 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4837 [(set (match_operand:CI 0 "register_operand" "=w")
4838 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4839 (match_operand:CI 2 "register_operand" "0")
4840 (match_operand:SI 3 "immediate_operand" "i")
4841 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4842 UNSPEC_LD3_LANE))]
4843 "TARGET_SIMD"
4844 {
4845 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4846 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4847 }
4848 [(set_attr "type" "neon_load3_one_lane")]
4849 )
4850
4851 (define_expand "vec_load_lanesci<mode>"
4852 [(set (match_operand:CI 0 "register_operand" "=w")
4853 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4854 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4855 UNSPEC_LD3))]
4856 "TARGET_SIMD"
4857 {
4858 if (BYTES_BIG_ENDIAN)
4859 {
4860 rtx tmp = gen_reg_rtx (CImode);
4861 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4862 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4863 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4864 }
4865 else
4866 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4867 DONE;
4868 })
4869
4870 (define_insn "aarch64_simd_st3<mode>"
4871 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4872 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4873 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4874 UNSPEC_ST3))]
4875 "TARGET_SIMD"
4876 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4877 [(set_attr "type" "neon_store3_3reg<q>")]
4878 )
4879
4880 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4881 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4882 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4884 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4885 (match_operand:SI 2 "immediate_operand" "i")]
4886 UNSPEC_ST3_LANE))]
4887 "TARGET_SIMD"
4888 {
4889 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4890 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4891 }
4892 [(set_attr "type" "neon_store3_one_lane<q>")]
4893 )
4894
4895 (define_expand "vec_store_lanesci<mode>"
4896 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4897 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4898 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4899 UNSPEC_ST3))]
4900 "TARGET_SIMD"
4901 {
4902 if (BYTES_BIG_ENDIAN)
4903 {
4904 rtx tmp = gen_reg_rtx (CImode);
4905 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4906 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4907 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4908 }
4909 else
4910 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4911 DONE;
4912 })
4913
4914 (define_insn "aarch64_simd_ld4<mode>"
4915 [(set (match_operand:XI 0 "register_operand" "=w")
4916 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4917 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4918 UNSPEC_LD4))]
4919 "TARGET_SIMD"
4920 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921 [(set_attr "type" "neon_load4_4reg<q>")]
4922 )
4923
4924 (define_insn "aarch64_simd_ld4r<mode>"
4925 [(set (match_operand:XI 0 "register_operand" "=w")
4926 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4928 UNSPEC_LD4_DUP))]
4929 "TARGET_SIMD"
4930 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4931 [(set_attr "type" "neon_load4_all_lanes<q>")]
4932 )
4933
4934 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4935 [(set (match_operand:XI 0 "register_operand" "=w")
4936 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4937 (match_operand:XI 2 "register_operand" "0")
4938 (match_operand:SI 3 "immediate_operand" "i")
4939 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4940 UNSPEC_LD4_LANE))]
4941 "TARGET_SIMD"
4942 {
4943 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4944 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4945 }
4946 [(set_attr "type" "neon_load4_one_lane")]
4947 )
4948
4949 (define_expand "vec_load_lanesxi<mode>"
4950 [(set (match_operand:XI 0 "register_operand" "=w")
4951 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4952 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4953 UNSPEC_LD4))]
4954 "TARGET_SIMD"
4955 {
4956 if (BYTES_BIG_ENDIAN)
4957 {
4958 rtx tmp = gen_reg_rtx (XImode);
4959 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4960 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4961 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4962 }
4963 else
4964 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4965 DONE;
4966 })
4967
4968 (define_insn "aarch64_simd_st4<mode>"
4969 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4970 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4971 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4972 UNSPEC_ST4))]
4973 "TARGET_SIMD"
4974 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4975 [(set_attr "type" "neon_store4_4reg<q>")]
4976 )
4977
4978 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4979 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4980 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4982 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4983 (match_operand:SI 2 "immediate_operand" "i")]
4984 UNSPEC_ST4_LANE))]
4985 "TARGET_SIMD"
4986 {
4987 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4988 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4989 }
4990 [(set_attr "type" "neon_store4_one_lane<q>")]
4991 )
4992
4993 (define_expand "vec_store_lanesxi<mode>"
4994 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4995 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4996 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4997 UNSPEC_ST4))]
4998 "TARGET_SIMD"
4999 {
5000 if (BYTES_BIG_ENDIAN)
5001 {
5002 rtx tmp = gen_reg_rtx (XImode);
5003 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5004 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5005 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5006 }
5007 else
5008 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5009 DONE;
5010 })
5011
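;; Big-endian lane reversal of a register list is done with one TBL per
;; 128-bit register in the list, using the byte-shuffle mask produced by
;; aarch64_reverse_mask; the pattern below splits into those TBLs after
;; reload.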
5012 (define_insn_and_split "aarch64_rev_reglist<mode>"
5013 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5014 (unspec:VSTRUCT
5015 [(match_operand:VSTRUCT 1 "register_operand" "w")
5016 (match_operand:V16QI 2 "register_operand" "w")]
5017 UNSPEC_REV_REGLIST))]
5018 "TARGET_SIMD"
5019 "#"
5020 "&& reload_completed"
5021 [(const_int 0)]
5022 {
5023 int i;
5024 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5025 for (i = 0; i < nregs; i++)
5026 {
5027 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5028 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5029 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5030 }
5031 DONE;
5032 }
5033 [(set_attr "type" "neon_tbl1_q")
5034 (set_attr "length" "<insn_count>")]
5035 )
5036
5037 ;; Reload patterns for AdvSIMD register list operands.
5038
5039 (define_expand "mov<mode>"
5040 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5041 (match_operand:VSTRUCT 1 "general_operand" ""))]
5042 "TARGET_SIMD"
5043 {
5044 if (can_create_pseudo_p ())
5045 {
5046 if (GET_CODE (operands[0]) != REG)
5047 operands[1] = force_reg (<MODE>mode, operands[1]);
5048 }
5049 })
5050
5051 (define_insn "*aarch64_mov<mode>"
5052 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5053 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5054 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5055 && (register_operand (operands[0], <MODE>mode)
5056 || register_operand (operands[1], <MODE>mode))"
5057 "@
5058 #
5059 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5060 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5061 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5062 neon_load<nregs>_<nregs>reg_q")
5063 (set_attr "length" "<insn_count>,4,4")]
5064 )
5065
5066 (define_insn "aarch64_be_ld1<mode>"
5067 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5068 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5069 "aarch64_simd_struct_operand" "Utv")]
5070 UNSPEC_LD1))]
5071 "TARGET_SIMD"
5072 "ld1\\t{%0<Vmtype>}, %1"
5073 [(set_attr "type" "neon_load1_1reg<q>")]
5074 )
5075
5076 (define_insn "aarch64_be_st1<mode>"
5077 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5078 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5079 UNSPEC_ST1))]
5080 "TARGET_SIMD"
5081 "st1\\t{%1<Vmtype>}, %0"
5082 [(set_attr "type" "neon_store1_1reg<q>")]
5083 )
5084
5085 (define_insn "*aarch64_be_movoi"
5086 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5087 (match_operand:OI 1 "general_operand" " w,w,m"))]
5088 "TARGET_SIMD && BYTES_BIG_ENDIAN
5089 && (register_operand (operands[0], OImode)
5090 || register_operand (operands[1], OImode))"
5091 "@
5092 #
5093 stp\\t%q1, %R1, %0
5094 ldp\\t%q0, %R0, %1"
5095 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5096 (set_attr "length" "8,4,4")]
5097 )
5098
5099 (define_insn "*aarch64_be_movci"
5100 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5101 (match_operand:CI 1 "general_operand" " w,w,o"))]
5102 "TARGET_SIMD && BYTES_BIG_ENDIAN
5103 && (register_operand (operands[0], CImode)
5104 || register_operand (operands[1], CImode))"
5105 "#"
5106 [(set_attr "type" "multiple")
5107 (set_attr "length" "12,4,4")]
5108 )
5109
5110 (define_insn "*aarch64_be_movxi"
5111 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5112 (match_operand:XI 1 "general_operand" " w,w,o"))]
5113 "TARGET_SIMD && BYTES_BIG_ENDIAN
5114 && (register_operand (operands[0], XImode)
5115 || register_operand (operands[1], XImode))"
5116 "#"
5117 [(set_attr "type" "multiple")
5118 (set_attr "length" "16,4,4")]
5119 )
5120
5121 (define_split
5122 [(set (match_operand:OI 0 "register_operand")
5123 (match_operand:OI 1 "register_operand"))]
5124 "TARGET_SIMD && reload_completed"
5125 [(const_int 0)]
5126 {
5127 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5128 DONE;
5129 })
5130
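;; There is no 48-byte load/store, so for big-endian memory moves a CI
;; value is split below into a 32-byte OI followed by a 16-byte TI, the
;; latter moved through its V16QI lowpart.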
5131 (define_split
5132 [(set (match_operand:CI 0 "nonimmediate_operand")
5133 (match_operand:CI 1 "general_operand"))]
5134 "TARGET_SIMD && reload_completed"
5135 [(const_int 0)]
5136 {
5137 if (register_operand (operands[0], CImode)
5138 && register_operand (operands[1], CImode))
5139 {
5140 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5141 DONE;
5142 }
5143 else if (BYTES_BIG_ENDIAN)
5144 {
5145 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5146 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5147 emit_move_insn (gen_lowpart (V16QImode,
5148 simplify_gen_subreg (TImode, operands[0],
5149 CImode, 32)),
5150 gen_lowpart (V16QImode,
5151 simplify_gen_subreg (TImode, operands[1],
5152 CImode, 32)));
5153 DONE;
5154 }
5155 else
5156 FAIL;
5157 })
5158
5159 (define_split
5160 [(set (match_operand:XI 0 "nonimmediate_operand")
5161 (match_operand:XI 1 "general_operand"))]
5162 "TARGET_SIMD && reload_completed"
5163 [(const_int 0)]
5164 {
5165 if (register_operand (operands[0], XImode)
5166 && register_operand (operands[1], XImode))
5167 {
5168 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5169 DONE;
5170 }
5171 else if (BYTES_BIG_ENDIAN)
5172 {
5173 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5174 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5175 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5176 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5177 DONE;
5178 }
5179 else
5180 FAIL;
5181 })
5182
5183 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5184 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5185 (match_operand:DI 1 "register_operand" "r")
5186 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5187 "TARGET_SIMD"
5188 {
5189 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5190 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5191 * <VSTRUCT:nregs>);
5192
5193 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5194 mem));
5195 DONE;
5196 })
5197
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199 [(set (match_operand:OI 0 "register_operand" "=w")
5200 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202 UNSPEC_LD2_DREG))]
5203 "TARGET_SIMD"
5204 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5205 [(set_attr "type" "neon_load2_2reg<q>")]
5206 )
5207
5208 (define_insn "aarch64_ld2<mode>_dreg"
5209 [(set (match_operand:OI 0 "register_operand" "=w")
5210 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5212 UNSPEC_LD2_DREG))]
5213 "TARGET_SIMD"
5214 "ld1\\t{%S0.1d - %T0.1d}, %1"
5215 [(set_attr "type" "neon_load1_2reg<q>")]
5216 )
5217
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219 [(set (match_operand:CI 0 "register_operand" "=w")
5220 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5222 UNSPEC_LD3_DREG))]
5223 "TARGET_SIMD"
5224 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5225 [(set_attr "type" "neon_load3_3reg<q>")]
5226 )
5227
5228 (define_insn "aarch64_ld3<mode>_dreg"
5229 [(set (match_operand:CI 0 "register_operand" "=w")
5230 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5232 UNSPEC_LD3_DREG))]
5233 "TARGET_SIMD"
5234 "ld1\\t{%S0.1d - %U0.1d}, %1"
5235 [(set_attr "type" "neon_load1_3reg<q>")]
5236 )
5237
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239 [(set (match_operand:XI 0 "register_operand" "=w")
5240 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242 UNSPEC_LD4_DREG))]
5243 "TARGET_SIMD"
5244 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5245 [(set_attr "type" "neon_load4_4reg<q>")]
5246 )
5247
5248 (define_insn "aarch64_ld4<mode>_dreg"
5249 [(set (match_operand:XI 0 "register_operand" "=w")
5250 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5251 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 UNSPEC_LD4_DREG))]
5253 "TARGET_SIMD"
5254 "ld1\\t{%S0.1d - %V0.1d}, %1"
5255 [(set_attr "type" "neon_load1_4reg<q>")]
5256 )
5257
5258 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5259 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5260 (match_operand:DI 1 "register_operand" "r")
5261 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5262 "TARGET_SIMD"
5263 {
5264 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5265 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5266
5267 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5268 DONE;
5269 })
5270
5271 (define_expand "aarch64_ld1<VALL_F16:mode>"
5272 [(match_operand:VALL_F16 0 "register_operand")
5273 (match_operand:DI 1 "register_operand")]
5274 "TARGET_SIMD"
5275 {
5276 machine_mode mode = <VALL_F16:MODE>mode;
5277 rtx mem = gen_rtx_MEM (mode, operands[1]);
5278
5279 if (BYTES_BIG_ENDIAN)
5280 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5281 else
5282 emit_move_insn (operands[0], mem);
5283 DONE;
5284 })
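
;; An intrinsic-level sketch (illustration only): vld1q_s32 from arm_neon.h
;; reaches the expander above; big-endian takes the explicit LD1 form so
;; that lane numbering follows array order:
;;
;;   #include <arm_neon.h>
;;   int32x4_t load (const int32_t *p)
;;   {
;;     return vld1q_s32 (p);   /* ld1 {v0.4s}, [x0] */
;;   }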
5285
5286 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5287 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5288 (match_operand:DI 1 "register_operand" "r")
5289 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5290 "TARGET_SIMD"
5291 {
5292 machine_mode mode = <VSTRUCT:MODE>mode;
5293 rtx mem = gen_rtx_MEM (mode, operands[1]);
5294
5295 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5296 DONE;
5297 })
5298
5299 (define_expand "aarch64_ld1x2<VQ:mode>"
5300 [(match_operand:OI 0 "register_operand" "=w")
5301 (match_operand:DI 1 "register_operand" "r")
5302 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5303 "TARGET_SIMD"
5304 {
5305 machine_mode mode = OImode;
5306 rtx mem = gen_rtx_MEM (mode, operands[1]);
5307
5308 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5309 DONE;
5310 })
5311
5312 (define_expand "aarch64_ld1x2<VDC:mode>"
5313 [(match_operand:OI 0 "register_operand" "=w")
5314 (match_operand:DI 1 "register_operand" "r")
5315 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5316 "TARGET_SIMD"
5317 {
5318 machine_mode mode = OImode;
5319 rtx mem = gen_rtx_MEM (mode, operands[1]);
5320
5321 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5322 DONE;
5323 })
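
;; Likewise for the two-register LD1 forms (a sketch, assuming the
;; arm_neon.h _x2 intrinsics):
;;
;;   #include <arm_neon.h>
;;   int32x4x2_t load2 (const int32_t *p)
;;   {
;;     return vld1q_s32_x2 (p);   /* ld1 {v0.4s - v1.4s}, [x0] */
;;   }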
5324
5325
5326 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5327 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5328 (match_operand:DI 1 "register_operand" "r")
5329 (match_operand:VSTRUCT 2 "register_operand" "0")
5330 (match_operand:SI 3 "immediate_operand" "i")
5331 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5332 "TARGET_SIMD"
5333 {
5334 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5335 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5336 * <VSTRUCT:nregs>);
5337
5338 aarch64_simd_lane_bounds (operands[3], 0,
5339 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5340 NULL);
5341 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5342 operands[0], mem, operands[2], operands[3]));
5343 DONE;
5344 })
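
;; Lane loads read one element into each register of the list and leave
;; the remaining lanes untouched; a sketch with the usual arm_neon.h names:
;;
;;   #include <arm_neon.h>
;;   int16x8x4_t load_lane2 (const int16_t *p, int16x8x4_t acc)
;;   {
;;     return vld4q_lane_s16 (p, acc, 2);   /* ld4 {v0.h - v3.h}[2], [x0] */
;;   }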
5345
5346 ;; Expanders for builtins to extract vector registers from large
5347 ;; opaque integer modes.
5348
5349 ;; D-register list.
5350
5351 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5352 [(match_operand:VDC 0 "register_operand" "=w")
5353 (match_operand:VSTRUCT 1 "register_operand" "w")
5354 (match_operand:SI 2 "immediate_operand" "i")]
5355 "TARGET_SIMD"
5356 {
5357 int part = INTVAL (operands[2]);
5358 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5359 int offset = part * 16;
5360
5361 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5362 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5363 DONE;
5364 })
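
;; A rough sketch of how arm_neon.h uses these entry points (treat the
;; exact builtin spellings below as illustrative):
;;
;;   __builtin_aarch64_simd_oi __o
;;     = __builtin_aarch64_ld2v8qi ((const __builtin_aarch64_simd_qi *) p);
;;   int8x8_t v0 = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 0);
;;   int8x8_t v1 = (int8x8_t) __builtin_aarch64_get_dregoiv8qi (__o, 1);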
5365
5366 ;; Q-register list.
5367
5368 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5369 [(match_operand:VQ 0 "register_operand" "=w")
5370 (match_operand:VSTRUCT 1 "register_operand" "w")
5371 (match_operand:SI 2 "immediate_operand" "i")]
5372 "TARGET_SIMD"
5373 {
5374 int part = INTVAL (operands[2]);
5375 int offset = part * 16;
5376
5377 emit_move_insn (operands[0],
5378 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5379 DONE;
5380 })
5381
5382 ;; Permuted-store expanders for neon intrinsics.
5383
5384 ;; Permute instructions
5385
5386 ;; vec_perm support
5387
5388 (define_expand "vec_perm<mode>"
5389 [(match_operand:VB 0 "register_operand")
5390 (match_operand:VB 1 "register_operand")
5391 (match_operand:VB 2 "register_operand")
5392 (match_operand:VB 3 "register_operand")]
5393 "TARGET_SIMD"
5394 {
5395 aarch64_expand_vec_perm (operands[0], operands[1],
5396 operands[2], operands[3], <nunits>);
5397 DONE;
5398 })
5399
5400 (define_insn "aarch64_tbl1<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5403 (match_operand:VB 2 "register_operand" "w")]
5404 UNSPEC_TBL))]
5405 "TARGET_SIMD"
5406 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5407 [(set_attr "type" "neon_tbl1<q>")]
5408 )
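
;; Single-table TBL at the intrinsic level (illustration only); byte
;; indices outside the table yield zero, which is what distinguishes TBL
;; from the TBX forms below:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t perm (uint8x16_t table, uint8x16_t idx)
;;   {
;;     return vqtbl1q_u8 (table, idx);   /* tbl v0.16b, {v0.16b}, v1.16b */
;;   }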
5409
5410 ;; Two source registers.
5411
5412 (define_insn "aarch64_tbl2v16qi"
5413 [(set (match_operand:V16QI 0 "register_operand" "=w")
5414 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5415 (match_operand:V16QI 2 "register_operand" "w")]
5416 UNSPEC_TBL))]
5417 "TARGET_SIMD"
5418 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5419 [(set_attr "type" "neon_tbl2_q")]
5420 )
5421
5422 (define_insn "aarch64_tbl3<mode>"
5423 [(set (match_operand:VB 0 "register_operand" "=w")
5424 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5425 (match_operand:VB 2 "register_operand" "w")]
5426 UNSPEC_TBL))]
5427 "TARGET_SIMD"
5428 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5429 [(set_attr "type" "neon_tbl3")]
5430 )
5431
5432 (define_insn "aarch64_tbx4<mode>"
5433 [(set (match_operand:VB 0 "register_operand" "=w")
5434 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5435 (match_operand:OI 2 "register_operand" "w")
5436 (match_operand:VB 3 "register_operand" "w")]
5437 UNSPEC_TBX))]
5438 "TARGET_SIMD"
5439 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5440 [(set_attr "type" "neon_tbl4")]
5441 )
5442
5443 ;; Three source registers.
5444
5445 (define_insn "aarch64_qtbl3<mode>"
5446 [(set (match_operand:VB 0 "register_operand" "=w")
5447 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5448 (match_operand:VB 2 "register_operand" "w")]
5449 UNSPEC_TBL))]
5450 "TARGET_SIMD"
5451 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5452 [(set_attr "type" "neon_tbl3")]
5453 )
5454
5455 (define_insn "aarch64_qtbx3<mode>"
5456 [(set (match_operand:VB 0 "register_operand" "=w")
5457 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5458 (match_operand:CI 2 "register_operand" "w")
5459 (match_operand:VB 3 "register_operand" "w")]
5460 UNSPEC_TBX))]
5461 "TARGET_SIMD"
5462 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5463 [(set_attr "type" "neon_tbl3")]
5464 )
5465
5466 ;; Four source registers.
5467
5468 (define_insn "aarch64_qtbl4<mode>"
5469 [(set (match_operand:VB 0 "register_operand" "=w")
5470 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5471 (match_operand:VB 2 "register_operand" "w")]
5472 UNSPEC_TBL))]
5473 "TARGET_SIMD"
5474 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5475 [(set_attr "type" "neon_tbl4")]
5476 )
5477
5478 (define_insn "aarch64_qtbx4<mode>"
5479 [(set (match_operand:VB 0 "register_operand" "=w")
5480 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5481 (match_operand:XI 2 "register_operand" "w")
5482 (match_operand:VB 3 "register_operand" "w")]
5483 UNSPEC_TBX))]
5484 "TARGET_SIMD"
5485 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5486 [(set_attr "type" "neon_tbl4")]
5487 )
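
;; TBX keeps the destination byte when an index is out of range; the
;; four-table extension form at the intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   uint8x16_t perm4 (uint8x16_t fallback, uint8x16x4_t t, uint8x16_t idx)
;;   {
;;     return vqtbx4q_u8 (fallback, t, idx);
;;   }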
5488
5489 (define_insn_and_split "aarch64_combinev16qi"
5490 [(set (match_operand:OI 0 "register_operand" "=w")
5491 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5492 (match_operand:V16QI 2 "register_operand" "w")]
5493 UNSPEC_CONCAT))]
5494 "TARGET_SIMD"
5495 "#"
5496 "&& reload_completed"
5497 [(const_int 0)]
5498 {
5499 aarch64_split_combinev16qi (operands);
5500 DONE;
5501 }
5502 [(set_attr "type" "multiple")]
5503 )
5504
5505 ;; This instruction's pattern is generated directly by
5506 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5507 ;; need corresponding changes there.
5508 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5509 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5510 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5511 (match_operand:VALL_F16 2 "register_operand" "w")]
5512 PERMUTE))]
5513 "TARGET_SIMD"
5514 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5515 [(set_attr "type" "neon_permute<q>")]
5516 )
5517
5518 ;; This instruction's pattern is generated directly by
5519 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5520 ;; need corresponding changes there. Note that the immediate (third)
5521 ;; operand is a lane index, not a byte index.
5522 (define_insn "aarch64_ext<mode>"
5523 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5524 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5525 (match_operand:VALL_F16 2 "register_operand" "w")
5526 (match_operand:SI 3 "immediate_operand" "i")]
5527 UNSPEC_EXT))]
5528 "TARGET_SIMD"
5529 {
5530 operands[3] = GEN_INT (INTVAL (operands[3])
5531 * GET_MODE_UNIT_SIZE (<MODE>mode));
5532 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5533 }
5534 [(set_attr "type" "neon_ext<q>")]
5535 )
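
;; For example, lane index 1 on V4SI becomes the byte immediate #4; at the
;; intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   int32x4_t rot1 (int32x4_t a, int32x4_t b)
;;   {
;;     return vextq_s32 (a, b, 1);   /* ext v0.16b, v0.16b, v1.16b, #4 */
;;   }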
5536
5537 ;; This instruction's pattern is generated directly by
5538 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5539 ;; need corresponding changes there.
5540 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5541 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5542 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5543 REVERSE))]
5544 "TARGET_SIMD"
5545 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5546 [(set_attr "type" "neon_rev<q>")]
5547 )
5548
5549 (define_insn "aarch64_st2<mode>_dreg"
5550 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5551 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5552 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5553 UNSPEC_ST2))]
5554 "TARGET_SIMD"
5555 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5556 [(set_attr "type" "neon_store2_2reg")]
5557 )
5558
5559 (define_insn "aarch64_st2<mode>_dreg"
5560 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5561 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5562 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5563 UNSPEC_ST2))]
5564 "TARGET_SIMD"
5565 "st1\\t{%S1.1d - %T1.1d}, %0"
5566 [(set_attr "type" "neon_store1_2reg")]
5567 )
5568
5569 (define_insn "aarch64_st3<mode>_dreg"
5570 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5571 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5572 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5573 UNSPEC_ST3))]
5574 "TARGET_SIMD"
5575 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5576 [(set_attr "type" "neon_store3_3reg")]
5577 )
5578
5579 (define_insn "aarch64_st3<mode>_dreg"
5580 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5581 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5582 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5583 UNSPEC_ST3))]
5584 "TARGET_SIMD"
5585 "st1\\t{%S1.1d - %U1.1d}, %0"
5586 [(set_attr "type" "neon_store1_3reg")]
5587 )
5588
5589 (define_insn "aarch64_st4<mode>_dreg"
5590 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5591 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5592 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5593 UNSPEC_ST4))]
5594 "TARGET_SIMD"
5595 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5596 [(set_attr "type" "neon_store4_4reg")]
5597 )
5598
5599 (define_insn "aarch64_st4<mode>_dreg"
5600 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5601 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5602 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5603 UNSPEC_ST4))]
5604 "TARGET_SIMD"
5605 "st1\\t{%S1.1d - %V1.1d}, %0"
5606 [(set_attr "type" "neon_store1_4reg")]
5607 )
5608
5609 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5610 [(match_operand:DI 0 "register_operand" "r")
5611 (match_operand:VSTRUCT 1 "register_operand" "w")
5612 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5613 "TARGET_SIMD"
5614 {
5615 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5616 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5617
5618 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5619 DONE;
5620 })
5621
5622 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5623 [(match_operand:DI 0 "register_operand" "r")
5624 (match_operand:VSTRUCT 1 "register_operand" "w")
5625 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5626 "TARGET_SIMD"
5627 {
5628 machine_mode mode = <VSTRUCT:MODE>mode;
5629 rtx mem = gen_rtx_MEM (mode, operands[0]);
5630
5631 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5632 DONE;
5633 })
5634
5635 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5636 [(match_operand:DI 0 "register_operand" "r")
5637 (match_operand:VSTRUCT 1 "register_operand" "w")
5638 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5639 (match_operand:SI 2 "immediate_operand")]
5640 "TARGET_SIMD"
5641 {
5642 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5643 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5644 * <VSTRUCT:nregs>);
5645
5646 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5647 mem, operands[1], operands[2]));
5648 DONE;
5649 })
5650
5651 (define_expand "aarch64_st1<VALL_F16:mode>"
5652 [(match_operand:DI 0 "register_operand")
5653 (match_operand:VALL_F16 1 "register_operand")]
5654 "TARGET_SIMD"
5655 {
5656 machine_mode mode = <VALL_F16:MODE>mode;
5657 rtx mem = gen_rtx_MEM (mode, operands[0]);
5658
5659 if (BYTES_BIG_ENDIAN)
5660 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5661 else
5662 emit_move_insn (mem, operands[1]);
5663 DONE;
5664 })
5665
5666 ;; Expander for builtins to insert vector registers into large
5667 ;; opaque integer modes.
5668
5669 ;; Q-register list. We don't need a D-reg inserter, as we zero-extend
5670 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
5671
5672 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5673 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5674 (match_operand:VSTRUCT 1 "register_operand" "0")
5675 (match_operand:VQ 2 "register_operand" "w")
5676 (match_operand:SI 3 "immediate_operand" "i")]
5677 "TARGET_SIMD"
5678 {
5679 int part = INTVAL (operands[3]);
5680 int offset = part * 16;
5681
5682 emit_move_insn (operands[0], operands[1]);
5683 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5684 operands[2]);
5685 DONE;
5686 })
5687
5688 ;; Standard pattern name vec_init<mode><Vel>.
5689
5690 (define_expand "vec_init<mode><Vel>"
5691 [(match_operand:VALL_F16 0 "register_operand" "")
5692 (match_operand 1 "" "")]
5693 "TARGET_SIMD"
5694 {
5695 aarch64_expand_vector_init (operands[0], operands[1]);
5696 DONE;
5697 })
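
;; vec_init is what a vector built from scalars expands through, e.g. with
;; the GCC vector extension (illustration only):
;;
;;   typedef int v4si __attribute__ ((vector_size (16)));
;;   v4si make (int a, int b, int c, int d)
;;   {
;;     return (v4si) { a, b, c, d };
;;   }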
5698
5699 (define_insn "*aarch64_simd_ld1r<mode>"
5700 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5701 (vec_duplicate:VALL_F16
5702 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5703 "TARGET_SIMD"
5704 "ld1r\\t{%0.<Vtype>}, %1"
5705 [(set_attr "type" "neon_load1_all_lanes")]
5706 )
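
;; Load-replicate at the intrinsic level (a sketch):
;;
;;   #include <arm_neon.h>
;;   int32x4_t splat (const int32_t *p)
;;   {
;;     return vld1q_dup_s32 (p);   /* ld1r {v0.4s}, [x0] */
;;   }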
5707
5708 (define_insn "aarch64_simd_ld1<mode>_x2"
5709 [(set (match_operand:OI 0 "register_operand" "=w")
5710 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5711 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5712 UNSPEC_LD1))]
5713 "TARGET_SIMD"
5714 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5715 [(set_attr "type" "neon_load1_2reg<q>")]
5716 )
5717
5718 (define_insn "aarch64_simd_ld1<mode>_x2"
5719 [(set (match_operand:OI 0 "register_operand" "=w")
5720 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5721 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722 UNSPEC_LD1))]
5723 "TARGET_SIMD"
5724 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5725 [(set_attr "type" "neon_load1_2reg<q>")]
5726 )
5727
5728
5729 (define_insn "aarch64_frecpe<mode>"
5730 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5731 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5732 UNSPEC_FRECPE))]
5733 "TARGET_SIMD"
5734 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5735 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5736 )
5737
5738 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5739 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5740 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5741 FRECP))]
5742 "TARGET_SIMD"
5743 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5744 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5745 )
5746
5747 (define_insn "aarch64_frecps<mode>"
5748 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5749 (unspec:VHSDF_HSDF
5750 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5751 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5752 UNSPEC_FRECPS))]
5753 "TARGET_SIMD"
5754 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5755 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5756 )
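
;; FRECPE returns an initial reciprocal estimate and FRECPS the
;; Newton-Raphson correction factor (2 - a*x), so a refined reciprocal
;; alternates the two (a sketch, illustration only):
;;
;;   #include <arm_neon.h>
;;   float32x4_t recip (float32x4_t a)
;;   {
;;     float32x4_t x = vrecpeq_f32 (a);           /* frecpe */
;;     x = vmulq_f32 (x, vrecpsq_f32 (a, x));     /* first N-R step */
;;     return vmulq_f32 (x, vrecpsq_f32 (a, x));  /* second N-R step */
;;   }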
5757
5758 (define_insn "aarch64_urecpe<mode>"
5759 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5760 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5761 UNSPEC_URECPE))]
5762 "TARGET_SIMD"
5763 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5764 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5765
5766 ;; Standard pattern name vec_extract<mode><Vel>.
5767
5768 (define_expand "vec_extract<mode><Vel>"
5769 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5770 (match_operand:VALL_F16 1 "register_operand" "")
5771 (match_operand:SI 2 "immediate_operand" "")]
5772 "TARGET_SIMD"
5773 {
5774 emit_insn
5775 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5776 DONE;
5777 })
5778
5779 ;; aes
5780
5781 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5782 [(set (match_operand:V16QI 0 "register_operand" "=w")
5783 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5784 (match_operand:V16QI 2 "register_operand" "w")]
5785 CRYPTO_AES))]
5786 "TARGET_SIMD && TARGET_CRYPTO"
5787 "aes<aes_op>\\t%0.16b, %2.16b"
5788 [(set_attr "type" "crypto_aese")]
5789 )
5790
5791 ;; When AES/AESMC fusion is enabled we want the register allocation to
5792 ;; look like:
5793 ;; AESE Vn, _
5794 ;; AESMC Vn, Vn
5795 ;; So prefer to tie operand 1 to operand 0 when fusing.
5796
5797 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5798 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5799 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5800 CRYPTO_AESMC))]
5801 "TARGET_SIMD && TARGET_CRYPTO"
5802 "aes<aesmc_op>\\t%0.16b, %1.16b"
5803 [(set_attr "type" "crypto_aesmc")
5804 (set_attr_alternative "enabled"
5805 [(if_then_else (match_test
5806 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5807 (const_string "yes")
5808 (const_string "no"))
5809 (const_string "yes")])]
5810 )
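
;; The fusable pair at the intrinsic level (a sketch): keeping the AESE
;; result in the same register feeds AESMC directly, matching the shape
;; described above:
;;
;;   #include <arm_neon.h>
;;   uint8x16_t aes_round (uint8x16_t state, uint8x16_t key)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (state, key));   /* aese + aesmc */
;;   }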
5811
5812 ;; sha1
5813
5814 (define_insn "aarch64_crypto_sha1hsi"
5815 [(set (match_operand:SI 0 "register_operand" "=w")
5816 (unspec:SI [(match_operand:SI 1
5817 "register_operand" "w")]
5818 UNSPEC_SHA1H))]
5819 "TARGET_SIMD && TARGET_CRYPTO"
5820 "sha1h\\t%s0, %s1"
5821 [(set_attr "type" "crypto_sha1_fast")]
5822 )
5823
5824 (define_insn "aarch64_crypto_sha1hv4si"
5825 [(set (match_operand:SI 0 "register_operand" "=w")
5826 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5827 (parallel [(const_int 0)]))]
5828 UNSPEC_SHA1H))]
5829 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5830 "sha1h\\t%s0, %s1"
5831 [(set_attr "type" "crypto_sha1_fast")]
5832 )
5833
5834 (define_insn "aarch64_be_crypto_sha1hv4si"
5835 [(set (match_operand:SI 0 "register_operand" "=w")
5836 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5837 (parallel [(const_int 3)]))]
5838 UNSPEC_SHA1H))]
5839 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5840 "sha1h\\t%s0, %s1"
5841 [(set_attr "type" "crypto_sha1_fast")]
5842 )
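
;; The two sha1hv4si patterns above differ only in the vec_select index:
;; on big-endian, GCC lane 3 of a V4SI names the element that lane 0 names
;; on little-endian, so both read the architectural element 0.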
5843
5844 (define_insn "aarch64_crypto_sha1su1v4si"
5845 [(set (match_operand:V4SI 0 "register_operand" "=w")
5846 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5847 (match_operand:V4SI 2 "register_operand" "w")]
5848 UNSPEC_SHA1SU1))]
5849 "TARGET_SIMD && TARGET_CRYPTO"
5850 "sha1su1\\t%0.4s, %2.4s"
5851 [(set_attr "type" "crypto_sha1_fast")]
5852 )
5853
5854 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5855 [(set (match_operand:V4SI 0 "register_operand" "=w")
5856 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5857 (match_operand:SI 2 "register_operand" "w")
5858 (match_operand:V4SI 3 "register_operand" "w")]
5859 CRYPTO_SHA1))]
5860 "TARGET_SIMD && TARGET_CRYPTO"
5861 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5862 [(set_attr "type" "crypto_sha1_slow")]
5863 )
5864
5865 (define_insn "aarch64_crypto_sha1su0v4si"
5866 [(set (match_operand:V4SI 0 "register_operand" "=w")
5867 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5868 (match_operand:V4SI 2 "register_operand" "w")
5869 (match_operand:V4SI 3 "register_operand" "w")]
5870 UNSPEC_SHA1SU0))]
5871 "TARGET_SIMD && TARGET_CRYPTO"
5872 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5873 [(set_attr "type" "crypto_sha1_xor")]
5874 )
5875
5876 ;; sha256
5877
5878 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5879 [(set (match_operand:V4SI 0 "register_operand" "=w")
5880 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5881 (match_operand:V4SI 2 "register_operand" "w")
5882 (match_operand:V4SI 3 "register_operand" "w")]
5883 CRYPTO_SHA256))]
5884 "TARGET_SIMD && TARGET_CRYPTO"
5885 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5886 [(set_attr "type" "crypto_sha256_slow")]
5887 )
5888
5889 (define_insn "aarch64_crypto_sha256su0v4si"
5890 [(set (match_operand:V4SI 0 "register_operand" "=w")
5891 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5892 (match_operand:V4SI 2 "register_operand" "w")]
5893 UNSPEC_SHA256SU0))]
5894 "TARGET_SIMD &&TARGET_CRYPTO"
5895 "sha256su0\\t%0.4s, %2.4s"
5896 [(set_attr "type" "crypto_sha256_fast")]
5897 )
5898
5899 (define_insn "aarch64_crypto_sha256su1v4si"
5900 [(set (match_operand:V4SI 0 "register_operand" "=w")
5901 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5902 (match_operand:V4SI 2 "register_operand" "w")
5903 (match_operand:V4SI 3 "register_operand" "w")]
5904 UNSPEC_SHA256SU1))]
5905 "TARGET_SIMD &&TARGET_CRYPTO"
5906 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5907 [(set_attr "type" "crypto_sha256_slow")]
5908 )
5909
5910 ;; pmull
5911
5912 (define_insn "aarch64_crypto_pmulldi"
5913 [(set (match_operand:TI 0 "register_operand" "=w")
5914 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5915 (match_operand:DI 2 "register_operand" "w")]
5916 UNSPEC_PMULL))]
5917 "TARGET_SIMD && TARGET_CRYPTO"
5918 "pmull\\t%0.1q, %1.1d, %2.1d"
5919 [(set_attr "type" "crypto_pmull")]
5920 )
5921
5922 (define_insn "aarch64_crypto_pmullv2di"
5923 [(set (match_operand:TI 0 "register_operand" "=w")
5924 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5925 (match_operand:V2DI 2 "register_operand" "w")]
5926 UNSPEC_PMULL2))]
5927 "TARGET_SIMD && TARGET_CRYPTO"
5928 "pmull2\\t%0.1q, %1.2d, %2.2d"
5929 [(set_attr "type" "crypto_pmull")]
5930 )
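
;; The 64x64->128-bit polynomial multiply at the intrinsic level (a
;; sketch), the primitive behind carry-less hashes such as GHASH:
;;
;;   #include <arm_neon.h>
;;   poly128_t clmul (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);        /* pmull */
;;   }
;;   poly128_t clmul_hi (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);   /* pmull2 */
;;   }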