gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((GET_MODE_SIZE (<MODE>mode) == 16
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || GET_MODE_SIZE (<MODE>mode) == 8)))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1],
 124                                                   <MODE>mode, 64);
 125      default: gcc_unreachable ();
 126      }
 127 }
 128   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 129                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 130                      mov_reg, neon_move<q>")]
 131 )
 132
 133 (define_insn "*aarch64_simd_mov<VQ:mode>"
 134   [(set (match_operand:VQ 0 "nonimmediate_operand"
 135                 "=w, Umq,  m,  w, ?r, ?w, ?r, w")
 136         (match_operand:VQ 1 "general_operand"
 137                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 138   "TARGET_SIMD
 139    && (register_operand (operands[0], <MODE>mode)
 140        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 141 {
 142   switch (which_alternative)
 143     {
 144     case 0:
 145         return "ldr\t%q0, %1";
 146     case 1:
 147         return "stp\txzr, xzr, %0";
 148     case 2:
 149         return "str\t%q1, %0";
 150     case 3:
 151         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 152     case 4:
 153     case 5:
 154     case 6:
 155         return "#";
 156     case 7:
 157         return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
 158     default:
 159         gcc_unreachable ();
 160     }
 161 }
 162   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 163                      neon_logic<q>, multiple, multiple,\
 164                      multiple, neon_move<q>")
 165    (set_attr "length" "4,4,4,4,8,8,8,4")]
 166 )
 167
 168 ;; When storing lane zero we can use the normal STR and its more permissive
 169 ;; addressing modes.
 170
 171 (define_insn "aarch64_store_lane0<mode>"
 172   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 173         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 174                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 175   "TARGET_SIMD
 176    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 177   "str\\t%<Vetype>1, %0"
 178   [(set_attr "type" "neon_store1_1reg<q>")]
 179 )
 180
 181 (define_insn "load_pair<mode>"
 182   [(set (match_operand:VD 0 "register_operand" "=w")
 183         (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
 184    (set (match_operand:VD 2 "register_operand" "=w")
 185         (match_operand:VD 3 "memory_operand" "m"))]
 186   "TARGET_SIMD
 187    && rtx_equal_p (XEXP (operands[3], 0),
 188                    plus_constant (Pmode,
 189                                   XEXP (operands[1], 0),
 190                                   GET_MODE_SIZE (<MODE>mode)))"
 191   "ldp\\t%d0, %d2, %1"
 192   [(set_attr "type" "neon_ldp")]
 193 )
 194
 195 (define_insn "store_pair<mode>"
 196   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
 197         (match_operand:VD 1 "register_operand" "w"))
 198    (set (match_operand:VD 2 "memory_operand" "=m")
 199         (match_operand:VD 3 "register_operand" "w"))]
 200   "TARGET_SIMD
 201    && rtx_equal_p (XEXP (operands[2], 0),
 202                    plus_constant (Pmode,
 203                                   XEXP (operands[0], 0),
 204                                   GET_MODE_SIZE (<MODE>mode)))"
 205   "stp\\t%d1, %d3, %0"
 206   [(set_attr "type" "neon_stp")]
 207 )
 208
 209 (define_split
 210   [(set (match_operand:VQ 0 "register_operand" "")
 211       (match_operand:VQ 1 "register_operand" ""))]
 212   "TARGET_SIMD && reload_completed
 213    && GP_REGNUM_P (REGNO (operands[0]))
 214    && GP_REGNUM_P (REGNO (operands[1]))"
 215   [(const_int 0)]
 216 {
 217   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 218   DONE;
 219 })
 220
 221 (define_split
 222   [(set (match_operand:VQ 0 "register_operand" "")
 223         (match_operand:VQ 1 "register_operand" ""))]
 224   "TARGET_SIMD && reload_completed
 225    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 226        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 227   [(const_int 0)]
 228 {
 229   aarch64_split_simd_move (operands[0], operands[1]);
 230   DONE;
 231 })
 232
 233 (define_expand "aarch64_split_simd_mov<mode>"
 234   [(set (match_operand:VQ 0)
 235         (match_operand:VQ 1))]
 236   "TARGET_SIMD"
 237   {
 238     rtx dst = operands[0];
 239     rtx src = operands[1];
 240
 241     if (GP_REGNUM_P (REGNO (src)))
 242       {
 243         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 244         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 245
 246         emit_insn
 247           (gen_move_lo_quad_<mode> (dst, src_low_part));
 248         emit_insn
 249           (gen_move_hi_quad_<mode> (dst, src_high_part));
 250       }
 251
 252     else
 253       {
 254         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 255         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 256         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 257         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 258
 259         emit_insn
 260           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 261         emit_insn
 262           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 263       }
 264     DONE;
 265   }
 266 )
 267
 268 (define_insn "aarch64_simd_mov_from_<mode>low"
 269   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 270         (vec_select:<VHALF>
 271           (match_operand:VQ 1 "register_operand" "w")
 272           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 273   "TARGET_SIMD && reload_completed"
 274   "umov\t%0, %1.d[0]"
 275   [(set_attr "type" "neon_to_gp<q>")
 276    (set_attr "length" "4")
 277   ])
 278
 279 (define_insn "aarch64_simd_mov_from_<mode>high"
 280   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 281         (vec_select:<VHALF>
 282           (match_operand:VQ 1 "register_operand" "w")
 283           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 284   "TARGET_SIMD && reload_completed"
 285   "umov\t%0, %1.d[1]"
 286   [(set_attr "type" "neon_to_gp<q>")
 287    (set_attr "length" "4")
 288   ])
 289
 290 (define_insn "orn<mode>3"
 291  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 292        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 293                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 294  "TARGET_SIMD"
 295  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 296   [(set_attr "type" "neon_logic<q>")]
 297 )
 298
 299 (define_insn "bic<mode>3"
 300  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 301        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 302                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 303  "TARGET_SIMD"
 304  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 305   [(set_attr "type" "neon_logic<q>")]
 306 )
 307
 308 (define_insn "add<mode>3"
 309   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 310         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 311                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 312   "TARGET_SIMD"
 313   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 314   [(set_attr "type" "neon_add<q>")]
 315 )
 316
 317 (define_insn "sub<mode>3"
 318   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 320                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 321   "TARGET_SIMD"
 322   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 323   [(set_attr "type" "neon_sub<q>")]
 324 )
 325
 326 (define_insn "mul<mode>3"
 327   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 328         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 329                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 330   "TARGET_SIMD"
 331   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 332   [(set_attr "type" "neon_mul_<Vetype><q>")]
 333 )
 334
 335 (define_insn "bswap<mode>2"
 336   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 337         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 338   "TARGET_SIMD"
 339   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 340   [(set_attr "type" "neon_rev<q>")]
 341 )
 342
 343 (define_insn "aarch64_rbit<mode>"
 344   [(set (match_operand:VB 0 "register_operand" "=w")
 345         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 346                    UNSPEC_RBIT))]
 347   "TARGET_SIMD"
 348   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 349   [(set_attr "type" "neon_rbit")]
 350 )
 351
 352 (define_expand "ctz<mode>2"
 353   [(set (match_operand:VS 0 "register_operand")
 354         (ctz:VS (match_operand:VS 1 "register_operand")))]
 355   "TARGET_SIMD"
 356   {
 357      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 358      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 359                                              <MODE>mode, 0);
 360      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 361      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 362      DONE;
 363   }
 364 )
 365
 366 (define_expand "xorsign<mode>3"
 367   [(match_operand:VHSDF 0 "register_operand")
 368    (match_operand:VHSDF 1 "register_operand")
 369    (match_operand:VHSDF 2 "register_operand")]
 370   "TARGET_SIMD"
 371 {
 372
 373   machine_mode imode = <V_INT_EQUIV>mode;
 374   rtx v_bitmask = gen_reg_rtx (imode);
 375   rtx op1x = gen_reg_rtx (imode);
 376   rtx op2x = gen_reg_rtx (imode);
 377
 378   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 379   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 380
 381   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 382
 383   emit_move_insn (v_bitmask,
 384                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 385                                                      HOST_WIDE_INT_M1U << bits));
 386
 387   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 388   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 389   emit_move_insn (operands[0],
 390                   lowpart_subreg (<MODE>mode, op1x, imode));
 391   DONE;
 392 }
 393 )
 394
 395 ;; These instructions map to the __builtins for the Dot Product operations.
 396 (define_insn "aarch64_<sur>dot<vsi2qi>"
 397   [(set (match_operand:VS 0 "register_operand" "=w")
 398         (plus:VS (match_operand:VS 1 "register_operand" "0")
 399                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 400                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 401                 DOTPROD)))]
 402   "TARGET_DOTPROD"
 403   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 404   [(set_attr "type" "neon_dot")]
 405 )
 406
 407 ;; These expands map to the Dot Product optab the vectorizer checks for.
 408 ;; The auto-vectorizer expects a dot product builtin that also does an
 409 ;; accumulation into the provided register.
 410 ;; Given the following pattern
 411 ;;
 412 ;; for (i=0; i<len; i++) {
 413 ;;     c = a[i] * b[i];
 414 ;;     r += c;
 415 ;; }
 416 ;; return result;
 417 ;;
 418 ;; This can be auto-vectorized to
 419 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 420 ;;
 421 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 424 ;; ...
 425 ;;
 426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 427 (define_expand "<sur>dot_prod<vsi2qi>"
 428   [(set (match_operand:VS 0 "register_operand")
 429         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 430                             (match_operand:<VSI2QI> 2 "register_operand")]
 431                  DOTPROD)
 432                 (match_operand:VS 3 "register_operand")))]
 433   "TARGET_DOTPROD"
 434 {
 435   emit_insn (
 436     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 437                                     operands[2]));
 438   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 439   DONE;
 440 })
 441
 442 ;; These instructions map to the __builtins for the Dot Product
 443 ;; indexed operations.
 444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 445   [(set (match_operand:VS 0 "register_operand" "=w")
 446         (plus:VS (match_operand:VS 1 "register_operand" "0")
 447                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 448                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 449                             (match_operand:SI 4 "immediate_operand" "i")]
 450                 DOTPROD)))]
 451   "TARGET_DOTPROD"
 452   {
 453     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 454     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 455   }
 456   [(set_attr "type" "neon_dot")]
 457 )
 458
 459 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 460   [(set (match_operand:VS 0 "register_operand" "=w")
 461         (plus:VS (match_operand:VS 1 "register_operand" "0")
 462                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 463                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 464                             (match_operand:SI 4 "immediate_operand" "i")]
 465                 DOTPROD)))]
 466   "TARGET_DOTPROD"
 467   {
 468     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 469     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 470   }
 471   [(set_attr "type" "neon_dot")]
 472 )
 473
 474 (define_expand "copysign<mode>3"
 475   [(match_operand:VHSDF 0 "register_operand")
 476    (match_operand:VHSDF 1 "register_operand")
 477    (match_operand:VHSDF 2 "register_operand")]
 478   "TARGET_FLOAT && TARGET_SIMD"
 479 {
 480   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 481   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 482
 483   emit_move_insn (v_bitmask,
 484                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 485                                                      HOST_WIDE_INT_M1U << bits));
 486   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 487                                          operands[2], operands[1]));
 488   DONE;
 489 }
 490 )
 491
 492 (define_insn "*aarch64_mul3_elt<mode>"
 493  [(set (match_operand:VMUL 0 "register_operand" "=w")
 494     (mult:VMUL
 495       (vec_duplicate:VMUL
 496           (vec_select:<VEL>
 497             (match_operand:VMUL 1 "register_operand" "<h_con>")
 498             (parallel [(match_operand:SI 2 "immediate_operand")])))
 499       (match_operand:VMUL 3 "register_operand" "w")))]
 500   "TARGET_SIMD"
 501   {
 502     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 503     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 504   }
 505   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 506 )
 507
 508 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 509   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 510      (mult:VMUL_CHANGE_NLANES
 511        (vec_duplicate:VMUL_CHANGE_NLANES
 512           (vec_select:<VEL>
 513             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 514             (parallel [(match_operand:SI 2 "immediate_operand")])))
 515       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 516   "TARGET_SIMD"
 517   {
 518     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 519     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 520   }
 521   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 522 )
 523
 524 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 525  [(set (match_operand:VMUL 0 "register_operand" "=w")
 526     (mult:VMUL
 527       (vec_duplicate:VMUL
 528             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 529       (match_operand:VMUL 2 "register_operand" "w")))]
 530   "TARGET_SIMD"
 531   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 532   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 533 )
 534
 535 (define_insn "aarch64_rsqrte<mode>"
 536   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 537         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 538                      UNSPEC_RSQRTE))]
 539   "TARGET_SIMD"
 540   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 541   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 542
 543 (define_insn "aarch64_rsqrts<mode>"
 544   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 545         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 546                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 547          UNSPEC_RSQRTS))]
 548   "TARGET_SIMD"
 549   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 550   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 551
 552 (define_expand "rsqrt<mode>2"
 553   [(set (match_operand:VALLF 0 "register_operand" "=w")
 554         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 555                      UNSPEC_RSQRT))]
 556   "TARGET_SIMD"
 557 {
 558   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 559   DONE;
 560 })
 561
 562 (define_insn "*aarch64_mul3_elt_to_64v2df"
 563   [(set (match_operand:DF 0 "register_operand" "=w")
 564      (mult:DF
 565        (vec_select:DF
 566          (match_operand:V2DF 1 "register_operand" "w")
 567          (parallel [(match_operand:SI 2 "immediate_operand")]))
 568        (match_operand:DF 3 "register_operand" "w")))]
 569   "TARGET_SIMD"
 570   {
 571     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 572     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 573   }
 574   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 575 )
 576
 577 (define_insn "neg<mode>2"
 578   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 579         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 580   "TARGET_SIMD"
 581   "neg\t%0.<Vtype>, %1.<Vtype>"
 582   [(set_attr "type" "neon_neg<q>")]
 583 )
 584
 585 (define_insn "abs<mode>2"
 586   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 587         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 588   "TARGET_SIMD"
 589   "abs\t%0.<Vtype>, %1.<Vtype>"
 590   [(set_attr "type" "neon_abs<q>")]
 591 )
 592
 593 ;; The intrinsic version of integer ABS must not be allowed to
 594 ;; combine with any operation with an integerated ABS step, such
 595 ;; as SABD.
 596 (define_insn "aarch64_abs<mode>"
 597   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 598           (unspec:VSDQ_I_DI
 599             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 600            UNSPEC_ABS))]
 601   "TARGET_SIMD"
 602   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 603   [(set_attr "type" "neon_abs<q>")]
 604 )
 605
 606 (define_insn "abd<mode>_3"
 607   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 608         (abs:VDQ_BHSI (minus:VDQ_BHSI
 609                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 610                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 611   "TARGET_SIMD"
 612   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 613   [(set_attr "type" "neon_abd<q>")]
 614 )
 615
 616 (define_insn "aba<mode>_3"
 617   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 618         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 619                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 620                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 621                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 622   "TARGET_SIMD"
 623   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 624   [(set_attr "type" "neon_arith_acc<q>")]
 625 )
 626
 627 (define_insn "fabd<mode>3"
 628   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 629         (abs:VHSDF_HSDF
 630           (minus:VHSDF_HSDF
 631             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 632             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 633   "TARGET_SIMD"
 634   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 635   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 636 )
 637
 638 ;; For AND (vector, register) and BIC (vector, immediate)
 639 (define_insn "and<mode>3"
 640   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 641         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 642                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 643   "TARGET_SIMD"
 644   {
 645     switch (which_alternative)
 646       {
 647       case 0:
 648         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 649       case 1:
 650         return aarch64_output_simd_mov_immediate (operands[2],
 651            <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
 652       default:
 653         gcc_unreachable ();
 654       }
 655   }
 656   [(set_attr "type" "neon_logic<q>")]
 657 )
 658
 659 ;; For ORR (vector, register) and ORR (vector, immediate)
 660 (define_insn "ior<mode>3"
 661   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 662         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 663                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 664   "TARGET_SIMD"
 665   {
 666     switch (which_alternative)
 667       {
 668       case 0:
 669         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 670       case 1:
 671         return aarch64_output_simd_mov_immediate (operands[2],
 672                 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
 673       default:
 674         gcc_unreachable ();
 675       }
 676   }
 677   [(set_attr "type" "neon_logic<q>")]
 678 )
 679
 680 (define_insn "xor<mode>3"
 681   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 682         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 683                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 684   "TARGET_SIMD"
 685   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 686   [(set_attr "type" "neon_logic<q>")]
 687 )
 688
 689 (define_insn "one_cmpl<mode>2"
 690   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 691         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 692   "TARGET_SIMD"
 693   "not\t%0.<Vbtype>, %1.<Vbtype>"
 694   [(set_attr "type" "neon_logic<q>")]
 695 )
 696
 697 (define_insn "aarch64_simd_vec_set<mode>"
 698   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
 699         (vec_merge:VDQ_BHSI
 700             (vec_duplicate:VDQ_BHSI
 701                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
 702             (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
 703             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 704   "TARGET_SIMD"
 705   {
 706    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 707    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 708    switch (which_alternative)
 709      {
 710      case 0:
 711         return "ins\\t%0.<Vetype>[%p2], %w1";
 712      case 1:
 713         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 714      case 2:
 715         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 716      default:
 717         gcc_unreachable ();
 718      }
 719   }
 720   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
 721 )
 722
 723 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 724   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 725         (vec_merge:VALL_F16
 726             (vec_duplicate:VALL_F16
 727               (vec_select:<VEL>
 728                 (match_operand:VALL_F16 3 "register_operand" "w")
 729                 (parallel
 730                   [(match_operand:SI 4 "immediate_operand" "i")])))
 731             (match_operand:VALL_F16 1 "register_operand" "0")
 732             (match_operand:SI 2 "immediate_operand" "i")))]
 733   "TARGET_SIMD"
 734   {
 735     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 736     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 737     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
 738
 739     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 740   }
 741   [(set_attr "type" "neon_ins<q>")]
 742 )
 743
 744 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 745   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 746         (vec_merge:VALL_F16_NO_V2Q
 747             (vec_duplicate:VALL_F16_NO_V2Q
 748               (vec_select:<VEL>
 749                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 750                 (parallel
 751                   [(match_operand:SI 4 "immediate_operand" "i")])))
 752             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 753             (match_operand:SI 2 "immediate_operand" "i")))]
 754   "TARGET_SIMD"
 755   {
 756     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 757     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 758     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
 759                                            INTVAL (operands[4]));
 760
 761     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 762   }
 763   [(set_attr "type" "neon_ins<q>")]
 764 )
 765
 766 (define_insn "aarch64_simd_lshr<mode>"
 767  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 768        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 769                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 770  "TARGET_SIMD"
 771  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 772   [(set_attr "type" "neon_shift_imm<q>")]
 773 )
 774
 775 (define_insn "aarch64_simd_ashr<mode>"
 776  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 777        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 778                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 779  "TARGET_SIMD"
 780  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 781   [(set_attr "type" "neon_shift_imm<q>")]
 782 )
 783
 784 (define_insn "aarch64_simd_imm_shl<mode>"
 785  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 786        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 787                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 788  "TARGET_SIMD"
 789   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 790   [(set_attr "type" "neon_shift_imm<q>")]
 791 )
 792
 793 (define_insn "aarch64_simd_reg_sshl<mode>"
 794  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 795        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 796                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 797  "TARGET_SIMD"
 798  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 799   [(set_attr "type" "neon_shift_reg<q>")]
 800 )
 801
 802 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 803  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 804        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 805                     (match_operand:VDQ_I 2 "register_operand" "w")]
 806                    UNSPEC_ASHIFT_UNSIGNED))]
 807  "TARGET_SIMD"
 808  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 809   [(set_attr "type" "neon_shift_reg<q>")]
 810 )
 811
 812 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 813  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 814        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 815                     (match_operand:VDQ_I 2 "register_operand" "w")]
 816                    UNSPEC_ASHIFT_SIGNED))]
 817  "TARGET_SIMD"
 818  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 819   [(set_attr "type" "neon_shift_reg<q>")]
 820 )
 821
 822 (define_expand "ashl<mode>3"
 823   [(match_operand:VDQ_I 0 "register_operand" "")
 824    (match_operand:VDQ_I 1 "register_operand" "")
 825    (match_operand:SI  2 "general_operand" "")]
 826  "TARGET_SIMD"
 827 {
 828   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 829   int shift_amount;
 830
 831   if (CONST_INT_P (operands[2]))
 832     {
 833       shift_amount = INTVAL (operands[2]);
 834       if (shift_amount >= 0 && shift_amount < bit_width)
 835         {
 836           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 837                                                        shift_amount);
 838           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 839                                                      operands[1],
 840                                                      tmp));
 841           DONE;
 842         }
 843       else
 844         {
 845           operands[2] = force_reg (SImode, operands[2]);
 846         }
 847     }
 848   else if (MEM_P (operands[2]))
 849     {
 850       operands[2] = force_reg (SImode, operands[2]);
 851     }
 852
 853   if (REG_P (operands[2]))
 854     {
 855       rtx tmp = gen_reg_rtx (<MODE>mode);
 856       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 857                                              convert_to_mode (<VEL>mode,
 858                                                               operands[2],
 859                                                               0)));
 860       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 861                                                   tmp));
 862       DONE;
 863     }
 864   else
 865     FAIL;
 866 }
 867 )
 868
 869 (define_expand "lshr<mode>3"
 870   [(match_operand:VDQ_I 0 "register_operand" "")
 871    (match_operand:VDQ_I 1 "register_operand" "")
 872    (match_operand:SI  2 "general_operand" "")]
 873  "TARGET_SIMD"
 874 {
 875   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 876   int shift_amount;
 877
 878   if (CONST_INT_P (operands[2]))
 879     {
 880       shift_amount = INTVAL (operands[2]);
 881       if (shift_amount > 0 && shift_amount <= bit_width)
 882         {
 883           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 884                                                        shift_amount);
 885           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 886                                                   operands[1],
 887                                                   tmp));
 888           DONE;
 889         }
 890       else
 891         operands[2] = force_reg (SImode, operands[2]);
 892     }
 893   else if (MEM_P (operands[2]))
 894     {
 895       operands[2] = force_reg (SImode, operands[2]);
 896     }
 897
 898   if (REG_P (operands[2]))
 899     {
 900       rtx tmp = gen_reg_rtx (SImode);
 901       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 902       emit_insn (gen_negsi2 (tmp, operands[2]));
 903       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 904                                              convert_to_mode (<VEL>mode,
 905                                                               tmp, 0)));
 906       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 907                                                           operands[1],
 908                                                           tmp1));
 909       DONE;
 910     }
 911   else
 912     FAIL;
 913 }
 914 )
 915
 916 (define_expand "ashr<mode>3"
 917   [(match_operand:VDQ_I 0 "register_operand" "")
 918    (match_operand:VDQ_I 1 "register_operand" "")
 919    (match_operand:SI  2 "general_operand" "")]
 920  "TARGET_SIMD"
 921 {
 922   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 923   int shift_amount;
 924
 925   if (CONST_INT_P (operands[2]))
 926     {
 927       shift_amount = INTVAL (operands[2]);
 928       if (shift_amount > 0 && shift_amount <= bit_width)
 929         {
 930           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 931                                                        shift_amount);
 932           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
 933                                                   operands[1],
 934                                                   tmp));
 935           DONE;
 936         }
 937       else
 938         operands[2] = force_reg (SImode, operands[2]);
 939     }
 940   else if (MEM_P (operands[2]))
 941     {
 942       operands[2] = force_reg (SImode, operands[2]);
 943     }
 944
 945   if (REG_P (operands[2]))
 946     {
 947       rtx tmp = gen_reg_rtx (SImode);
 948       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 949       emit_insn (gen_negsi2 (tmp, operands[2]));
 950       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 951                                              convert_to_mode (<VEL>mode,
 952                                                               tmp, 0)));
 953       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
 954                                                         operands[1],
 955                                                         tmp1));
 956       DONE;
 957     }
 958   else
 959     FAIL;
 960 }
 961 )
 962
 963 (define_expand "vashl<mode>3"
 964  [(match_operand:VDQ_I 0 "register_operand" "")
 965   (match_operand:VDQ_I 1 "register_operand" "")
 966   (match_operand:VDQ_I 2 "register_operand" "")]
 967  "TARGET_SIMD"
 968 {
 969   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 970                                               operands[2]));
 971   DONE;
 972 })
 973
 974 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
 975 ;; Negating individual lanes most certainly offsets the
 976 ;; gain from vectorization.
 977 (define_expand "vashr<mode>3"
 978  [(match_operand:VDQ_BHSI 0 "register_operand" "")
 979   (match_operand:VDQ_BHSI 1 "register_operand" "")
 980   (match_operand:VDQ_BHSI 2 "register_operand" "")]
 981  "TARGET_SIMD"
 982 {
 983   rtx neg = gen_reg_rtx (<MODE>mode);
 984   emit (gen_neg<mode>2 (neg, operands[2]));
 985   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
 986                                                     neg));
 987   DONE;
 988 })
 989
 990 ;; DI vector shift
 991 (define_expand "aarch64_ashr_simddi"
 992   [(match_operand:DI 0 "register_operand" "=w")
 993    (match_operand:DI 1 "register_operand" "w")
 994    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
 995   "TARGET_SIMD"
 996   {
 997     /* An arithmetic shift right by 64 fills the result with copies of the sign
 998        bit, just like asr by 63 - however the standard pattern does not handle
 999        a shift by 64.  */
1000     if (INTVAL (operands[2]) == 64)
1001       operands[2] = GEN_INT (63);
1002     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1003     DONE;
1004   }
1005 )
1006
1007 (define_expand "vlshr<mode>3"
1008  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1009   (match_operand:VDQ_BHSI 1 "register_operand" "")
1010   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1011  "TARGET_SIMD"
1012 {
1013   rtx neg = gen_reg_rtx (<MODE>mode);
1014   emit (gen_neg<mode>2 (neg, operands[2]));
1015   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1016                                                       neg));
1017   DONE;
1018 })
1019
1020 (define_expand "aarch64_lshr_simddi"
1021   [(match_operand:DI 0 "register_operand" "=w")
1022    (match_operand:DI 1 "register_operand" "w")
1023    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1024   "TARGET_SIMD"
1025   {
1026     if (INTVAL (operands[2]) == 64)
1027       emit_move_insn (operands[0], const0_rtx);
1028     else
1029       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1030     DONE;
1031   }
1032 )
1033
1034 (define_expand "vec_set<mode>"
1035   [(match_operand:VDQ_BHSI 0 "register_operand")
1036    (match_operand:<VEL> 1 "register_operand")
1037    (match_operand:SI 2 "immediate_operand")]
1038   "TARGET_SIMD"
1039   {
1040     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1041     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1042                                             GEN_INT (elem), operands[0]));
1043     DONE;
1044   }
1045 )
1046
1047 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1048 (define_insn "vec_shr_<mode>"
1049   [(set (match_operand:VD 0 "register_operand" "=w")
1050         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1051                     (match_operand:SI 2 "immediate_operand" "i")]
1052                    UNSPEC_VEC_SHR))]
1053   "TARGET_SIMD"
1054   {
1055     if (BYTES_BIG_ENDIAN)
1056       return "shl %d0, %d1, %2";
1057     else
1058       return "ushr %d0, %d1, %2";
1059   }
1060   [(set_attr "type" "neon_shift_imm")]
1061 )
1062
1063 (define_insn "aarch64_simd_vec_setv2di"
1064   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1065         (vec_merge:V2DI
1066             (vec_duplicate:V2DI
1067                 (match_operand:DI 1 "register_operand" "r,w"))
1068             (match_operand:V2DI 3 "register_operand" "0,0")
1069             (match_operand:SI 2 "immediate_operand" "i,i")))]
1070   "TARGET_SIMD"
1071   {
1072     int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1073     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1074     switch (which_alternative)
1075       {
1076       case 0:
1077         return "ins\\t%0.d[%p2], %1";
1078       case 1:
1079         return "ins\\t%0.d[%p2], %1.d[0]";
1080       default:
1081         gcc_unreachable ();
1082       }
1083   }
1084   [(set_attr "type" "neon_from_gp, neon_ins_q")]
1085 )
1086
1087 (define_expand "vec_setv2di"
1088   [(match_operand:V2DI 0 "register_operand")
1089    (match_operand:DI 1 "register_operand")
1090    (match_operand:SI 2 "immediate_operand")]
1091   "TARGET_SIMD"
1092   {
1093     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1094     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1095                                           GEN_INT (elem), operands[0]));
1096     DONE;
1097   }
1098 )
1099
1100 (define_insn "aarch64_simd_vec_set<mode>"
1101   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1102         (vec_merge:VDQF_F16
1103             (vec_duplicate:VDQF_F16
1104                 (match_operand:<VEL> 1 "register_operand" "w"))
1105             (match_operand:VDQF_F16 3 "register_operand" "0")
1106             (match_operand:SI 2 "immediate_operand" "i")))]
1107   "TARGET_SIMD"
1108   {
1109     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1110
1111     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1112     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1113   }
1114   [(set_attr "type" "neon_ins<q>")]
1115 )
1116
1117 (define_expand "vec_set<mode>"
1118   [(match_operand:VDQF_F16 0 "register_operand" "+w")
1119    (match_operand:<VEL> 1 "register_operand" "w")
1120    (match_operand:SI 2 "immediate_operand" "")]
1121   "TARGET_SIMD"
1122   {
1123     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1124     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1125                                           GEN_INT (elem), operands[0]));
1126     DONE;
1127   }
1128 )
1129
1130
1131 (define_insn "aarch64_mla<mode>"
1132  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1133        (plus:VDQ_BHSI (mult:VDQ_BHSI
1134                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1135                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1136                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1137  "TARGET_SIMD"
1138  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1139   [(set_attr "type" "neon_mla_<Vetype><q>")]
1140 )
1141
1142 (define_insn "*aarch64_mla_elt<mode>"
1143  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1144        (plus:VDQHS
1145          (mult:VDQHS
1146            (vec_duplicate:VDQHS
1147               (vec_select:<VEL>
1148                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1149                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1150            (match_operand:VDQHS 3 "register_operand" "w"))
1151          (match_operand:VDQHS 4 "register_operand" "0")))]
1152  "TARGET_SIMD"
1153   {
1154     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1155     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1156   }
1157   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1158 )
1159
1160 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1161  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1162        (plus:VDQHS
1163          (mult:VDQHS
1164            (vec_duplicate:VDQHS
1165               (vec_select:<VEL>
1166                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1167                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1168            (match_operand:VDQHS 3 "register_operand" "w"))
1169          (match_operand:VDQHS 4 "register_operand" "0")))]
1170  "TARGET_SIMD"
1171   {
1172     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1173     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1174   }
1175   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1176 )
1177
1178 (define_insn "*aarch64_mla_elt_merge<mode>"
1179   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1180         (plus:VDQHS
1181           (mult:VDQHS (vec_duplicate:VDQHS
1182                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1183                 (match_operand:VDQHS 2 "register_operand" "w"))
1184           (match_operand:VDQHS 3 "register_operand" "0")))]
1185  "TARGET_SIMD"
1186  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1187   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1188 )
1189
1190 (define_insn "aarch64_mls<mode>"
1191  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1192        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1193                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1194                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1195  "TARGET_SIMD"
1196  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1197   [(set_attr "type" "neon_mla_<Vetype><q>")]
1198 )
1199
1200 (define_insn "*aarch64_mls_elt<mode>"
1201  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202        (minus:VDQHS
1203          (match_operand:VDQHS 4 "register_operand" "0")
1204          (mult:VDQHS
1205            (vec_duplicate:VDQHS
1206               (vec_select:<VEL>
1207                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1208                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1209            (match_operand:VDQHS 3 "register_operand" "w"))))]
1210  "TARGET_SIMD"
1211   {
1212     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1213     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1214   }
1215   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1216 )
1217
1218 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1219  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1220        (minus:VDQHS
1221          (match_operand:VDQHS 4 "register_operand" "0")
1222          (mult:VDQHS
1223            (vec_duplicate:VDQHS
1224               (vec_select:<VEL>
1225                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1226                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1227            (match_operand:VDQHS 3 "register_operand" "w"))))]
1228  "TARGET_SIMD"
1229   {
1230     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1231     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1232   }
1233   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1234 )
1235
1236 (define_insn "*aarch64_mls_elt_merge<mode>"
1237   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1238         (minus:VDQHS
1239           (match_operand:VDQHS 1 "register_operand" "0")
1240           (mult:VDQHS (vec_duplicate:VDQHS
1241                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1242                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1243   "TARGET_SIMD"
1244   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1245   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1246 )
1247
1248 ;; Max/Min operations.
1249 (define_insn "<su><maxmin><mode>3"
1250  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1251        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1252                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1253  "TARGET_SIMD"
1254  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1255   [(set_attr "type" "neon_minmax<q>")]
1256 )
1257
1258 (define_expand "<su><maxmin>v2di3"
1259  [(set (match_operand:V2DI 0 "register_operand" "")
1260        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1261                     (match_operand:V2DI 2 "register_operand" "")))]
1262  "TARGET_SIMD"
1263 {
1264   enum rtx_code cmp_operator;
1265   rtx cmp_fmt;
1266
1267   switch (<CODE>)
1268     {
1269     case UMIN:
1270       cmp_operator = LTU;
1271       break;
1272     case SMIN:
1273       cmp_operator = LT;
1274       break;
1275     case UMAX:
1276       cmp_operator = GTU;
1277       break;
1278     case SMAX:
1279       cmp_operator = GT;
1280       break;
1281     default:
1282       gcc_unreachable ();
1283     }
1284
1285   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1286   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1287               operands[2], cmp_fmt, operands[1], operands[2]));
1288   DONE;
1289 })
1290
1291 ;; Pairwise Integer Max/Min operations.
1292 (define_insn "aarch64_<maxmin_uns>p<mode>"
1293  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1294        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1295                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1296                         MAXMINV))]
1297  "TARGET_SIMD"
1298  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1299   [(set_attr "type" "neon_minmax<q>")]
1300 )
1301
1302 ;; Pairwise FP Max/Min operations.
1303 (define_insn "aarch64_<maxmin_uns>p<mode>"
1304  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1305        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1306                       (match_operand:VHSDF 2 "register_operand" "w")]
1307                       FMAXMINV))]
1308  "TARGET_SIMD"
1309  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1310   [(set_attr "type" "neon_minmax<q>")]
1311 )
1312
1313 ;; vec_concat gives a new vector with the low elements from operand 1, and
1314 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1315 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1316 ;; What that means, is that the RTL descriptions of the below patterns
1317 ;; need to change depending on endianness.
1318
1319 ;; Move to the low architectural bits of the register.
1320 ;; On little-endian this is { operand, zeroes }
1321 ;; On big-endian this is { zeroes, operand }
1322
1323 (define_insn "move_lo_quad_internal_<mode>"
1324   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1325         (vec_concat:VQ_NO2E
1326           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1327           (vec_duplicate:<VHALF> (const_int 0))))]
1328   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1329   "@
1330    dup\\t%d0, %1.d[0]
1331    fmov\\t%d0, %1
1332    dup\\t%d0, %1"
1333   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1334    (set_attr "simd" "yes,*,yes")
1335    (set_attr "fp" "*,yes,*")
1336    (set_attr "length" "4")]
1337 )
1338
1339 (define_insn "move_lo_quad_internal_<mode>"
1340   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1341         (vec_concat:VQ_2E
1342           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1343           (const_int 0)))]
1344   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1345   "@
1346    dup\\t%d0, %1.d[0]
1347    fmov\\t%d0, %1
1348    dup\\t%d0, %1"
1349   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1350    (set_attr "simd" "yes,*,yes")
1351    (set_attr "fp" "*,yes,*")
1352    (set_attr "length" "4")]
1353 )
1354
1355 (define_insn "move_lo_quad_internal_be_<mode>"
1356   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1357         (vec_concat:VQ_NO2E
1358           (vec_duplicate:<VHALF> (const_int 0))
1359           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1360   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1361   "@
1362    dup\\t%d0, %1.d[0]
1363    fmov\\t%d0, %1
1364    dup\\t%d0, %1"
1365   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1366    (set_attr "simd" "yes,*,yes")
1367    (set_attr "fp" "*,yes,*")
1368    (set_attr "length" "4")]
1369 )
1370
1371 (define_insn "move_lo_quad_internal_be_<mode>"
1372   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1373         (vec_concat:VQ_2E
1374           (const_int 0)
1375           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1376   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1377   "@
1378    dup\\t%d0, %1.d[0]
1379    fmov\\t%d0, %1
1380    dup\\t%d0, %1"
1381   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1382    (set_attr "simd" "yes,*,yes")
1383    (set_attr "fp" "*,yes,*")
1384    (set_attr "length" "4")]
1385 )
1386
1387 (define_expand "move_lo_quad_<mode>"
1388   [(match_operand:VQ 0 "register_operand")
1389    (match_operand:VQ 1 "register_operand")]
1390   "TARGET_SIMD"
1391 {
1392   if (BYTES_BIG_ENDIAN)
1393     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1394   else
1395     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1396   DONE;
1397 }
1398 )
1399
1400 ;; Move operand1 to the high architectural bits of the register, keeping
1401 ;; the low architectural bits of operand2.
1402 ;; For little-endian this is { operand2, operand1 }
1403 ;; For big-endian this is { operand1, operand2 }
1404
1405 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1406   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1407         (vec_concat:VQ
1408           (vec_select:<VHALF>
1409                 (match_dup 0)
1410                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1411           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1412   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1413   "@
1414    ins\\t%0.d[1], %1.d[0]
1415    ins\\t%0.d[1], %1"
1416   [(set_attr "type" "neon_ins")]
1417 )
1418
1419 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1420   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1421         (vec_concat:VQ
1422           (match_operand:<VHALF> 1 "register_operand" "w,r")
1423           (vec_select:<VHALF>
1424                 (match_dup 0)
1425                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1426   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1427   "@
1428    ins\\t%0.d[1], %1.d[0]
1429    ins\\t%0.d[1], %1"
1430   [(set_attr "type" "neon_ins")]
1431 )
1432
1433 (define_expand "move_hi_quad_<mode>"
1434  [(match_operand:VQ 0 "register_operand" "")
1435   (match_operand:<VHALF> 1 "register_operand" "")]
1436  "TARGET_SIMD"
1437 {
1438   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1439   if (BYTES_BIG_ENDIAN)
1440     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1441                     operands[1], p));
1442   else
1443     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1444                     operands[1], p));
1445   DONE;
1446 })
1447
1448 ;; Narrowing operations.
1449
1450 ;; For doubles.
1451 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1452  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1453        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1454  "TARGET_SIMD"
1455  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1456   [(set_attr "type" "neon_shift_imm_narrow_q")]
1457 )
1458
1459 (define_expand "vec_pack_trunc_<mode>"
1460  [(match_operand:<VNARROWD> 0 "register_operand" "")
1461   (match_operand:VDN 1 "register_operand" "")
1462   (match_operand:VDN 2 "register_operand" "")]
1463  "TARGET_SIMD"
1464 {
1465   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1466   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1467   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1468
1469   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1470   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1471   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1472   DONE;
1473 })
1474
1475 ;; For quads.
1476
1477 (define_insn "vec_pack_trunc_<mode>"
1478  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1479        (vec_concat:<VNARROWQ2>
1480          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1481          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1482  "TARGET_SIMD"
1483  {
1484    if (BYTES_BIG_ENDIAN)
1485      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1486    else
1487      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1488  }
1489   [(set_attr "type" "multiple")
1490    (set_attr "length" "8")]
1491 )
1492
1493 ;; Widening operations.
1494
1495 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1496   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1497         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1498                                (match_operand:VQW 1 "register_operand" "w")
1499                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1500                             )))]
1501   "TARGET_SIMD"
1502   "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1503   [(set_attr "type" "neon_shift_imm_long")]
1504 )
1505
1506 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1507   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1508         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1509                                (match_operand:VQW 1 "register_operand" "w")
1510                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1511                             )))]
1512   "TARGET_SIMD"
1513   "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1514   [(set_attr "type" "neon_shift_imm_long")]
1515 )
1516
1517 (define_expand "vec_unpack<su>_hi_<mode>"
1518   [(match_operand:<VWIDE> 0 "register_operand" "")
1519    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1520   "TARGET_SIMD"
1521   {
1522     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1523     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1524                                                           operands[1], p));
1525     DONE;
1526   }
1527 )
1528
1529 (define_expand "vec_unpack<su>_lo_<mode>"
1530   [(match_operand:<VWIDE> 0 "register_operand" "")
1531    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1532   "TARGET_SIMD"
1533   {
1534     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1535     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1536                                                           operands[1], p));
1537     DONE;
1538   }
1539 )
1540
1541 ;; Widening arithmetic.
1542
1543 (define_insn "*aarch64_<su>mlal_lo<mode>"
1544   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1545         (plus:<VWIDE>
1546           (mult:<VWIDE>
1547               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1548                  (match_operand:VQW 2 "register_operand" "w")
1549                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1550               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1551                  (match_operand:VQW 4 "register_operand" "w")
1552                  (match_dup 3))))
1553           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1554   "TARGET_SIMD"
1555   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1556   [(set_attr "type" "neon_mla_<Vetype>_long")]
1557 )
1558
1559 (define_insn "*aarch64_<su>mlal_hi<mode>"
1560   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1561         (plus:<VWIDE>
1562           (mult:<VWIDE>
1563               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1564                  (match_operand:VQW 2 "register_operand" "w")
1565                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1566               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1567                  (match_operand:VQW 4 "register_operand" "w")
1568                  (match_dup 3))))
1569           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1570   "TARGET_SIMD"
1571   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1572   [(set_attr "type" "neon_mla_<Vetype>_long")]
1573 )
1574
1575 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1576   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1577         (minus:<VWIDE>
1578           (match_operand:<VWIDE> 1 "register_operand" "0")
1579           (mult:<VWIDE>
1580               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581                  (match_operand:VQW 2 "register_operand" "w")
1582                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1583               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584                  (match_operand:VQW 4 "register_operand" "w")
1585                  (match_dup 3))))))]
1586   "TARGET_SIMD"
1587   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1588   [(set_attr "type" "neon_mla_<Vetype>_long")]
1589 )
1590
1591 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1592   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1593         (minus:<VWIDE>
1594           (match_operand:<VWIDE> 1 "register_operand" "0")
1595           (mult:<VWIDE>
1596               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1597                  (match_operand:VQW 2 "register_operand" "w")
1598                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1599               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1600                  (match_operand:VQW 4 "register_operand" "w")
1601                  (match_dup 3))))))]
1602   "TARGET_SIMD"
1603   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1604   [(set_attr "type" "neon_mla_<Vetype>_long")]
1605 )
1606
1607 (define_insn "*aarch64_<su>mlal<mode>"
1608   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609         (plus:<VWIDE>
1610           (mult:<VWIDE>
1611             (ANY_EXTEND:<VWIDE>
1612               (match_operand:VD_BHSI 1 "register_operand" "w"))
1613             (ANY_EXTEND:<VWIDE>
1614               (match_operand:VD_BHSI 2 "register_operand" "w")))
1615           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1616   "TARGET_SIMD"
1617   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1618   [(set_attr "type" "neon_mla_<Vetype>_long")]
1619 )
1620
1621 (define_insn "*aarch64_<su>mlsl<mode>"
1622   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1623         (minus:<VWIDE>
1624           (match_operand:<VWIDE> 1 "register_operand" "0")
1625           (mult:<VWIDE>
1626             (ANY_EXTEND:<VWIDE>
1627               (match_operand:VD_BHSI 2 "register_operand" "w"))
1628             (ANY_EXTEND:<VWIDE>
1629               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1630   "TARGET_SIMD"
1631   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1632   [(set_attr "type" "neon_mla_<Vetype>_long")]
1633 )
1634
1635 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1636  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1637        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1638                            (match_operand:VQW 1 "register_operand" "w")
1639                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1640                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1641                            (match_operand:VQW 2 "register_operand" "w")
1642                            (match_dup 3)))))]
1643   "TARGET_SIMD"
1644   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1645   [(set_attr "type" "neon_mul_<Vetype>_long")]
1646 )
1647
1648 (define_expand "vec_widen_<su>mult_lo_<mode>"
1649   [(match_operand:<VWIDE> 0 "register_operand" "")
1650    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1651    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1652  "TARGET_SIMD"
1653  {
1654    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1655    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1656                                                        operands[1],
1657                                                        operands[2], p));
1658    DONE;
1659  }
1660 )
1661
1662 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1663  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665                             (match_operand:VQW 1 "register_operand" "w")
1666                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668                             (match_operand:VQW 2 "register_operand" "w")
1669                             (match_dup 3)))))]
1670   "TARGET_SIMD"
1671   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1672   [(set_attr "type" "neon_mul_<Vetype>_long")]
1673 )
1674
1675 (define_expand "vec_widen_<su>mult_hi_<mode>"
1676   [(match_operand:<VWIDE> 0 "register_operand" "")
1677    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1678    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1679  "TARGET_SIMD"
1680  {
1681    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1682    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1683                                                        operands[1],
1684                                                        operands[2], p));
1685    DONE;
1686
1687  }
1688 )
1689
1690 ;; FP vector operations.
1691 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1692 ;; double-precision (64-bit) floating-point data types and arithmetic as
1693 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1694 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1695 ;;
1696 ;; Floating-point operations can raise an exception.  Vectorizing such
1697 ;; operations are safe because of reasons explained below.
1698 ;;
1699 ;; ARMv8 permits an extension to enable trapped floating-point
1700 ;; exception handling, however this is an optional feature.  In the
1701 ;; event of a floating-point exception being raised by vectorised
1702 ;; code then:
1703 ;; 1.  If trapped floating-point exceptions are available, then a trap
1704 ;;     will be taken when any lane raises an enabled exception.  A trap
1705 ;;     handler may determine which lane raised the exception.
1706 ;; 2.  Alternatively a sticky exception flag is set in the
1707 ;;     floating-point status register (FPSR).  Software may explicitly
1708 ;;     test the exception flags, in which case the tests will either
1709 ;;     prevent vectorisation, allowing precise identification of the
1710 ;;     failing operation, or if tested outside of vectorisable regions
1711 ;;     then the specific operation and lane are not of interest.
1712
1713 ;; FP arithmetic operations.
1714
1715 (define_insn "add<mode>3"
1716  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1717        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1718                    (match_operand:VHSDF 2 "register_operand" "w")))]
1719  "TARGET_SIMD"
1720  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1721   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1722 )
1723
1724 (define_insn "sub<mode>3"
1725  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1726        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1727                     (match_operand:VHSDF 2 "register_operand" "w")))]
1728  "TARGET_SIMD"
1729  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1730   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1731 )
1732
1733 (define_insn "mul<mode>3"
1734  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1735        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1736                    (match_operand:VHSDF 2 "register_operand" "w")))]
1737  "TARGET_SIMD"
1738  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1739   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1740 )
1741
1742 (define_expand "div<mode>3"
1743  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1744        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1745                   (match_operand:VHSDF 2 "register_operand" "w")))]
1746  "TARGET_SIMD"
1747 {
1748   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1749     DONE;
1750
1751   operands[1] = force_reg (<MODE>mode, operands[1]);
1752 })
1753
1754 (define_insn "*div<mode>3"
1755  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757                  (match_operand:VHSDF 2 "register_operand" "w")))]
1758  "TARGET_SIMD"
1759  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760   [(set_attr "type" "neon_fp_div_<stype><q>")]
1761 )
1762
1763 (define_insn "neg<mode>2"
1764  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1766  "TARGET_SIMD"
1767  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1768   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1769 )
1770
1771 (define_insn "abs<mode>2"
1772  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1774  "TARGET_SIMD"
1775  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1776   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1777 )
1778
1779 (define_insn "fma<mode>4"
1780   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1781        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1782                   (match_operand:VHSDF 2 "register_operand" "w")
1783                   (match_operand:VHSDF 3 "register_operand" "0")))]
1784   "TARGET_SIMD"
1785  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1786   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1787 )
1788
1789 (define_insn "*aarch64_fma4_elt<mode>"
1790   [(set (match_operand:VDQF 0 "register_operand" "=w")
1791     (fma:VDQF
1792       (vec_duplicate:VDQF
1793         (vec_select:<VEL>
1794           (match_operand:VDQF 1 "register_operand" "<h_con>")
1795           (parallel [(match_operand:SI 2 "immediate_operand")])))
1796       (match_operand:VDQF 3 "register_operand" "w")
1797       (match_operand:VDQF 4 "register_operand" "0")))]
1798   "TARGET_SIMD"
1799   {
1800     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1801     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1802   }
1803   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1804 )
1805
1806 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1807   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1808     (fma:VDQSF
1809       (vec_duplicate:VDQSF
1810         (vec_select:<VEL>
1811           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1812           (parallel [(match_operand:SI 2 "immediate_operand")])))
1813       (match_operand:VDQSF 3 "register_operand" "w")
1814       (match_operand:VDQSF 4 "register_operand" "0")))]
1815   "TARGET_SIMD"
1816   {
1817     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1818     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1819   }
1820   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1821 )
1822
1823 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1824   [(set (match_operand:VMUL 0 "register_operand" "=w")
1825     (fma:VMUL
1826       (vec_duplicate:VMUL
1827           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1828       (match_operand:VMUL 2 "register_operand" "w")
1829       (match_operand:VMUL 3 "register_operand" "0")))]
1830   "TARGET_SIMD"
1831   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1832   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1833 )
1834
1835 (define_insn "*aarch64_fma4_elt_to_64v2df"
1836   [(set (match_operand:DF 0 "register_operand" "=w")
1837     (fma:DF
1838         (vec_select:DF
1839           (match_operand:V2DF 1 "register_operand" "w")
1840           (parallel [(match_operand:SI 2 "immediate_operand")]))
1841       (match_operand:DF 3 "register_operand" "w")
1842       (match_operand:DF 4 "register_operand" "0")))]
1843   "TARGET_SIMD"
1844   {
1845     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1846     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1847   }
1848   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1849 )
1850
1851 (define_insn "fnma<mode>4"
1852   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1853         (fma:VHSDF
1854           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1855           (match_operand:VHSDF 2 "register_operand" "w")
1856           (match_operand:VHSDF 3 "register_operand" "0")))]
1857   "TARGET_SIMD"
1858   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1859   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1860 )
1861
1862 (define_insn "*aarch64_fnma4_elt<mode>"
1863   [(set (match_operand:VDQF 0 "register_operand" "=w")
1864     (fma:VDQF
1865       (neg:VDQF
1866         (match_operand:VDQF 3 "register_operand" "w"))
1867       (vec_duplicate:VDQF
1868         (vec_select:<VEL>
1869           (match_operand:VDQF 1 "register_operand" "<h_con>")
1870           (parallel [(match_operand:SI 2 "immediate_operand")])))
1871       (match_operand:VDQF 4 "register_operand" "0")))]
1872   "TARGET_SIMD"
1873   {
1874     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1875     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1876   }
1877   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1878 )
1879
1880 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1881   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1882     (fma:VDQSF
1883       (neg:VDQSF
1884         (match_operand:VDQSF 3 "register_operand" "w"))
1885       (vec_duplicate:VDQSF
1886         (vec_select:<VEL>
1887           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1888           (parallel [(match_operand:SI 2 "immediate_operand")])))
1889       (match_operand:VDQSF 4 "register_operand" "0")))]
1890   "TARGET_SIMD"
1891   {
1892     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1893     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894   }
1895   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1896 )
1897
1898 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1899   [(set (match_operand:VMUL 0 "register_operand" "=w")
1900     (fma:VMUL
1901       (neg:VMUL
1902         (match_operand:VMUL 2 "register_operand" "w"))
1903       (vec_duplicate:VMUL
1904         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1905       (match_operand:VMUL 3 "register_operand" "0")))]
1906   "TARGET_SIMD"
1907   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1908   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1909 )
1910
1911 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1912   [(set (match_operand:DF 0 "register_operand" "=w")
1913     (fma:DF
1914       (vec_select:DF
1915         (match_operand:V2DF 1 "register_operand" "w")
1916         (parallel [(match_operand:SI 2 "immediate_operand")]))
1917       (neg:DF
1918         (match_operand:DF 3 "register_operand" "w"))
1919       (match_operand:DF 4 "register_operand" "0")))]
1920   "TARGET_SIMD"
1921   {
1922     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1923     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1924   }
1925   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1926 )
1927
1928 ;; Vector versions of the floating-point frint patterns.
1929 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1930 (define_insn "<frint_pattern><mode>2"
1931   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1932         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1933                        FRINT))]
1934   "TARGET_SIMD"
1935   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1936   [(set_attr "type" "neon_fp_round_<stype><q>")]
1937 )
1938
1939 ;; Vector versions of the fcvt standard patterns.
1940 ;; Expands to lbtrunc, lround, lceil, lfloor
1941 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1942   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1943         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1944                                [(match_operand:VHSDF 1 "register_operand" "w")]
1945                                FCVT)))]
1946   "TARGET_SIMD"
1947   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1948   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1949 )
1950
1951 ;; HF Scalar variants of related SIMD instructions.
1952 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1953   [(set (match_operand:HI 0 "register_operand" "=w")
1954         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1955                       FCVT)))]
1956   "TARGET_SIMD_F16INST"
1957   "fcvt<frint_suffix><su>\t%h0, %h1"
1958   [(set_attr "type" "neon_fp_to_int_s")]
1959 )
1960
1961 (define_insn "<optab>_trunchfhi2"
1962   [(set (match_operand:HI 0 "register_operand" "=w")
1963         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1964   "TARGET_SIMD_F16INST"
1965   "fcvtz<su>\t%h0, %h1"
1966   [(set_attr "type" "neon_fp_to_int_s")]
1967 )
1968
1969 (define_insn "<optab>hihf2"
1970   [(set (match_operand:HF 0 "register_operand" "=w")
1971         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1972   "TARGET_SIMD_F16INST"
1973   "<su_optab>cvtf\t%h0, %h1"
1974   [(set_attr "type" "neon_int_to_fp_s")]
1975 )
1976
1977 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1978   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1979         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1980                                [(mult:VDQF
1981          (match_operand:VDQF 1 "register_operand" "w")
1982          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1983                                UNSPEC_FRINTZ)))]
1984   "TARGET_SIMD
1985    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1986                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1987   {
1988     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1989     char buf[64];
1990     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1991     output_asm_insn (buf, operands);
1992     return "";
1993   }
1994   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1995 )
1996
1997 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1998   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1999         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2000                                [(match_operand:VHSDF 1 "register_operand")]
2001                                 UNSPEC_FRINTZ)))]
2002   "TARGET_SIMD"
2003   {})
2004
2005 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2006   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2007         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2008                                [(match_operand:VHSDF 1 "register_operand")]
2009                                 UNSPEC_FRINTZ)))]
2010   "TARGET_SIMD"
2011   {})
2012
2013 (define_expand "ftrunc<VHSDF:mode>2"
2014   [(set (match_operand:VHSDF 0 "register_operand")
2015         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2016                        UNSPEC_FRINTZ))]
2017   "TARGET_SIMD"
2018   {})
2019
2020 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2021   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2022         (FLOATUORS:VHSDF
2023           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2024   "TARGET_SIMD"
2025   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2026   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2027 )
2028
2029 ;; Conversions between vectors of floats and doubles.
2030 ;; Contains a mix of patterns to match standard pattern names
2031 ;; and those for intrinsics.
2032
2033 ;; Float widening operations.
2034
2035 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2036   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2037         (float_extend:<VWIDE> (vec_select:<VHALF>
2038                                (match_operand:VQ_HSF 1 "register_operand" "w")
2039                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2040                             )))]
2041   "TARGET_SIMD"
2042   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2043   [(set_attr "type" "neon_fp_cvt_widen_s")]
2044 )
2045
2046 ;; Convert between fixed-point and floating-point (vector modes)
2047
2048 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2049   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2050         (unspec:<VHSDF:FCVT_TARGET>
2051           [(match_operand:VHSDF 1 "register_operand" "w")
2052            (match_operand:SI 2 "immediate_operand" "i")]
2053          FCVT_F2FIXED))]
2054   "TARGET_SIMD"
2055   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2056   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2057 )
2058
2059 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2060   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2061         (unspec:<VDQ_HSDI:FCVT_TARGET>
2062           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2063            (match_operand:SI 2 "immediate_operand" "i")]
2064          FCVT_FIXED2F))]
2065   "TARGET_SIMD"
2066   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2067   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2068 )
2069
2070 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2071 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2072 ;; the meaning of HI and LO changes depending on the target endianness.
2073 ;; While elsewhere we map the higher numbered elements of a vector to
2074 ;; the lower architectural lanes of the vector, for these patterns we want
2075 ;; to always treat "hi" as referring to the higher architectural lanes.
2076 ;; Consequently, while the patterns below look inconsistent with our
2077 ;; other big-endian patterns their behavior is as required.
2078
2079 (define_expand "vec_unpacks_lo_<mode>"
2080   [(match_operand:<VWIDE> 0 "register_operand" "")
2081    (match_operand:VQ_HSF 1 "register_operand" "")]
2082   "TARGET_SIMD"
2083   {
2084     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2085     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2086                                                        operands[1], p));
2087     DONE;
2088   }
2089 )
2090
2091 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2092   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093         (float_extend:<VWIDE> (vec_select:<VHALF>
2094                                (match_operand:VQ_HSF 1 "register_operand" "w")
2095                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2096                             )))]
2097   "TARGET_SIMD"
2098   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2099   [(set_attr "type" "neon_fp_cvt_widen_s")]
2100 )
2101
2102 (define_expand "vec_unpacks_hi_<mode>"
2103   [(match_operand:<VWIDE> 0 "register_operand" "")
2104    (match_operand:VQ_HSF 1 "register_operand" "")]
2105   "TARGET_SIMD"
2106   {
2107     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2108     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2109                                                        operands[1], p));
2110     DONE;
2111   }
2112 )
2113 (define_insn "aarch64_float_extend_lo_<Vwide>"
2114   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2115         (float_extend:<VWIDE>
2116           (match_operand:VDF 1 "register_operand" "w")))]
2117   "TARGET_SIMD"
2118   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2119   [(set_attr "type" "neon_fp_cvt_widen_s")]
2120 )
2121
2122 ;; Float narrowing operations.
2123
2124 (define_insn "aarch64_float_truncate_lo_<mode>"
2125   [(set (match_operand:VDF 0 "register_operand" "=w")
2126       (float_truncate:VDF
2127         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2128   "TARGET_SIMD"
2129   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2130   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2131 )
2132
2133 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2134   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2135     (vec_concat:<VDBL>
2136       (match_operand:VDF 1 "register_operand" "0")
2137       (float_truncate:VDF
2138         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2139   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2140   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2141   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2142 )
2143
2144 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2145   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2146     (vec_concat:<VDBL>
2147       (float_truncate:VDF
2148         (match_operand:<VWIDE> 2 "register_operand" "w"))
2149       (match_operand:VDF 1 "register_operand" "0")))]
2150   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2151   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2152   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2153 )
2154
2155 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2156   [(match_operand:<VDBL> 0 "register_operand" "=w")
2157    (match_operand:VDF 1 "register_operand" "0")
2158    (match_operand:<VWIDE> 2 "register_operand" "w")]
2159   "TARGET_SIMD"
2160 {
2161   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2162                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2163                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2164   emit_insn (gen (operands[0], operands[1], operands[2]));
2165   DONE;
2166 }
2167 )
2168
2169 (define_expand "vec_pack_trunc_v2df"
2170   [(set (match_operand:V4SF 0 "register_operand")
2171       (vec_concat:V4SF
2172         (float_truncate:V2SF
2173             (match_operand:V2DF 1 "register_operand"))
2174         (float_truncate:V2SF
2175             (match_operand:V2DF 2 "register_operand"))
2176           ))]
2177   "TARGET_SIMD"
2178   {
2179     rtx tmp = gen_reg_rtx (V2SFmode);
2180     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2181     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2182
2183     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2184     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2185                                                    tmp, operands[hi]));
2186     DONE;
2187   }
2188 )
2189
2190 (define_expand "vec_pack_trunc_df"
2191   [(set (match_operand:V2SF 0 "register_operand")
2192       (vec_concat:V2SF
2193         (float_truncate:SF
2194             (match_operand:DF 1 "register_operand"))
2195         (float_truncate:SF
2196             (match_operand:DF 2 "register_operand"))
2197           ))]
2198   "TARGET_SIMD"
2199   {
2200     rtx tmp = gen_reg_rtx (V2SFmode);
2201     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2203
2204     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2205     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2206     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2207     DONE;
2208   }
2209 )
2210
2211 ;; FP Max/Min
2212 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2213 ;; expression like:
2214 ;;      a = (b < c) ? b : c;
2215 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2216 ;; either explicitly or indirectly via -ffast-math.
2217 ;;
2218 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2219 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2220 ;; operand will be returned when both operands are zero (i.e. they may not
2221 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2222 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2223 ;; NaNs.
2224
2225 (define_insn "<su><maxmin><mode>3"
2226   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2227         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2228                        (match_operand:VHSDF 2 "register_operand" "w")))]
2229   "TARGET_SIMD"
2230   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2231   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2232 )
2233
2234 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2235 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2236 ;; which implement the IEEE fmax ()/fmin () functions.
2237 (define_insn "<maxmin_uns><mode>3"
2238   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2239        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2240                       (match_operand:VHSDF 2 "register_operand" "w")]
2241                       FMAXMIN_UNS))]
2242   "TARGET_SIMD"
2243   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2244   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2245 )
2246
2247 ;; 'across lanes' add.
2248
2249 (define_expand "reduc_plus_scal_<mode>"
2250   [(match_operand:<VEL> 0 "register_operand" "=w")
2251    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2252                UNSPEC_ADDV)]
2253   "TARGET_SIMD"
2254   {
2255     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2256     rtx scratch = gen_reg_rtx (<MODE>mode);
2257     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2258     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2259     DONE;
2260   }
2261 )
2262
2263 (define_insn "aarch64_faddp<mode>"
2264  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2265        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2266                       (match_operand:VHSDF 2 "register_operand" "w")]
2267         UNSPEC_FADDV))]
2268  "TARGET_SIMD"
2269  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2270   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2271 )
2272
2273 (define_insn "aarch64_reduc_plus_internal<mode>"
2274  [(set (match_operand:VDQV 0 "register_operand" "=w")
2275        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2276                     UNSPEC_ADDV))]
2277  "TARGET_SIMD"
2278  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2279   [(set_attr "type" "neon_reduc_add<q>")]
2280 )
2281
2282 (define_insn "aarch64_reduc_plus_internalv2si"
2283  [(set (match_operand:V2SI 0 "register_operand" "=w")
2284        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2285                     UNSPEC_ADDV))]
2286  "TARGET_SIMD"
2287  "addp\\t%0.2s, %1.2s, %1.2s"
2288   [(set_attr "type" "neon_reduc_add")]
2289 )
2290
2291 (define_insn "reduc_plus_scal_<mode>"
2292  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2293        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2294                    UNSPEC_FADDV))]
2295  "TARGET_SIMD"
2296  "faddp\\t%<Vetype>0, %1.<Vtype>"
2297   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2298 )
2299
2300 (define_expand "reduc_plus_scal_v4sf"
2301  [(set (match_operand:SF 0 "register_operand")
2302        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2303                     UNSPEC_FADDV))]
2304  "TARGET_SIMD"
2305 {
2306   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2307   rtx scratch = gen_reg_rtx (V4SFmode);
2308   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2309   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2310   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2311   DONE;
2312 })
2313
2314 (define_insn "clrsb<mode>2"
2315   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2316         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2317   "TARGET_SIMD"
2318   "cls\\t%0.<Vtype>, %1.<Vtype>"
2319   [(set_attr "type" "neon_cls<q>")]
2320 )
2321
2322 (define_insn "clz<mode>2"
2323  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2324        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2325  "TARGET_SIMD"
2326  "clz\\t%0.<Vtype>, %1.<Vtype>"
2327   [(set_attr "type" "neon_cls<q>")]
2328 )
2329
2330 (define_insn "popcount<mode>2"
2331   [(set (match_operand:VB 0 "register_operand" "=w")
2332         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2333   "TARGET_SIMD"
2334   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2335   [(set_attr "type" "neon_cnt<q>")]
2336 )
2337
2338 ;; 'across lanes' max and min ops.
2339
2340 ;; Template for outputting a scalar, so we can create __builtins which can be
2341 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2342 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2343   [(match_operand:<VEL> 0 "register_operand")
2344    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2345                   FMAXMINV)]
2346   "TARGET_SIMD"
2347   {
2348     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2349     rtx scratch = gen_reg_rtx (<MODE>mode);
2350     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2351                                                               operands[1]));
2352     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2353     DONE;
2354   }
2355 )
2356
2357 ;; Likewise for integer cases, signed and unsigned.
2358 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2359   [(match_operand:<VEL> 0 "register_operand")
2360    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2361                     MAXMINV)]
2362   "TARGET_SIMD"
2363   {
2364     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2365     rtx scratch = gen_reg_rtx (<MODE>mode);
2366     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2367                                                               operands[1]));
2368     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2369     DONE;
2370   }
2371 )
2372
2373 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2374  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2375        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2376                     MAXMINV))]
2377  "TARGET_SIMD"
2378  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2379   [(set_attr "type" "neon_reduc_minmax<q>")]
2380 )
2381
2382 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2383  [(set (match_operand:V2SI 0 "register_operand" "=w")
2384        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2385                     MAXMINV))]
2386  "TARGET_SIMD"
2387  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2388   [(set_attr "type" "neon_reduc_minmax")]
2389 )
2390
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2393        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2394                       FMAXMINV))]
2395  "TARGET_SIMD"
2396  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2397   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2398 )
2399
2400 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2401 ;; allocation.
2402 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2403 ;; to select.
2404 ;;
2405 ;; Thus our BSL is of the form:
2406 ;;   op0 = bsl (mask, op2, op3)
2407 ;; We can use any of:
2408 ;;
2409 ;;   if (op0 = mask)
2410 ;;     bsl mask, op1, op2
2411 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2412 ;;     bit op0, op2, mask
2413 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2414 ;;     bif op0, op1, mask
2415 ;;
2416 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2417 ;; Some forms of straight-line code may generate the equivalent form
2418 ;; in *aarch64_simd_bsl<mode>_alt.
2419
2420 (define_insn "aarch64_simd_bsl<mode>_internal"
2421   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2422         (xor:VDQ_I
2423            (and:VDQ_I
2424              (xor:VDQ_I
2425                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2426                (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2427              (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2428           (match_dup:<V_INT_EQUIV> 3)
2429         ))]
2430   "TARGET_SIMD"
2431   "@
2432   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2433   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2434   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2435   [(set_attr "type" "neon_bsl<q>")]
2436 )
2437
2438 ;; We need this form in addition to the above pattern to match the case
2439 ;; when combine tries merging three insns such that the second operand of
2440 ;; the outer XOR matches the second operand of the inner XOR rather than
2441 ;; the first.  The two are equivalent but since recog doesn't try all
2442 ;; permutations of commutative operations, we have to have a separate pattern.
2443
2444 (define_insn "*aarch64_simd_bsl<mode>_alt"
2445   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2446         (xor:VDQ_I
2447            (and:VDQ_I
2448              (xor:VDQ_I
2449                (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2450                (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2451               (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2452           (match_dup:<V_INT_EQUIV> 2)))]
2453   "TARGET_SIMD"
2454   "@
2455   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2456   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2457   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2458   [(set_attr "type" "neon_bsl<q>")]
2459 )
2460
2461 ;; DImode is special, we want to avoid computing operations which are
2462 ;; more naturally computed in general purpose registers in the vector
2463 ;; registers.  If we do that, we need to move all three operands from general
2464 ;; purpose registers to vector registers, then back again.  However, we
2465 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2466 ;; optimizations based on the component operations of a BSL.
2467 ;;
2468 ;; That means we need a splitter back to the individual operations, if they
2469 ;; would be better calculated on the integer side.
2470
2471 (define_insn_and_split "aarch64_simd_bsldi_internal"
2472   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2473         (xor:DI
2474            (and:DI
2475              (xor:DI
2476                (match_operand:DI 3 "register_operand" "w,0,w,r")
2477                (match_operand:DI 2 "register_operand" "w,w,0,r"))
2478              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2479           (match_dup:DI 3)
2480         ))]
2481   "TARGET_SIMD"
2482   "@
2483   bsl\\t%0.8b, %2.8b, %3.8b
2484   bit\\t%0.8b, %2.8b, %1.8b
2485   bif\\t%0.8b, %3.8b, %1.8b
2486   #"
2487   "&& GP_REGNUM_P (REGNO (operands[0]))"
2488   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2489 {
2490   /* Split back to individual operations.  If we're before reload, and
2491      able to create a temporary register, do so.  If we're after reload,
2492      we've got an early-clobber destination register, so use that.
2493      Otherwise, we can't create pseudos and we can't yet guarantee that
2494      operands[0] is safe to write, so FAIL to split.  */
2495
2496   rtx scratch;
2497   if (reload_completed)
2498     scratch = operands[0];
2499   else if (can_create_pseudo_p ())
2500     scratch = gen_reg_rtx (DImode);
2501   else
2502     FAIL;
2503
2504   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2505   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2506   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2507   DONE;
2508 }
2509   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2510    (set_attr "length" "4,4,4,12")]
2511 )
2512
2513 (define_insn_and_split "aarch64_simd_bsldi_alt"
2514   [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2515         (xor:DI
2516            (and:DI
2517              (xor:DI
2518                (match_operand:DI 3 "register_operand" "w,w,0,r")
2519                (match_operand:DI 2 "register_operand" "w,0,w,r"))
2520              (match_operand:DI 1 "register_operand" "0,w,w,r"))
2521           (match_dup:DI 2)
2522         ))]
2523   "TARGET_SIMD"
2524   "@
2525   bsl\\t%0.8b, %3.8b, %2.8b
2526   bit\\t%0.8b, %3.8b, %1.8b
2527   bif\\t%0.8b, %2.8b, %1.8b
2528   #"
2529   "&& GP_REGNUM_P (REGNO (operands[0]))"
2530   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2531 {
2532   /* Split back to individual operations.  If we're before reload, and
2533      able to create a temporary register, do so.  If we're after reload,
2534      we've got an early-clobber destination register, so use that.
2535      Otherwise, we can't create pseudos and we can't yet guarantee that
2536      operands[0] is safe to write, so FAIL to split.  */
2537
2538   rtx scratch;
2539   if (reload_completed)
2540     scratch = operands[0];
2541   else if (can_create_pseudo_p ())
2542     scratch = gen_reg_rtx (DImode);
2543   else
2544     FAIL;
2545
2546   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2547   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2548   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2549   DONE;
2550 }
2551   [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2552    (set_attr "length" "4,4,4,12")]
2553 )
2554
2555 (define_expand "aarch64_simd_bsl<mode>"
2556   [(match_operand:VALLDIF 0 "register_operand")
2557    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2558    (match_operand:VALLDIF 2 "register_operand")
2559    (match_operand:VALLDIF 3 "register_operand")]
2560  "TARGET_SIMD"
2561 {
2562   /* We can't alias operands together if they have different modes.  */
2563   rtx tmp = operands[0];
2564   if (FLOAT_MODE_P (<MODE>mode))
2565     {
2566       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2567       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2568       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2569     }
2570   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2571   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2572                                                          operands[1],
2573                                                          operands[2],
2574                                                          operands[3]));
2575   if (tmp != operands[0])
2576     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2577
2578   DONE;
2579 })
2580
2581 (define_expand "vcond_mask_<mode><v_int_equiv>"
2582   [(match_operand:VALLDI 0 "register_operand")
2583    (match_operand:VALLDI 1 "nonmemory_operand")
2584    (match_operand:VALLDI 2 "nonmemory_operand")
2585    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2586   "TARGET_SIMD"
2587 {
2588   /* If we have (a = (P) ? -1 : 0);
2589      Then we can simply move the generated mask (result must be int).  */
2590   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2591       && operands[2] == CONST0_RTX (<MODE>mode))
2592     emit_move_insn (operands[0], operands[3]);
2593   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2594   else if (operands[1] == CONST0_RTX (<MODE>mode)
2595            && operands[2] == CONSTM1_RTX (<MODE>mode))
2596     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2597   else
2598     {
2599       if (!REG_P (operands[1]))
2600         operands[1] = force_reg (<MODE>mode, operands[1]);
2601       if (!REG_P (operands[2]))
2602         operands[2] = force_reg (<MODE>mode, operands[2]);
2603       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2604                                              operands[1], operands[2]));
2605     }
2606
2607   DONE;
2608 })
2609
2610 ;; Patterns comparing two vectors to produce a mask.
2611
2612 (define_expand "vec_cmp<mode><mode>"
2613   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2614           (match_operator 1 "comparison_operator"
2615             [(match_operand:VSDQ_I_DI 2 "register_operand")
2616              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2617   "TARGET_SIMD"
2618 {
2619   rtx mask = operands[0];
2620   enum rtx_code code = GET_CODE (operands[1]);
2621
2622   switch (code)
2623     {
2624     case NE:
2625     case LE:
2626     case LT:
2627     case GE:
2628     case GT:
2629     case EQ:
2630       if (operands[3] == CONST0_RTX (<MODE>mode))
2631         break;
2632
2633       /* Fall through.  */
2634     default:
2635       if (!REG_P (operands[3]))
2636         operands[3] = force_reg (<MODE>mode, operands[3]);
2637
2638       break;
2639     }
2640
2641   switch (code)
2642     {
2643     case LT:
2644       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2645       break;
2646
2647     case GE:
2648       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2649       break;
2650
2651     case LE:
2652       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2653       break;
2654
2655     case GT:
2656       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2657       break;
2658
2659     case LTU:
2660       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2661       break;
2662
2663     case GEU:
2664       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2665       break;
2666
2667     case LEU:
2668       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2669       break;
2670
2671     case GTU:
2672       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2673       break;
2674
2675     case NE:
2676       /* Handle NE as !EQ.  */
2677       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2678       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2679       break;
2680
2681     case EQ:
2682       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2683       break;
2684
2685     default:
2686       gcc_unreachable ();
2687     }
2688
2689   DONE;
2690 })
2691
2692 (define_expand "vec_cmp<mode><v_int_equiv>"
2693   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2694         (match_operator 1 "comparison_operator"
2695             [(match_operand:VDQF 2 "register_operand")
2696              (match_operand:VDQF 3 "nonmemory_operand")]))]
2697   "TARGET_SIMD"
2698 {
2699   int use_zero_form = 0;
2700   enum rtx_code code = GET_CODE (operands[1]);
2701   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2702
2703   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2704
2705   switch (code)
2706     {
2707     case LE:
2708     case LT:
2709     case GE:
2710     case GT:
2711     case EQ:
2712       if (operands[3] == CONST0_RTX (<MODE>mode))
2713         {
2714           use_zero_form = 1;
2715           break;
2716         }
2717       /* Fall through.  */
2718     default:
2719       if (!REG_P (operands[3]))
2720         operands[3] = force_reg (<MODE>mode, operands[3]);
2721
2722       break;
2723     }
2724
2725   switch (code)
2726     {
2727     case LT:
2728       if (use_zero_form)
2729         {
2730           comparison = gen_aarch64_cmlt<mode>;
2731           break;
2732         }
2733       /* Fall through.  */
2734     case UNGE:
2735       std::swap (operands[2], operands[3]);
2736       /* Fall through.  */
2737     case UNLE:
2738     case GT:
2739       comparison = gen_aarch64_cmgt<mode>;
2740       break;
2741     case LE:
2742       if (use_zero_form)
2743         {
2744           comparison = gen_aarch64_cmle<mode>;
2745           break;
2746         }
2747       /* Fall through.  */
2748     case UNGT:
2749       std::swap (operands[2], operands[3]);
2750       /* Fall through.  */
2751     case UNLT:
2752     case GE:
2753       comparison = gen_aarch64_cmge<mode>;
2754       break;
2755     case NE:
2756     case EQ:
2757       comparison = gen_aarch64_cmeq<mode>;
2758       break;
2759     case UNEQ:
2760     case ORDERED:
2761     case UNORDERED:
2762       break;
2763     default:
2764       gcc_unreachable ();
2765     }
2766
2767   switch (code)
2768     {
2769     case UNGE:
2770     case UNGT:
2771     case UNLE:
2772     case UNLT:
2773     case NE:
2774       /* FCM returns false for lanes which are unordered, so if we use
2775          the inverse of the comparison we actually want to emit, then
2776          invert the result, we will end up with the correct result.
2777          Note that a NE NaN and NaN NE b are true for all a, b.
2778
2779          Our transformations are:
2780          a UNGE b -> !(b GT a)
2781          a UNGT b -> !(b GE a)
2782          a UNLE b -> !(a GT b)
2783          a UNLT b -> !(a GE b)
2784          a   NE b -> !(a EQ b)  */
2785       gcc_assert (comparison != NULL);
2786       emit_insn (comparison (operands[0], operands[2], operands[3]));
2787       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2788       break;
2789
2790     case LT:
2791     case LE:
2792     case GT:
2793     case GE:
2794     case EQ:
2795       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2796          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2797          a GE b -> a GE b
2798          a GT b -> a GT b
2799          a LE b -> b GE a
2800          a LT b -> b GT a
2801          a EQ b -> a EQ b  */
2802       gcc_assert (comparison != NULL);
2803       emit_insn (comparison (operands[0], operands[2], operands[3]));
2804       break;
2805
2806     case UNEQ:
2807       /* We first check (a > b ||  b > a) which is !UNEQ, inverting
2808          this result will then give us (a == b || a UNORDERED b).  */
2809       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2810                                          operands[2], operands[3]));
2811       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2812       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2813       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2814       break;
2815
2816     case UNORDERED:
2817       /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2818          UNORDERED as !ORDERED.  */
2819       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2820       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2821                                          operands[3], operands[2]));
2822       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2823       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2824       break;
2825
2826     case ORDERED:
2827       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2828       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2829                                          operands[3], operands[2]));
2830       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831       break;
2832
2833     default:
2834       gcc_unreachable ();
2835     }
2836
2837   DONE;
2838 })
2839
2840 (define_expand "vec_cmpu<mode><mode>"
2841   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2842           (match_operator 1 "comparison_operator"
2843             [(match_operand:VSDQ_I_DI 2 "register_operand")
2844              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2845   "TARGET_SIMD"
2846 {
2847   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2848                                       operands[2], operands[3]));
2849   DONE;
2850 })
2851
2852 (define_expand "vcond<mode><mode>"
2853   [(set (match_operand:VALLDI 0 "register_operand")
2854         (if_then_else:VALLDI
2855           (match_operator 3 "comparison_operator"
2856             [(match_operand:VALLDI 4 "register_operand")
2857              (match_operand:VALLDI 5 "nonmemory_operand")])
2858           (match_operand:VALLDI 1 "nonmemory_operand")
2859           (match_operand:VALLDI 2 "nonmemory_operand")))]
2860   "TARGET_SIMD"
2861 {
2862   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2863   enum rtx_code code = GET_CODE (operands[3]);
2864
2865   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2866      it as well as switch operands 1/2 in order to avoid the additional
2867      NOT instruction.  */
2868   if (code == NE)
2869     {
2870       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2871                                     operands[4], operands[5]);
2872       std::swap (operands[1], operands[2]);
2873     }
2874   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2875                                              operands[4], operands[5]));
2876   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877                                                  operands[2], mask));
2878
2879   DONE;
2880 })
2881
2882 (define_expand "vcond<v_cmp_mixed><mode>"
2883   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2884         (if_then_else:<V_cmp_mixed>
2885           (match_operator 3 "comparison_operator"
2886             [(match_operand:VDQF_COND 4 "register_operand")
2887              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2888           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2889           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2890   "TARGET_SIMD"
2891 {
2892   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2893   enum rtx_code code = GET_CODE (operands[3]);
2894
2895   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2896      it as well as switch operands 1/2 in order to avoid the additional
2897      NOT instruction.  */
2898   if (code == NE)
2899     {
2900       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2901                                     operands[4], operands[5]);
2902       std::swap (operands[1], operands[2]);
2903     }
2904   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2905                                              operands[4], operands[5]));
2906   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2907                                                 operands[0], operands[1],
2908                                                 operands[2], mask));
2909
2910   DONE;
2911 })
2912
2913 (define_expand "vcondu<mode><mode>"
2914   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2915         (if_then_else:VSDQ_I_DI
2916           (match_operator 3 "comparison_operator"
2917             [(match_operand:VSDQ_I_DI 4 "register_operand")
2918              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2919           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2920           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2921   "TARGET_SIMD"
2922 {
2923   rtx mask = gen_reg_rtx (<MODE>mode);
2924   enum rtx_code code = GET_CODE (operands[3]);
2925
2926   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2927      it as well as switch operands 1/2 in order to avoid the additional
2928      NOT instruction.  */
2929   if (code == NE)
2930     {
2931       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2932                                     operands[4], operands[5]);
2933       std::swap (operands[1], operands[2]);
2934     }
2935   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2936                                       operands[4], operands[5]));
2937   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2938                                                  operands[2], mask));
2939   DONE;
2940 })
2941
2942 (define_expand "vcondu<mode><v_cmp_mixed>"
2943   [(set (match_operand:VDQF 0 "register_operand")
2944         (if_then_else:VDQF
2945           (match_operator 3 "comparison_operator"
2946             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2947              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2948           (match_operand:VDQF 1 "nonmemory_operand")
2949           (match_operand:VDQF 2 "nonmemory_operand")))]
2950   "TARGET_SIMD"
2951 {
2952   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2953   enum rtx_code code = GET_CODE (operands[3]);
2954
2955   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2956      it as well as switch operands 1/2 in order to avoid the additional
2957      NOT instruction.  */
2958   if (code == NE)
2959     {
2960       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2961                                     operands[4], operands[5]);
2962       std::swap (operands[1], operands[2]);
2963     }
2964   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2965                                                   mask, operands[3],
2966                                                   operands[4], operands[5]));
2967   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2968                                                  operands[2], mask));
2969   DONE;
2970 })
2971
2972 ;; Patterns for AArch64 SIMD Intrinsics.
2973
2974 ;; Lane extraction with sign extension to general purpose register.
2975 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2976   [(set (match_operand:GPI 0 "register_operand" "=r")
2977         (sign_extend:GPI
2978           (vec_select:<VEL>
2979             (match_operand:VDQQH 1 "register_operand" "w")
2980             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2981   "TARGET_SIMD"
2982   {
2983     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2984     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2985   }
2986   [(set_attr "type" "neon_to_gp<q>")]
2987 )
2988
2989 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2990   [(set (match_operand:SI 0 "register_operand" "=r")
2991         (zero_extend:SI
2992           (vec_select:<VEL>
2993             (match_operand:VDQQH 1 "register_operand" "w")
2994             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2995   "TARGET_SIMD"
2996   {
2997     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2998     return "umov\\t%w0, %1.<Vetype>[%2]";
2999   }
3000   [(set_attr "type" "neon_to_gp<q>")]
3001 )
3002
3003 ;; Lane extraction of a value, neither sign nor zero extension
3004 ;; is guaranteed so upper bits should be considered undefined.
3005 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3006 (define_insn "aarch64_get_lane<mode>"
3007   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3008         (vec_select:<VEL>
3009           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3010           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3011   "TARGET_SIMD"
3012   {
3013     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3014     switch (which_alternative)
3015       {
3016         case 0:
3017           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3018         case 1:
3019           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3020         case 2:
3021           return "st1\\t{%1.<Vetype>}[%2], %0";
3022         default:
3023           gcc_unreachable ();
3024       }
3025   }
3026   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3027 )
3028
3029 (define_insn "load_pair_lanes<mode>"
3030   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3031         (vec_concat:<VDBL>
3032            (match_operand:VDC 1 "memory_operand" "Utq")
3033            (match_operand:VDC 2 "memory_operand" "m")))]
3034   "TARGET_SIMD && !STRICT_ALIGNMENT
3035    && rtx_equal_p (XEXP (operands[2], 0),
3036                    plus_constant (Pmode,
3037                                   XEXP (operands[1], 0),
3038                                   GET_MODE_SIZE (<MODE>mode)))"
3039   "ldr\\t%q0, %1"
3040   [(set_attr "type" "neon_load1_1reg_q")]
3041 )
3042
3043 (define_insn "store_pair_lanes<mode>"
3044   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3045         (vec_concat:<VDBL>
3046            (match_operand:VDC 1 "register_operand" "w, r")
3047            (match_operand:VDC 2 "register_operand" "w, r")))]
3048   "TARGET_SIMD"
3049   "@
3050    stp\\t%d1, %d2, %y0
3051    stp\\t%x1, %x2, %y0"
3052   [(set_attr "type" "neon_stp, store_16")]
3053 )
3054
3055 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3056 ;; dest vector.
3057
3058 (define_insn "*aarch64_combinez<mode>"
3059   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3060         (vec_concat:<VDBL>
3061           (match_operand:VDC 1 "general_operand" "w,?r,m")
3062           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3063   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3064   "@
3065    mov\\t%0.8b, %1.8b
3066    fmov\t%d0, %1
3067    ldr\\t%d0, %1"
3068   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3069    (set_attr "simd" "yes,*,yes")
3070    (set_attr "fp" "*,yes,*")]
3071 )
3072
3073 (define_insn "*aarch64_combinez_be<mode>"
3074   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3075         (vec_concat:<VDBL>
3076           (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3077           (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3078   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3079   "@
3080    mov\\t%0.8b, %1.8b
3081    fmov\t%d0, %1
3082    ldr\\t%d0, %1"
3083   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3084    (set_attr "simd" "yes,*,yes")
3085    (set_attr "fp" "*,yes,*")]
3086 )
3087
3088 (define_expand "aarch64_combine<mode>"
3089   [(match_operand:<VDBL> 0 "register_operand")
3090    (match_operand:VDC 1 "register_operand")
3091    (match_operand:VDC 2 "register_operand")]
3092   "TARGET_SIMD"
3093 {
3094   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3095
3096   DONE;
3097 }
3098 )
3099
3100 (define_expand "aarch64_simd_combine<mode>"
3101   [(match_operand:<VDBL> 0 "register_operand")
3102    (match_operand:VDC 1 "register_operand")
3103    (match_operand:VDC 2 "register_operand")]
3104   "TARGET_SIMD"
3105   {
3106     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3107     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3108     DONE;
3109   }
3110 [(set_attr "type" "multiple")]
3111 )
3112
3113 ;; <su><addsub>l<q>.
3114
3115 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3116  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3117        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3118                            (match_operand:VQW 1 "register_operand" "w")
3119                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3120                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3121                            (match_operand:VQW 2 "register_operand" "w")
3122                            (match_dup 3)))))]
3123   "TARGET_SIMD"
3124   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3125   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3126 )
3127
3128 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3129  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3130        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3131                            (match_operand:VQW 1 "register_operand" "w")
3132                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3133                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3134                            (match_operand:VQW 2 "register_operand" "w")
3135                            (match_dup 3)))))]
3136   "TARGET_SIMD"
3137   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3138   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3139 )
3140
3141
3142 (define_expand "aarch64_saddl2<mode>"
3143   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3144    (match_operand:VQW 1 "register_operand" "w")
3145    (match_operand:VQW 2 "register_operand" "w")]
3146   "TARGET_SIMD"
3147 {
3148   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3149   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3150                                                   operands[2], p));
3151   DONE;
3152 })
3153
3154 (define_expand "aarch64_uaddl2<mode>"
3155   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3156    (match_operand:VQW 1 "register_operand" "w")
3157    (match_operand:VQW 2 "register_operand" "w")]
3158   "TARGET_SIMD"
3159 {
3160   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3161   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3162                                                   operands[2], p));
3163   DONE;
3164 })
3165
3166 (define_expand "aarch64_ssubl2<mode>"
3167   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3168    (match_operand:VQW 1 "register_operand" "w")
3169    (match_operand:VQW 2 "register_operand" "w")]
3170   "TARGET_SIMD"
3171 {
3172   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3173   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3174                                                 operands[2], p));
3175   DONE;
3176 })
3177
3178 (define_expand "aarch64_usubl2<mode>"
3179   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3180    (match_operand:VQW 1 "register_operand" "w")
3181    (match_operand:VQW 2 "register_operand" "w")]
3182   "TARGET_SIMD"
3183 {
3184   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3185   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3186                                                 operands[2], p));
3187   DONE;
3188 })
3189
3190 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3191  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3192        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3193                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3194                        (ANY_EXTEND:<VWIDE>
3195                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3196   "TARGET_SIMD"
3197   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3198   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3199 )
3200
3201 ;; <su><addsub>w<q>.
3202
3203 (define_expand "widen_ssum<mode>3"
3204   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3205         (plus:<VDBLW> (sign_extend:<VDBLW>
3206                         (match_operand:VQW 1 "register_operand" ""))
3207                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3208   "TARGET_SIMD"
3209   {
3210     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3211     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3212
3213     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3214                                                 operands[1], p));
3215     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3216     DONE;
3217   }
3218 )
3219
3220 (define_expand "widen_ssum<mode>3"
3221   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3222         (plus:<VWIDE> (sign_extend:<VWIDE>
3223                         (match_operand:VD_BHSI 1 "register_operand" ""))
3224                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3225   "TARGET_SIMD"
3226 {
3227   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3228   DONE;
3229 })
3230
3231 (define_expand "widen_usum<mode>3"
3232   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3233         (plus:<VDBLW> (zero_extend:<VDBLW>
3234                         (match_operand:VQW 1 "register_operand" ""))
3235                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3236   "TARGET_SIMD"
3237   {
3238     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3239     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3240
3241     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3242                                                  operands[1], p));
3243     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3244     DONE;
3245   }
3246 )
3247
3248 (define_expand "widen_usum<mode>3"
3249   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3250         (plus:<VWIDE> (zero_extend:<VWIDE>
3251                         (match_operand:VD_BHSI 1 "register_operand" ""))
3252                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3253   "TARGET_SIMD"
3254 {
3255   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3256   DONE;
3257 })
3258
3259 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3260   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3261         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3262                         (ANY_EXTEND:<VWIDE>
3263                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3264   "TARGET_SIMD"
3265   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3266   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3267 )
3268
3269 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3270   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3271         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3272                         (ANY_EXTEND:<VWIDE>
3273                           (vec_select:<VHALF>
3274                            (match_operand:VQW 2 "register_operand" "w")
3275                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3276   "TARGET_SIMD"
3277   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3278   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3279 )
3280
3281 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3282   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3283         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3284                         (ANY_EXTEND:<VWIDE>
3285                           (vec_select:<VHALF>
3286                            (match_operand:VQW 2 "register_operand" "w")
3287                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3288   "TARGET_SIMD"
3289   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3290   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3291 )
3292
3293 (define_expand "aarch64_saddw2<mode>"
3294   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3295    (match_operand:<VWIDE> 1 "register_operand" "w")
3296    (match_operand:VQW 2 "register_operand" "w")]
3297   "TARGET_SIMD"
3298 {
3299   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3301                                                 operands[2], p));
3302   DONE;
3303 })
3304
3305 (define_expand "aarch64_uaddw2<mode>"
3306   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3307    (match_operand:<VWIDE> 1 "register_operand" "w")
3308    (match_operand:VQW 2 "register_operand" "w")]
3309   "TARGET_SIMD"
3310 {
3311   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3312   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3313                                                 operands[2], p));
3314   DONE;
3315 })
3316
3317
3318 (define_expand "aarch64_ssubw2<mode>"
3319   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3320    (match_operand:<VWIDE> 1 "register_operand" "w")
3321    (match_operand:VQW 2 "register_operand" "w")]
3322   "TARGET_SIMD"
3323 {
3324   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3325   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3326                                                 operands[2], p));
3327   DONE;
3328 })
3329
3330 (define_expand "aarch64_usubw2<mode>"
3331   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3332    (match_operand:<VWIDE> 1 "register_operand" "w")
3333    (match_operand:VQW 2 "register_operand" "w")]
3334   "TARGET_SIMD"
3335 {
3336   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3337   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3338                                                 operands[2], p));
3339   DONE;
3340 })
3341
3342 ;; <su><r>h<addsub>.
3343
3344 (define_insn "aarch64_<sur>h<addsub><mode>"
3345   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3346         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3347                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3348                      HADDSUB))]
3349   "TARGET_SIMD"
3350   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3351   [(set_attr "type" "neon_<addsub>_halve<q>")]
3352 )
3353
3354 ;; <r><addsub>hn<q>.
3355
3356 (define_insn "aarch64_<sur><addsub>hn<mode>"
3357   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3358         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3359                             (match_operand:VQN 2 "register_operand" "w")]
3360                            ADDSUBHN))]
3361   "TARGET_SIMD"
3362   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3363   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3364 )
3365
3366 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3367   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3368         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3369                              (match_operand:VQN 2 "register_operand" "w")
3370                              (match_operand:VQN 3 "register_operand" "w")]
3371                             ADDSUBHN2))]
3372   "TARGET_SIMD"
3373   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3374   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3375 )
3376
3377 ;; pmul.
3378
3379 (define_insn "aarch64_pmul<mode>"
3380   [(set (match_operand:VB 0 "register_operand" "=w")
3381         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3382                     (match_operand:VB 2 "register_operand" "w")]
3383                    UNSPEC_PMUL))]
3384  "TARGET_SIMD"
3385  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3386   [(set_attr "type" "neon_mul_<Vetype><q>")]
3387 )
3388
3389 ;; fmulx.
3390
3391 (define_insn "aarch64_fmulx<mode>"
3392   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3393         (unspec:VHSDF_HSDF
3394           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3395            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3396            UNSPEC_FMULX))]
3397  "TARGET_SIMD"
3398  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3399  [(set_attr "type" "neon_fp_mul_<stype>")]
3400 )
3401
3402 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3403
3404 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3405   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3406         (unspec:VDQSF
3407          [(match_operand:VDQSF 1 "register_operand" "w")
3408           (vec_duplicate:VDQSF
3409            (vec_select:<VEL>
3410             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3411             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3412          UNSPEC_FMULX))]
3413   "TARGET_SIMD"
3414   {
3415     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3416     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3417   }
3418   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3419 )
3420
3421 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3422
3423 (define_insn "*aarch64_mulx_elt<mode>"
3424   [(set (match_operand:VDQF 0 "register_operand" "=w")
3425         (unspec:VDQF
3426          [(match_operand:VDQF 1 "register_operand" "w")
3427           (vec_duplicate:VDQF
3428            (vec_select:<VEL>
3429             (match_operand:VDQF 2 "register_operand" "w")
3430             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3431          UNSPEC_FMULX))]
3432   "TARGET_SIMD"
3433   {
3434     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3435     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3436   }
3437   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3438 )
3439
3440 ;; vmulxq_lane
3441
3442 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3443   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3444         (unspec:VHSDF
3445          [(match_operand:VHSDF 1 "register_operand" "w")
3446           (vec_duplicate:VHSDF
3447             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3448          UNSPEC_FMULX))]
3449   "TARGET_SIMD"
3450   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3451   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3452 )
3453
3454 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3455 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3456 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3457
3458 (define_insn "*aarch64_vgetfmulx<mode>"
3459   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3460         (unspec:<VEL>
3461          [(match_operand:<VEL> 1 "register_operand" "w")
3462           (vec_select:<VEL>
3463            (match_operand:VDQF 2 "register_operand" "w")
3464             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3465          UNSPEC_FMULX))]
3466   "TARGET_SIMD"
3467   {
3468     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3469     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3470   }
3471   [(set_attr "type" "fmul<Vetype>")]
3472 )
3473 ;; <su>q<addsub>
3474
3475 (define_insn "aarch64_<su_optab><optab><mode>"
3476   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3477         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3478                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3479   "TARGET_SIMD"
3480   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3481   [(set_attr "type" "neon_<optab><q>")]
3482 )
3483
3484 ;; suqadd and usqadd
3485
3486 (define_insn "aarch64_<sur>qadd<mode>"
3487   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3488         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3489                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3490                        USSUQADD))]
3491   "TARGET_SIMD"
3492   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3493   [(set_attr "type" "neon_qadd<q>")]
3494 )
3495
3496 ;; sqmovun
3497
3498 (define_insn "aarch64_sqmovun<mode>"
3499   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3500         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3501                             UNSPEC_SQXTUN))]
3502    "TARGET_SIMD"
3503    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3504    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3505 )
3506
3507 ;; sqmovn and uqmovn
3508
3509 (define_insn "aarch64_<sur>qmovn<mode>"
3510   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3511         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3512                             SUQMOVN))]
3513   "TARGET_SIMD"
3514   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3515    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3516 )
3517
3518 ;; <su>q<absneg>
3519
3520 (define_insn "aarch64_s<optab><mode>"
3521   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3522         (UNQOPS:VSDQ_I
3523           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3524   "TARGET_SIMD"
3525   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3526   [(set_attr "type" "neon_<optab><q>")]
3527 )
3528
3529 ;; sq<r>dmulh.
3530
3531 (define_insn "aarch64_sq<r>dmulh<mode>"
3532   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3533         (unspec:VSDQ_HSI
3534           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3535            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3536          VQDMULH))]
3537   "TARGET_SIMD"
3538   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3539   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3540 )
3541
3542 ;; sq<r>dmulh_lane
3543
3544 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3545   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3546         (unspec:VDQHS
3547           [(match_operand:VDQHS 1 "register_operand" "w")
3548            (vec_select:<VEL>
3549              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3550              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3551          VQDMULH))]
3552   "TARGET_SIMD"
3553   "*
3554    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3555    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3556   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3557 )
3558
3559 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3560   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3561         (unspec:VDQHS
3562           [(match_operand:VDQHS 1 "register_operand" "w")
3563            (vec_select:<VEL>
3564              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3565              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3566          VQDMULH))]
3567   "TARGET_SIMD"
3568   "*
3569    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3570    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3571   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3572 )
3573
3574 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3575   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3576         (unspec:SD_HSI
3577           [(match_operand:SD_HSI 1 "register_operand" "w")
3578            (vec_select:<VEL>
3579              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3580              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3581          VQDMULH))]
3582   "TARGET_SIMD"
3583   "*
3584    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3585    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3586   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3587 )
3588
3589 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3590   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3591         (unspec:SD_HSI
3592           [(match_operand:SD_HSI 1 "register_operand" "w")
3593            (vec_select:<VEL>
3594              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3595              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3596          VQDMULH))]
3597   "TARGET_SIMD"
3598   "*
3599    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3600    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3601   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3602 )
3603
3604 ;; sqrdml[as]h.
3605
3606 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3607   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3608         (unspec:VSDQ_HSI
3609           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3610            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3611            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3612           SQRDMLH_AS))]
3613    "TARGET_SIMD_RDMA"
3614    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3615    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3616 )
3617
3618 ;; sqrdml[as]h_lane.
3619
3620 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3621   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3622         (unspec:VDQHS
3623           [(match_operand:VDQHS 1 "register_operand" "0")
3624            (match_operand:VDQHS 2 "register_operand" "w")
3625            (vec_select:<VEL>
3626              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3627              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3628           SQRDMLH_AS))]
3629    "TARGET_SIMD_RDMA"
3630    {
3631      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3632      return
3633       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3634    }
3635    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3636 )
3637
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3640         (unspec:SD_HSI
3641           [(match_operand:SD_HSI 1 "register_operand" "0")
3642            (match_operand:SD_HSI 2 "register_operand" "w")
3643            (vec_select:<VEL>
3644              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646           SQRDMLH_AS))]
3647    "TARGET_SIMD_RDMA"
3648    {
3649      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650      return
3651       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3652    }
3653    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3654 )
3655
3656 ;; sqrdml[as]h_laneq.
3657
3658 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3659   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3660         (unspec:VDQHS
3661           [(match_operand:VDQHS 1 "register_operand" "0")
3662            (match_operand:VDQHS 2 "register_operand" "w")
3663            (vec_select:<VEL>
3664              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3665              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3666           SQRDMLH_AS))]
3667    "TARGET_SIMD_RDMA"
3668    {
3669      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3670      return
3671       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3672    }
3673    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 )
3675
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3678         (unspec:SD_HSI
3679           [(match_operand:SD_HSI 1 "register_operand" "0")
3680            (match_operand:SD_HSI 2 "register_operand" "w")
3681            (vec_select:<VEL>
3682              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684           SQRDMLH_AS))]
3685    "TARGET_SIMD_RDMA"
3686    {
3687      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688      return
3689       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3690    }
3691    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3692 )
3693
3694 ;; vqdml[sa]l
3695
3696 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3697   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3698         (SBINQOPS:<VWIDE>
3699           (match_operand:<VWIDE> 1 "register_operand" "0")
3700           (ss_ashift:<VWIDE>
3701               (mult:<VWIDE>
3702                 (sign_extend:<VWIDE>
3703                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3704                 (sign_extend:<VWIDE>
3705                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3706               (const_int 1))))]
3707   "TARGET_SIMD"
3708   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3710 )
3711
3712 ;; vqdml[sa]l_lane
3713
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3715   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716         (SBINQOPS:<VWIDE>
3717           (match_operand:<VWIDE> 1 "register_operand" "0")
3718           (ss_ashift:<VWIDE>
3719             (mult:<VWIDE>
3720               (sign_extend:<VWIDE>
3721                 (match_operand:VD_HSI 2 "register_operand" "w"))
3722               (sign_extend:<VWIDE>
3723                 (vec_duplicate:VD_HSI
3724                   (vec_select:<VEL>
3725                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3726                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3727               ))
3728             (const_int 1))))]
3729   "TARGET_SIMD"
3730   {
3731     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3732     return
3733       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3734   }
3735   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3736 )
3737
3738 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3739   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3740         (SBINQOPS:<VWIDE>
3741           (match_operand:<VWIDE> 1 "register_operand" "0")
3742           (ss_ashift:<VWIDE>
3743             (mult:<VWIDE>
3744               (sign_extend:<VWIDE>
3745                 (match_operand:VD_HSI 2 "register_operand" "w"))
3746               (sign_extend:<VWIDE>
3747                 (vec_duplicate:VD_HSI
3748                   (vec_select:<VEL>
3749                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3750                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3751               ))
3752             (const_int 1))))]
3753   "TARGET_SIMD"
3754   {
3755     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3756     return
3757       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3758   }
3759   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3760 )
3761
3762 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3763   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3764         (SBINQOPS:<VWIDE>
3765           (match_operand:<VWIDE> 1 "register_operand" "0")
3766           (ss_ashift:<VWIDE>
3767             (mult:<VWIDE>
3768               (sign_extend:<VWIDE>
3769                 (match_operand:SD_HSI 2 "register_operand" "w"))
3770               (sign_extend:<VWIDE>
3771                 (vec_select:<VEL>
3772                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3773                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3774               )
3775             (const_int 1))))]
3776   "TARGET_SIMD"
3777   {
3778     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3779     return
3780       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3781   }
3782   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3783 )
3784
3785 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3786   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3787         (SBINQOPS:<VWIDE>
3788           (match_operand:<VWIDE> 1 "register_operand" "0")
3789           (ss_ashift:<VWIDE>
3790             (mult:<VWIDE>
3791               (sign_extend:<VWIDE>
3792                 (match_operand:SD_HSI 2 "register_operand" "w"))
3793               (sign_extend:<VWIDE>
3794                 (vec_select:<VEL>
3795                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3796                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3797               )
3798             (const_int 1))))]
3799   "TARGET_SIMD"
3800   {
3801     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3802     return
3803       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3804   }
3805   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3806 )
3807
3808 ;; vqdml[sa]l_n
3809
3810 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3811   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3812         (SBINQOPS:<VWIDE>
3813           (match_operand:<VWIDE> 1 "register_operand" "0")
3814           (ss_ashift:<VWIDE>
3815               (mult:<VWIDE>
3816                 (sign_extend:<VWIDE>
3817                       (match_operand:VD_HSI 2 "register_operand" "w"))
3818                 (sign_extend:<VWIDE>
3819                   (vec_duplicate:VD_HSI
3820                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3821               (const_int 1))))]
3822   "TARGET_SIMD"
3823   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3824   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3825 )
3826
3827 ;; sqdml[as]l2
3828
3829 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3830   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3831         (SBINQOPS:<VWIDE>
3832          (match_operand:<VWIDE> 1 "register_operand" "0")
3833          (ss_ashift:<VWIDE>
3834              (mult:<VWIDE>
3835                (sign_extend:<VWIDE>
3836                  (vec_select:<VHALF>
3837                      (match_operand:VQ_HSI 2 "register_operand" "w")
3838                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3839                (sign_extend:<VWIDE>
3840                  (vec_select:<VHALF>
3841                      (match_operand:VQ_HSI 3 "register_operand" "w")
3842                      (match_dup 4))))
3843              (const_int 1))))]
3844   "TARGET_SIMD"
3845   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3846   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3847 )
3848
3849 (define_expand "aarch64_sqdmlal2<mode>"
3850   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3851    (match_operand:<VWIDE> 1 "register_operand" "w")
3852    (match_operand:VQ_HSI 2 "register_operand" "w")
3853    (match_operand:VQ_HSI 3 "register_operand" "w")]
3854   "TARGET_SIMD"
3855 {
3856   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3857   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3858                                                   operands[2], operands[3], p));
3859   DONE;
3860 })
3861
3862 (define_expand "aarch64_sqdmlsl2<mode>"
3863   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3864    (match_operand:<VWIDE> 1 "register_operand" "w")
3865    (match_operand:VQ_HSI 2 "register_operand" "w")
3866    (match_operand:VQ_HSI 3 "register_operand" "w")]
3867   "TARGET_SIMD"
3868 {
3869   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3870   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3871                                                   operands[2], operands[3], p));
3872   DONE;
3873 })
3874
3875 ;; vqdml[sa]l2_lane
3876
3877 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3878   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3879         (SBINQOPS:<VWIDE>
3880           (match_operand:<VWIDE> 1 "register_operand" "0")
3881           (ss_ashift:<VWIDE>
3882               (mult:<VWIDE>
3883                 (sign_extend:<VWIDE>
3884                   (vec_select:<VHALF>
3885                     (match_operand:VQ_HSI 2 "register_operand" "w")
3886                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3887                 (sign_extend:<VWIDE>
3888                   (vec_duplicate:<VHALF>
3889                     (vec_select:<VEL>
3890                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3891                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3892                     ))))
3893               (const_int 1))))]
3894   "TARGET_SIMD"
3895   {
3896     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3897     return
3898      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3899   }
3900   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3901 )
3902
3903 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3904   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3905         (SBINQOPS:<VWIDE>
3906           (match_operand:<VWIDE> 1 "register_operand" "0")
3907           (ss_ashift:<VWIDE>
3908               (mult:<VWIDE>
3909                 (sign_extend:<VWIDE>
3910                   (vec_select:<VHALF>
3911                     (match_operand:VQ_HSI 2 "register_operand" "w")
3912                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3913                 (sign_extend:<VWIDE>
3914                   (vec_duplicate:<VHALF>
3915                     (vec_select:<VEL>
3916                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3917                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3918                     ))))
3919               (const_int 1))))]
3920   "TARGET_SIMD"
3921   {
3922     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3923     return
3924      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3925   }
3926   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3927 )
3928
3929 (define_expand "aarch64_sqdmlal2_lane<mode>"
3930   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3931    (match_operand:<VWIDE> 1 "register_operand" "w")
3932    (match_operand:VQ_HSI 2 "register_operand" "w")
3933    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3934    (match_operand:SI 4 "immediate_operand" "i")]
3935   "TARGET_SIMD"
3936 {
3937   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3938   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3939                                                        operands[2], operands[3],
3940                                                        operands[4], p));
3941   DONE;
3942 })
3943
3944 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3945   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3946    (match_operand:<VWIDE> 1 "register_operand" "w")
3947    (match_operand:VQ_HSI 2 "register_operand" "w")
3948    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3949    (match_operand:SI 4 "immediate_operand" "i")]
3950   "TARGET_SIMD"
3951 {
3952   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3953   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3954                                                        operands[2], operands[3],
3955                                                        operands[4], p));
3956   DONE;
3957 })
3958
3959 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3960   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3961    (match_operand:<VWIDE> 1 "register_operand" "w")
3962    (match_operand:VQ_HSI 2 "register_operand" "w")
3963    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3964    (match_operand:SI 4 "immediate_operand" "i")]
3965   "TARGET_SIMD"
3966 {
3967   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3968   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3969                                                        operands[2], operands[3],
3970                                                        operands[4], p));
3971   DONE;
3972 })
3973
3974 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3975   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3976    (match_operand:<VWIDE> 1 "register_operand" "w")
3977    (match_operand:VQ_HSI 2 "register_operand" "w")
3978    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3979    (match_operand:SI 4 "immediate_operand" "i")]
3980   "TARGET_SIMD"
3981 {
3982   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3983   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3984                                                        operands[2], operands[3],
3985                                                        operands[4], p));
3986   DONE;
3987 })
3988
3989 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3990   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3991         (SBINQOPS:<VWIDE>
3992           (match_operand:<VWIDE> 1 "register_operand" "0")
3993           (ss_ashift:<VWIDE>
3994             (mult:<VWIDE>
3995               (sign_extend:<VWIDE>
3996                 (vec_select:<VHALF>
3997                   (match_operand:VQ_HSI 2 "register_operand" "w")
3998                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3999               (sign_extend:<VWIDE>
4000                 (vec_duplicate:<VHALF>
4001                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4002             (const_int 1))))]
4003   "TARGET_SIMD"
4004   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4005   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4006 )
4007
4008 (define_expand "aarch64_sqdmlal2_n<mode>"
4009   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4010    (match_operand:<VWIDE> 1 "register_operand" "w")
4011    (match_operand:VQ_HSI 2 "register_operand" "w")
4012    (match_operand:<VEL> 3 "register_operand" "w")]
4013   "TARGET_SIMD"
4014 {
4015   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4017                                                     operands[2], operands[3],
4018                                                     p));
4019   DONE;
4020 })
4021
4022 (define_expand "aarch64_sqdmlsl2_n<mode>"
4023   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024    (match_operand:<VWIDE> 1 "register_operand" "w")
4025    (match_operand:VQ_HSI 2 "register_operand" "w")
4026    (match_operand:<VEL> 3 "register_operand" "w")]
4027   "TARGET_SIMD"
4028 {
4029   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4030   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4031                                                     operands[2], operands[3],
4032                                                     p));
4033   DONE;
4034 })
4035
4036 ;; vqdmull
4037
4038 (define_insn "aarch64_sqdmull<mode>"
4039   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4040         (ss_ashift:<VWIDE>
4041              (mult:<VWIDE>
4042                (sign_extend:<VWIDE>
4043                      (match_operand:VSD_HSI 1 "register_operand" "w"))
4044                (sign_extend:<VWIDE>
4045                      (match_operand:VSD_HSI 2 "register_operand" "w")))
4046              (const_int 1)))]
4047   "TARGET_SIMD"
4048   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4049   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4050 )
4051
4052 ;; vqdmull_lane
4053
4054 (define_insn "aarch64_sqdmull_lane<mode>"
4055   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4056         (ss_ashift:<VWIDE>
4057              (mult:<VWIDE>
4058                (sign_extend:<VWIDE>
4059                  (match_operand:VD_HSI 1 "register_operand" "w"))
4060                (sign_extend:<VWIDE>
4061                  (vec_duplicate:VD_HSI
4062                    (vec_select:<VEL>
4063                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4064                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4065                ))
4066              (const_int 1)))]
4067   "TARGET_SIMD"
4068   {
4069     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4070     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4071   }
4072   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4073 )
4074
4075 (define_insn "aarch64_sqdmull_laneq<mode>"
4076   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4077         (ss_ashift:<VWIDE>
4078              (mult:<VWIDE>
4079                (sign_extend:<VWIDE>
4080                  (match_operand:VD_HSI 1 "register_operand" "w"))
4081                (sign_extend:<VWIDE>
4082                  (vec_duplicate:VD_HSI
4083                    (vec_select:<VEL>
4084                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4085                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4086                ))
4087              (const_int 1)))]
4088   "TARGET_SIMD"
4089   {
4090     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4091     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4092   }
4093   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4094 )
4095
4096 (define_insn "aarch64_sqdmull_lane<mode>"
4097   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4098         (ss_ashift:<VWIDE>
4099              (mult:<VWIDE>
4100                (sign_extend:<VWIDE>
4101                  (match_operand:SD_HSI 1 "register_operand" "w"))
4102                (sign_extend:<VWIDE>
4103                  (vec_select:<VEL>
4104                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4105                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4106                ))
4107              (const_int 1)))]
4108   "TARGET_SIMD"
4109   {
4110     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4111     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4112   }
4113   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4114 )
4115
4116 (define_insn "aarch64_sqdmull_laneq<mode>"
4117   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118         (ss_ashift:<VWIDE>
4119              (mult:<VWIDE>
4120                (sign_extend:<VWIDE>
4121                  (match_operand:SD_HSI 1 "register_operand" "w"))
4122                (sign_extend:<VWIDE>
4123                  (vec_select:<VEL>
4124                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4125                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4126                ))
4127              (const_int 1)))]
4128   "TARGET_SIMD"
4129   {
4130     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4131     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4132   }
4133   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4134 )
4135
4136 ;; vqdmull_n
4137
4138 (define_insn "aarch64_sqdmull_n<mode>"
4139   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4140         (ss_ashift:<VWIDE>
4141              (mult:<VWIDE>
4142                (sign_extend:<VWIDE>
4143                  (match_operand:VD_HSI 1 "register_operand" "w"))
4144                (sign_extend:<VWIDE>
4145                  (vec_duplicate:VD_HSI
4146                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4147                )
4148              (const_int 1)))]
4149   "TARGET_SIMD"
4150   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4151   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4152 )
4153
4154 ;; vqdmull2
4155
4156
4157
4158 (define_insn "aarch64_sqdmull2<mode>_internal"
4159   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160         (ss_ashift:<VWIDE>
4161              (mult:<VWIDE>
4162                (sign_extend:<VWIDE>
4163                  (vec_select:<VHALF>
4164                    (match_operand:VQ_HSI 1 "register_operand" "w")
4165                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4166                (sign_extend:<VWIDE>
4167                  (vec_select:<VHALF>
4168                    (match_operand:VQ_HSI 2 "register_operand" "w")
4169                    (match_dup 3)))
4170                )
4171              (const_int 1)))]
4172   "TARGET_SIMD"
4173   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4174   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4175 )
4176
4177 (define_expand "aarch64_sqdmull2<mode>"
4178   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4179    (match_operand:VQ_HSI 1 "register_operand" "w")
4180    (match_operand:VQ_HSI 2 "register_operand" "w")]
4181   "TARGET_SIMD"
4182 {
4183   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4184   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4185                                                   operands[2], p));
4186   DONE;
4187 })
4188
4189 ;; vqdmull2_lane
4190
4191 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4192   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4193         (ss_ashift:<VWIDE>
4194              (mult:<VWIDE>
4195                (sign_extend:<VWIDE>
4196                  (vec_select:<VHALF>
4197                    (match_operand:VQ_HSI 1 "register_operand" "w")
4198                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4199                (sign_extend:<VWIDE>
4200                  (vec_duplicate:<VHALF>
4201                    (vec_select:<VEL>
4202                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4203                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4204                ))
4205              (const_int 1)))]
4206   "TARGET_SIMD"
4207   {
4208     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4209     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4210   }
4211   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4212 )
4213
4214 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4215   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4216         (ss_ashift:<VWIDE>
4217              (mult:<VWIDE>
4218                (sign_extend:<VWIDE>
4219                  (vec_select:<VHALF>
4220                    (match_operand:VQ_HSI 1 "register_operand" "w")
4221                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4222                (sign_extend:<VWIDE>
4223                  (vec_duplicate:<VHALF>
4224                    (vec_select:<VEL>
4225                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4226                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4227                ))
4228              (const_int 1)))]
4229   "TARGET_SIMD"
4230   {
4231     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4232     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4233   }
4234   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4235 )
4236
4237 (define_expand "aarch64_sqdmull2_lane<mode>"
4238   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4239    (match_operand:VQ_HSI 1 "register_operand" "w")
4240    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4241    (match_operand:SI 3 "immediate_operand" "i")]
4242   "TARGET_SIMD"
4243 {
4244   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4245   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4246                                                        operands[2], operands[3],
4247                                                        p));
4248   DONE;
4249 })
4250
4251 (define_expand "aarch64_sqdmull2_laneq<mode>"
4252   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4253    (match_operand:VQ_HSI 1 "register_operand" "w")
4254    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4255    (match_operand:SI 3 "immediate_operand" "i")]
4256   "TARGET_SIMD"
4257 {
4258   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4259   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4260                                                        operands[2], operands[3],
4261                                                        p));
4262   DONE;
4263 })
4264
4265 ;; vqdmull2_n
4266
4267 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4268   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4269         (ss_ashift:<VWIDE>
4270              (mult:<VWIDE>
4271                (sign_extend:<VWIDE>
4272                  (vec_select:<VHALF>
4273                    (match_operand:VQ_HSI 1 "register_operand" "w")
4274                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4275                (sign_extend:<VWIDE>
4276                  (vec_duplicate:<VHALF>
4277                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4278                )
4279              (const_int 1)))]
4280   "TARGET_SIMD"
4281   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4282   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4283 )
4284
4285 (define_expand "aarch64_sqdmull2_n<mode>"
4286   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4287    (match_operand:VQ_HSI 1 "register_operand" "w")
4288    (match_operand:<VEL> 2 "register_operand" "w")]
4289   "TARGET_SIMD"
4290 {
4291   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4292   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4293                                                     operands[2], p));
4294   DONE;
4295 })
4296
4297 ;; vshl
4298
4299 (define_insn "aarch64_<sur>shl<mode>"
4300   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4301         (unspec:VSDQ_I_DI
4302           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4303            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4304          VSHL))]
4305   "TARGET_SIMD"
4306   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4307   [(set_attr "type" "neon_shift_reg<q>")]
4308 )
4309
4310
4311 ;; vqshl
4312
4313 (define_insn "aarch64_<sur>q<r>shl<mode>"
4314   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4315         (unspec:VSDQ_I
4316           [(match_operand:VSDQ_I 1 "register_operand" "w")
4317            (match_operand:VSDQ_I 2 "register_operand" "w")]
4318          VQSHL))]
4319   "TARGET_SIMD"
4320   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4321   [(set_attr "type" "neon_sat_shift_reg<q>")]
4322 )
4323
4324 ;; vshll_n
4325
4326 (define_insn "aarch64_<sur>shll_n<mode>"
4327   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4328         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4329                          (match_operand:SI 2
4330                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4331                          VSHLL))]
4332   "TARGET_SIMD"
4333   {
4334     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4335       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4336     else
4337       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4338   }
4339   [(set_attr "type" "neon_shift_imm_long")]
4340 )
4341
4342 ;; vshll_high_n
4343
4344 (define_insn "aarch64_<sur>shll2_n<mode>"
4345   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4347                          (match_operand:SI 2 "immediate_operand" "i")]
4348                          VSHLL))]
4349   "TARGET_SIMD"
4350   {
4351     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4352       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4353     else
4354       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4355   }
4356   [(set_attr "type" "neon_shift_imm_long")]
4357 )
4358
4359 ;; vrshr_n
4360
4361 (define_insn "aarch64_<sur>shr_n<mode>"
4362   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4363         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4364                            (match_operand:SI 2
4365                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4366                           VRSHR_N))]
4367   "TARGET_SIMD"
4368   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4369   [(set_attr "type" "neon_sat_shift_imm<q>")]
4370 )
4371
4372 ;; v(r)sra_n
4373
4374 (define_insn "aarch64_<sur>sra_n<mode>"
4375   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4376         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4377                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4378                        (match_operand:SI 3
4379                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4380                       VSRA))]
4381   "TARGET_SIMD"
4382   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4383   [(set_attr "type" "neon_shift_acc<q>")]
4384 )
4385
4386 ;; vs<lr>i_n
4387
4388 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4389   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4390         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4391                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4392                        (match_operand:SI 3
4393                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4394                       VSLRI))]
4395   "TARGET_SIMD"
4396   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4397   [(set_attr "type" "neon_shift_imm<q>")]
4398 )
4399
4400 ;; vqshl(u)
4401
4402 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4403   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4404         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4405                        (match_operand:SI 2
4406                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4407                       VQSHL_N))]
4408   "TARGET_SIMD"
4409   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4410   [(set_attr "type" "neon_sat_shift_imm<q>")]
4411 )
4412
4413
4414 ;; vq(r)shr(u)n_n
4415
4416 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4417   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4418         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4419                             (match_operand:SI 2
4420                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4421                            VQSHRN_N))]
4422   "TARGET_SIMD"
4423   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4424   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4425 )
4426
4427
4428 ;; cm(eq|ge|gt|lt|le)
4429 ;; Note, we have constraints for Dz and Z as different expanders
4430 ;; have different ideas of what should be passed to this pattern.
4431
4432 (define_insn "aarch64_cm<optab><mode>"
4433   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4434         (neg:<V_INT_EQUIV>
4435           (COMPARISONS:<V_INT_EQUIV>
4436             (match_operand:VDQ_I 1 "register_operand" "w,w")
4437             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4438           )))]
4439   "TARGET_SIMD"
4440   "@
4441   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4442   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4443   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4444 )
4445
4446 (define_insn_and_split "aarch64_cm<optab>di"
4447   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4448         (neg:DI
4449           (COMPARISONS:DI
4450             (match_operand:DI 1 "register_operand" "w,w,r")
4451             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4452           )))
4453      (clobber (reg:CC CC_REGNUM))]
4454   "TARGET_SIMD"
4455   "#"
4456   "reload_completed"
4457   [(set (match_operand:DI 0 "register_operand")
4458         (neg:DI
4459           (COMPARISONS:DI
4460             (match_operand:DI 1 "register_operand")
4461             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4462           )))]
4463   {
4464     /* If we are in the general purpose register file,
4465        we split to a sequence of comparison and store.  */
4466     if (GP_REGNUM_P (REGNO (operands[0]))
4467         && GP_REGNUM_P (REGNO (operands[1])))
4468       {
4469         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4470         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4471         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4472         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4473         DONE;
4474       }
4475     /* Otherwise, we expand to a similar pattern which does not
4476        clobber CC_REGNUM.  */
4477   }
4478   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4479 )
4480
4481 (define_insn "*aarch64_cm<optab>di"
4482   [(set (match_operand:DI 0 "register_operand" "=w,w")
4483         (neg:DI
4484           (COMPARISONS:DI
4485             (match_operand:DI 1 "register_operand" "w,w")
4486             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4487           )))]
4488   "TARGET_SIMD && reload_completed"
4489   "@
4490   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4491   cm<optab>\t%d0, %d1, #0"
4492   [(set_attr "type" "neon_compare, neon_compare_zero")]
4493 )
4494
4495 ;; cm(hs|hi)
4496
4497 (define_insn "aarch64_cm<optab><mode>"
4498   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4499         (neg:<V_INT_EQUIV>
4500           (UCOMPARISONS:<V_INT_EQUIV>
4501             (match_operand:VDQ_I 1 "register_operand" "w")
4502             (match_operand:VDQ_I 2 "register_operand" "w")
4503           )))]
4504   "TARGET_SIMD"
4505   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4506   [(set_attr "type" "neon_compare<q>")]
4507 )
4508
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510   [(set (match_operand:DI 0 "register_operand" "=w,r")
4511         (neg:DI
4512           (UCOMPARISONS:DI
4513             (match_operand:DI 1 "register_operand" "w,r")
4514             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4515           )))
4516     (clobber (reg:CC CC_REGNUM))]
4517   "TARGET_SIMD"
4518   "#"
4519   "reload_completed"
4520   [(set (match_operand:DI 0 "register_operand")
4521         (neg:DI
4522           (UCOMPARISONS:DI
4523             (match_operand:DI 1 "register_operand")
4524             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4525           )))]
4526   {
4527     /* If we are in the general purpose register file,
4528        we split to a sequence of comparison and store.  */
4529     if (GP_REGNUM_P (REGNO (operands[0]))
4530         && GP_REGNUM_P (REGNO (operands[1])))
4531       {
4532         machine_mode mode = CCmode;
4533         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4536         DONE;
4537       }
4538     /* Otherwise, we expand to a similar pattern which does not
4539        clobber CC_REGNUM.  */
4540   }
4541   [(set_attr "type" "neon_compare,multiple")]
4542 )
4543
4544 (define_insn "*aarch64_cm<optab>di"
4545   [(set (match_operand:DI 0 "register_operand" "=w")
4546         (neg:DI
4547           (UCOMPARISONS:DI
4548             (match_operand:DI 1 "register_operand" "w")
4549             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4550           )))]
4551   "TARGET_SIMD && reload_completed"
4552   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4553   [(set_attr "type" "neon_compare")]
4554 )
4555
4556 ;; cmtst
4557
4558 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4559 ;; we don't have any insns using ne, and aarch64_vcond outputs
4560 ;; not (neg (eq (and x y) 0))
4561 ;; which is rewritten by simplify_rtx as
4562 ;; plus (eq (and x y) 0) -1.
4563
4564 (define_insn "aarch64_cmtst<mode>"
4565   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4566         (plus:<V_INT_EQUIV>
4567           (eq:<V_INT_EQUIV>
4568             (and:VDQ_I
4569               (match_operand:VDQ_I 1 "register_operand" "w")
4570               (match_operand:VDQ_I 2 "register_operand" "w"))
4571             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4572           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4573   ]
4574   "TARGET_SIMD"
4575   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4576   [(set_attr "type" "neon_tst<q>")]
4577 )
4578
4579 (define_insn_and_split "aarch64_cmtstdi"
4580   [(set (match_operand:DI 0 "register_operand" "=w,r")
4581         (neg:DI
4582           (ne:DI
4583             (and:DI
4584               (match_operand:DI 1 "register_operand" "w,r")
4585               (match_operand:DI 2 "register_operand" "w,r"))
4586             (const_int 0))))
4587     (clobber (reg:CC CC_REGNUM))]
4588   "TARGET_SIMD"
4589   "#"
4590   "reload_completed"
4591   [(set (match_operand:DI 0 "register_operand")
4592         (neg:DI
4593           (ne:DI
4594             (and:DI
4595               (match_operand:DI 1 "register_operand")
4596               (match_operand:DI 2 "register_operand"))
4597             (const_int 0))))]
4598   {
4599     /* If we are in the general purpose register file,
4600        we split to a sequence of comparison and store.  */
4601     if (GP_REGNUM_P (REGNO (operands[0]))
4602         && GP_REGNUM_P (REGNO (operands[1])))
4603       {
4604         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4605         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4606         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4607         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4608         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4609         DONE;
4610       }
4611     /* Otherwise, we expand to a similar pattern which does not
4612        clobber CC_REGNUM.  */
4613   }
4614   [(set_attr "type" "neon_tst,multiple")]
4615 )
4616
4617 (define_insn "*aarch64_cmtstdi"
4618   [(set (match_operand:DI 0 "register_operand" "=w")
4619         (neg:DI
4620           (ne:DI
4621             (and:DI
4622               (match_operand:DI 1 "register_operand" "w")
4623               (match_operand:DI 2 "register_operand" "w"))
4624             (const_int 0))))]
4625   "TARGET_SIMD"
4626   "cmtst\t%d0, %d1, %d2"
4627   [(set_attr "type" "neon_tst")]
4628 )
4629
4630 ;; fcm(eq|ge|gt|le|lt)
4631
4632 (define_insn "aarch64_cm<optab><mode>"
4633   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4634         (neg:<V_INT_EQUIV>
4635           (COMPARISONS:<V_INT_EQUIV>
4636             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4637             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4638           )))]
4639   "TARGET_SIMD"
4640   "@
4641   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4642   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4643   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4644 )
4645
4646 ;; fac(ge|gt)
4647 ;; Note we can also handle what would be fac(le|lt) by
4648 ;; generating fac(ge|gt).
4649
4650 (define_insn "aarch64_fac<optab><mode>"
4651   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4652         (neg:<V_INT_EQUIV>
4653           (FAC_COMPARISONS:<V_INT_EQUIV>
4654             (abs:VHSDF_HSDF
4655               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4656             (abs:VHSDF_HSDF
4657               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4658   )))]
4659   "TARGET_SIMD"
4660   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4661   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4662 )
4663
4664 ;; addp
4665
4666 (define_insn "aarch64_addp<mode>"
4667   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4668         (unspec:VD_BHSI
4669           [(match_operand:VD_BHSI 1 "register_operand" "w")
4670            (match_operand:VD_BHSI 2 "register_operand" "w")]
4671           UNSPEC_ADDP))]
4672   "TARGET_SIMD"
4673   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4674   [(set_attr "type" "neon_reduc_add<q>")]
4675 )
4676
4677 (define_insn "aarch64_addpdi"
4678   [(set (match_operand:DI 0 "register_operand" "=w")
4679         (unspec:DI
4680           [(match_operand:V2DI 1 "register_operand" "w")]
4681           UNSPEC_ADDP))]
4682   "TARGET_SIMD"
4683   "addp\t%d0, %1.2d"
4684   [(set_attr "type" "neon_reduc_add")]
4685 )
4686
4687 ;; sqrt
4688
4689 (define_expand "sqrt<mode>2"
4690   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4691         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4692   "TARGET_SIMD"
4693 {
4694   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4695     DONE;
4696 })
4697
4698 (define_insn "*sqrt<mode>2"
4699   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4700         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4701   "TARGET_SIMD"
4702   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4703   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4704 )
4705
4706 ;; Patterns for vector struct loads and stores.
4707
4708 (define_insn "aarch64_simd_ld2<mode>"
4709   [(set (match_operand:OI 0 "register_operand" "=w")
4710         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4711                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4712                    UNSPEC_LD2))]
4713   "TARGET_SIMD"
4714   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4715   [(set_attr "type" "neon_load2_2reg<q>")]
4716 )
4717
4718 (define_insn "aarch64_simd_ld2r<mode>"
4719   [(set (match_operand:OI 0 "register_operand" "=w")
4720        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4721                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4722                   UNSPEC_LD2_DUP))]
4723   "TARGET_SIMD"
4724   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4725   [(set_attr "type" "neon_load2_all_lanes<q>")]
4726 )
4727
4728 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4729   [(set (match_operand:OI 0 "register_operand" "=w")
4730         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4731                     (match_operand:OI 2 "register_operand" "0")
4732                     (match_operand:SI 3 "immediate_operand" "i")
4733                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4734                    UNSPEC_LD2_LANE))]
4735   "TARGET_SIMD"
4736   {
4737     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4738     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4739   }
4740   [(set_attr "type" "neon_load2_one_lane")]
4741 )
4742
4743 (define_expand "vec_load_lanesoi<mode>"
4744   [(set (match_operand:OI 0 "register_operand" "=w")
4745         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4746                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4747                    UNSPEC_LD2))]
4748   "TARGET_SIMD"
4749 {
4750   if (BYTES_BIG_ENDIAN)
4751     {
4752       rtx tmp = gen_reg_rtx (OImode);
4753       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4754       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4755       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4756     }
4757   else
4758     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4759   DONE;
4760 })
4761
4762 (define_insn "aarch64_simd_st2<mode>"
4763   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4764         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4765                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4766                    UNSPEC_ST2))]
4767   "TARGET_SIMD"
4768   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4769   [(set_attr "type" "neon_store2_2reg<q>")]
4770 )
4771
4772 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4773 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4774   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4775         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4776                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4777                     (match_operand:SI 2 "immediate_operand" "i")]
4778                    UNSPEC_ST2_LANE))]
4779   "TARGET_SIMD"
4780   {
4781     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4782     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4783   }
4784   [(set_attr "type" "neon_store2_one_lane<q>")]
4785 )
4786
4787 (define_expand "vec_store_lanesoi<mode>"
4788   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4789         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4790                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4791                    UNSPEC_ST2))]
4792   "TARGET_SIMD"
4793 {
4794   if (BYTES_BIG_ENDIAN)
4795     {
4796       rtx tmp = gen_reg_rtx (OImode);
4797       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4798       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4799       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4800     }
4801   else
4802     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4803   DONE;
4804 })
4805
4806 (define_insn "aarch64_simd_ld3<mode>"
4807   [(set (match_operand:CI 0 "register_operand" "=w")
4808         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4809                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4810                    UNSPEC_LD3))]
4811   "TARGET_SIMD"
4812   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4813   [(set_attr "type" "neon_load3_3reg<q>")]
4814 )
4815
4816 (define_insn "aarch64_simd_ld3r<mode>"
4817   [(set (match_operand:CI 0 "register_operand" "=w")
4818        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4819                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4820                   UNSPEC_LD3_DUP))]
4821   "TARGET_SIMD"
4822   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4823   [(set_attr "type" "neon_load3_all_lanes<q>")]
4824 )
4825
4826 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4827   [(set (match_operand:CI 0 "register_operand" "=w")
4828         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4829                     (match_operand:CI 2 "register_operand" "0")
4830                     (match_operand:SI 3 "immediate_operand" "i")
4831                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4832                    UNSPEC_LD3_LANE))]
4833   "TARGET_SIMD"
4834 {
4835     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4836     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4837 }
4838   [(set_attr "type" "neon_load3_one_lane")]
4839 )
4840
4841 (define_expand "vec_load_lanesci<mode>"
4842   [(set (match_operand:CI 0 "register_operand" "=w")
4843         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4844                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845                    UNSPEC_LD3))]
4846   "TARGET_SIMD"
4847 {
4848   if (BYTES_BIG_ENDIAN)
4849     {
4850       rtx tmp = gen_reg_rtx (CImode);
4851       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4852       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4853       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4854     }
4855   else
4856     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4857   DONE;
4858 })
4859
4860 (define_insn "aarch64_simd_st3<mode>"
4861   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4862         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4863                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4864                    UNSPEC_ST3))]
4865   "TARGET_SIMD"
4866   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4867   [(set_attr "type" "neon_store3_3reg<q>")]
4868 )
4869
4870 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4871 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4872   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4873         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4874                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4875                      (match_operand:SI 2 "immediate_operand" "i")]
4876                     UNSPEC_ST3_LANE))]
4877   "TARGET_SIMD"
4878   {
4879     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4880     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4881   }
4882   [(set_attr "type" "neon_store3_one_lane<q>")]
4883 )
4884
4885 (define_expand "vec_store_lanesci<mode>"
4886   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4887         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4888                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4889                    UNSPEC_ST3))]
4890   "TARGET_SIMD"
4891 {
4892   if (BYTES_BIG_ENDIAN)
4893     {
4894       rtx tmp = gen_reg_rtx (CImode);
4895       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4896       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4897       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4898     }
4899   else
4900     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4901   DONE;
4902 })
4903
4904 (define_insn "aarch64_simd_ld4<mode>"
4905   [(set (match_operand:XI 0 "register_operand" "=w")
4906         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4907                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4908                    UNSPEC_LD4))]
4909   "TARGET_SIMD"
4910   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4911   [(set_attr "type" "neon_load4_4reg<q>")]
4912 )
4913
4914 (define_insn "aarch64_simd_ld4r<mode>"
4915   [(set (match_operand:XI 0 "register_operand" "=w")
4916        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4917                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4918                   UNSPEC_LD4_DUP))]
4919   "TARGET_SIMD"
4920   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4921   [(set_attr "type" "neon_load4_all_lanes<q>")]
4922 )
4923
4924 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4925   [(set (match_operand:XI 0 "register_operand" "=w")
4926         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4927                     (match_operand:XI 2 "register_operand" "0")
4928                     (match_operand:SI 3 "immediate_operand" "i")
4929                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4930                    UNSPEC_LD4_LANE))]
4931   "TARGET_SIMD"
4932 {
4933     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4934     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4935 }
4936   [(set_attr "type" "neon_load4_one_lane")]
4937 )
4938
4939 (define_expand "vec_load_lanesxi<mode>"
4940   [(set (match_operand:XI 0 "register_operand" "=w")
4941         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4942                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4943                    UNSPEC_LD4))]
4944   "TARGET_SIMD"
4945 {
4946   if (BYTES_BIG_ENDIAN)
4947     {
4948       rtx tmp = gen_reg_rtx (XImode);
4949       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4950       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4951       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4952     }
4953   else
4954     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4955   DONE;
4956 })
4957
4958 (define_insn "aarch64_simd_st4<mode>"
4959   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4960         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4961                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4962                    UNSPEC_ST4))]
4963   "TARGET_SIMD"
4964   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4965   [(set_attr "type" "neon_store4_4reg<q>")]
4966 )
4967
4968 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4969 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4970   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4971         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4972                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4973                      (match_operand:SI 2 "immediate_operand" "i")]
4974                     UNSPEC_ST4_LANE))]
4975   "TARGET_SIMD"
4976   {
4977     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4978     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4979   }
4980   [(set_attr "type" "neon_store4_one_lane<q>")]
4981 )
4982
4983 (define_expand "vec_store_lanesxi<mode>"
4984   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4985         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4986                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4987                    UNSPEC_ST4))]
4988   "TARGET_SIMD"
4989 {
4990   if (BYTES_BIG_ENDIAN)
4991     {
4992       rtx tmp = gen_reg_rtx (XImode);
4993       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4994       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4995       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4996     }
4997   else
4998     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4999   DONE;
5000 })
5001
5002 (define_insn_and_split "aarch64_rev_reglist<mode>"
5003 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5004         (unspec:VSTRUCT
5005                    [(match_operand:VSTRUCT 1 "register_operand" "w")
5006                     (match_operand:V16QI 2 "register_operand" "w")]
5007                    UNSPEC_REV_REGLIST))]
5008   "TARGET_SIMD"
5009   "#"
5010   "&& reload_completed"
5011   [(const_int 0)]
5012 {
5013   int i;
5014   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5015   for (i = 0; i < nregs; i++)
5016     {
5017       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5018       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5019       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5020     }
5021   DONE;
5022 }
5023   [(set_attr "type" "neon_tbl1_q")
5024    (set_attr "length" "<insn_count>")]
5025 )
5026
5027 ;; Reload patterns for AdvSIMD register list operands.
5028
5029 (define_expand "mov<mode>"
5030   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5031         (match_operand:VSTRUCT 1 "general_operand" ""))]
5032   "TARGET_SIMD"
5033 {
5034   if (can_create_pseudo_p ())
5035     {
5036       if (GET_CODE (operands[0]) != REG)
5037         operands[1] = force_reg (<MODE>mode, operands[1]);
5038     }
5039 })
5040
5041 (define_insn "*aarch64_mov<mode>"
5042   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5043         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5044   "TARGET_SIMD && !BYTES_BIG_ENDIAN
5045    && (register_operand (operands[0], <MODE>mode)
5046        || register_operand (operands[1], <MODE>mode))"
5047   "@
5048    #
5049    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5050    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5051   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5052                      neon_load<nregs>_<nregs>reg_q")
5053    (set_attr "length" "<insn_count>,4,4")]
5054 )
5055
5056 (define_insn "aarch64_be_ld1<mode>"
5057   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
5058         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5059                              "aarch64_simd_struct_operand" "Utv")]
5060         UNSPEC_LD1))]
5061   "TARGET_SIMD"
5062   "ld1\\t{%0<Vmtype>}, %1"
5063   [(set_attr "type" "neon_load1_1reg<q>")]
5064 )
5065
5066 (define_insn "aarch64_be_st1<mode>"
5067   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5068         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5069         UNSPEC_ST1))]
5070   "TARGET_SIMD"
5071   "st1\\t{%1<Vmtype>}, %0"
5072   [(set_attr "type" "neon_store1_1reg<q>")]
5073 )
5074
5075 (define_insn "*aarch64_be_movoi"
5076   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5077         (match_operand:OI 1 "general_operand"      " w,w,m"))]
5078   "TARGET_SIMD && BYTES_BIG_ENDIAN
5079    && (register_operand (operands[0], OImode)
5080        || register_operand (operands[1], OImode))"
5081   "@
5082    #
5083    stp\\t%q1, %R1, %0
5084    ldp\\t%q0, %R0, %1"
5085   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5086    (set_attr "length" "8,4,4")]
5087 )
5088
5089 (define_insn "*aarch64_be_movci"
5090   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5091         (match_operand:CI 1 "general_operand"      " w,w,o"))]
5092   "TARGET_SIMD && BYTES_BIG_ENDIAN
5093    && (register_operand (operands[0], CImode)
5094        || register_operand (operands[1], CImode))"
5095   "#"
5096   [(set_attr "type" "multiple")
5097    (set_attr "length" "12,4,4")]
5098 )
5099
5100 (define_insn "*aarch64_be_movxi"
5101   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5102         (match_operand:XI 1 "general_operand"      " w,w,o"))]
5103   "TARGET_SIMD && BYTES_BIG_ENDIAN
5104    && (register_operand (operands[0], XImode)
5105        || register_operand (operands[1], XImode))"
5106   "#"
5107   [(set_attr "type" "multiple")
5108    (set_attr "length" "16,4,4")]
5109 )
5110
5111 (define_split
5112   [(set (match_operand:OI 0 "register_operand")
5113         (match_operand:OI 1 "register_operand"))]
5114   "TARGET_SIMD && reload_completed"
5115   [(const_int 0)]
5116 {
5117   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5118   DONE;
5119 })
5120
5121 (define_split
5122   [(set (match_operand:CI 0 "nonimmediate_operand")
5123         (match_operand:CI 1 "general_operand"))]
5124   "TARGET_SIMD && reload_completed"
5125   [(const_int 0)]
5126 {
5127   if (register_operand (operands[0], CImode)
5128       && register_operand (operands[1], CImode))
5129     {
5130       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5131       DONE;
5132     }
5133   else if (BYTES_BIG_ENDIAN)
5134     {
5135       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5136                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5137       emit_move_insn (gen_lowpart (V16QImode,
5138                                    simplify_gen_subreg (TImode, operands[0],
5139                                                         CImode, 32)),
5140                       gen_lowpart (V16QImode,
5141                                    simplify_gen_subreg (TImode, operands[1],
5142                                                         CImode, 32)));
5143       DONE;
5144     }
5145   else
5146     FAIL;
5147 })
5148
5149 (define_split
5150   [(set (match_operand:XI 0 "nonimmediate_operand")
5151         (match_operand:XI 1 "general_operand"))]
5152   "TARGET_SIMD && reload_completed"
5153   [(const_int 0)]
5154 {
5155   if (register_operand (operands[0], XImode)
5156       && register_operand (operands[1], XImode))
5157     {
5158       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5159       DONE;
5160     }
5161   else if (BYTES_BIG_ENDIAN)
5162     {
5163       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5164                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5165       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5166                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5167       DONE;
5168     }
5169   else
5170     FAIL;
5171 })
5172
5173 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5174   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5175    (match_operand:DI 1 "register_operand" "w")
5176    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5177   "TARGET_SIMD"
5178 {
5179   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5180   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5181                      * <VSTRUCT:nregs>);
5182
5183   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5184                                                                 mem));
5185   DONE;
5186 })
5187
5188 (define_insn "aarch64_ld2<mode>_dreg"
5189   [(set (match_operand:OI 0 "register_operand" "=w")
5190         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5191                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5192                    UNSPEC_LD2_DREG))]
5193   "TARGET_SIMD"
5194   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5195   [(set_attr "type" "neon_load2_2reg<q>")]
5196 )
5197
5198 (define_insn "aarch64_ld2<mode>_dreg"
5199   [(set (match_operand:OI 0 "register_operand" "=w")
5200         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5201                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202                    UNSPEC_LD2_DREG))]
5203   "TARGET_SIMD"
5204   "ld1\\t{%S0.1d - %T0.1d}, %1"
5205   [(set_attr "type" "neon_load1_2reg<q>")]
5206 )
5207
5208 (define_insn "aarch64_ld3<mode>_dreg"
5209   [(set (match_operand:CI 0 "register_operand" "=w")
5210         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5211                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5212                    UNSPEC_LD3_DREG))]
5213   "TARGET_SIMD"
5214   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5215   [(set_attr "type" "neon_load3_3reg<q>")]
5216 )
5217
5218 (define_insn "aarch64_ld3<mode>_dreg"
5219   [(set (match_operand:CI 0 "register_operand" "=w")
5220         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5221                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5222                    UNSPEC_LD3_DREG))]
5223   "TARGET_SIMD"
5224   "ld1\\t{%S0.1d - %U0.1d}, %1"
5225   [(set_attr "type" "neon_load1_3reg<q>")]
5226 )
5227
5228 (define_insn "aarch64_ld4<mode>_dreg"
5229   [(set (match_operand:XI 0 "register_operand" "=w")
5230         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5231                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5232                    UNSPEC_LD4_DREG))]
5233   "TARGET_SIMD"
5234   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5235   [(set_attr "type" "neon_load4_4reg<q>")]
5236 )
5237
5238 (define_insn "aarch64_ld4<mode>_dreg"
5239   [(set (match_operand:XI 0 "register_operand" "=w")
5240         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5241                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5242                    UNSPEC_LD4_DREG))]
5243   "TARGET_SIMD"
5244   "ld1\\t{%S0.1d - %V0.1d}, %1"
5245   [(set_attr "type" "neon_load1_4reg<q>")]
5246 )
5247
5248 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5249  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5250   (match_operand:DI 1 "register_operand" "r")
5251   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252   "TARGET_SIMD"
5253 {
5254   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5255   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5256
5257   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5258   DONE;
5259 })
5260
5261 (define_expand "aarch64_ld1<VALL_F16:mode>"
5262  [(match_operand:VALL_F16 0 "register_operand")
5263   (match_operand:DI 1 "register_operand")]
5264   "TARGET_SIMD"
5265 {
5266   machine_mode mode = <VALL_F16:MODE>mode;
5267   rtx mem = gen_rtx_MEM (mode, operands[1]);
5268
5269   if (BYTES_BIG_ENDIAN)
5270     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5271   else
5272     emit_move_insn (operands[0], mem);
5273   DONE;
5274 })
5275
5276 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5277  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5278   (match_operand:DI 1 "register_operand" "r")
5279   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5280   "TARGET_SIMD"
5281 {
5282   machine_mode mode = <VSTRUCT:MODE>mode;
5283   rtx mem = gen_rtx_MEM (mode, operands[1]);
5284
5285   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5286   DONE;
5287 })
5288
5289 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5290   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5291         (match_operand:DI 1 "register_operand" "w")
5292         (match_operand:VSTRUCT 2 "register_operand" "0")
5293         (match_operand:SI 3 "immediate_operand" "i")
5294         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5295   "TARGET_SIMD"
5296 {
5297   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5298   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5299                      * <VSTRUCT:nregs>);
5300
5301   aarch64_simd_lane_bounds (operands[3], 0,
5302                             GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5303                             NULL);
5304   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5305         operands[0], mem, operands[2], operands[3]));
5306   DONE;
5307 })
5308
5309 ;; Expanders for builtins to extract vector registers from large
5310 ;; opaque integer modes.
5311
5312 ;; D-register list.
5313
5314 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5315  [(match_operand:VDC 0 "register_operand" "=w")
5316   (match_operand:VSTRUCT 1 "register_operand" "w")
5317   (match_operand:SI 2 "immediate_operand" "i")]
5318   "TARGET_SIMD"
5319 {
5320   int part = INTVAL (operands[2]);
5321   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5322   int offset = part * 16;
5323
5324   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5325   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5326   DONE;
5327 })
5328
5329 ;; Q-register list.
5330
5331 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5332  [(match_operand:VQ 0 "register_operand" "=w")
5333   (match_operand:VSTRUCT 1 "register_operand" "w")
5334   (match_operand:SI 2 "immediate_operand" "i")]
5335   "TARGET_SIMD"
5336 {
5337   int part = INTVAL (operands[2]);
5338   int offset = part * 16;
5339
5340   emit_move_insn (operands[0],
5341                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5342   DONE;
5343 })
5344
5345 ;; Permuted-store expanders for neon intrinsics.
5346
5347 ;; Permute instructions
5348
5349 ;; vec_perm support
5350
5351 (define_expand "vec_perm_const<mode>"
5352   [(match_operand:VALL_F16 0 "register_operand")
5353    (match_operand:VALL_F16 1 "register_operand")
5354    (match_operand:VALL_F16 2 "register_operand")
5355    (match_operand:<V_INT_EQUIV> 3)]
5356   "TARGET_SIMD"
5357 {
5358   if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5359                                      operands[2], operands[3], <nunits>))
5360     DONE;
5361   else
5362     FAIL;
5363 })
5364
5365 (define_expand "vec_perm<mode>"
5366   [(match_operand:VB 0 "register_operand")
5367    (match_operand:VB 1 "register_operand")
5368    (match_operand:VB 2 "register_operand")
5369    (match_operand:VB 3 "register_operand")]
5370   "TARGET_SIMD"
5371 {
5372   aarch64_expand_vec_perm (operands[0], operands[1],
5373                            operands[2], operands[3], <nunits>);
5374   DONE;
5375 })
5376
5377 (define_insn "aarch64_tbl1<mode>"
5378   [(set (match_operand:VB 0 "register_operand" "=w")
5379         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5380                     (match_operand:VB 2 "register_operand" "w")]
5381                    UNSPEC_TBL))]
5382   "TARGET_SIMD"
5383   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5384   [(set_attr "type" "neon_tbl1<q>")]
5385 )
5386
5387 ;; Two source registers.
5388
5389 (define_insn "aarch64_tbl2v16qi"
5390   [(set (match_operand:V16QI 0 "register_operand" "=w")
5391         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5392                        (match_operand:V16QI 2 "register_operand" "w")]
5393                       UNSPEC_TBL))]
5394   "TARGET_SIMD"
5395   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5396   [(set_attr "type" "neon_tbl2_q")]
5397 )
5398
5399 (define_insn "aarch64_tbl3<mode>"
5400   [(set (match_operand:VB 0 "register_operand" "=w")
5401         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5402                       (match_operand:VB 2 "register_operand" "w")]
5403                       UNSPEC_TBL))]
5404   "TARGET_SIMD"
5405   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5406   [(set_attr "type" "neon_tbl3")]
5407 )
5408
5409 (define_insn "aarch64_tbx4<mode>"
5410   [(set (match_operand:VB 0 "register_operand" "=w")
5411         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5412                       (match_operand:OI 2 "register_operand" "w")
5413                       (match_operand:VB 3 "register_operand" "w")]
5414                       UNSPEC_TBX))]
5415   "TARGET_SIMD"
5416   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5417   [(set_attr "type" "neon_tbl4")]
5418 )
5419
5420 ;; Three source registers.
5421
5422 (define_insn "aarch64_qtbl3<mode>"
5423   [(set (match_operand:VB 0 "register_operand" "=w")
5424         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5425                       (match_operand:VB 2 "register_operand" "w")]
5426                       UNSPEC_TBL))]
5427   "TARGET_SIMD"
5428   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5429   [(set_attr "type" "neon_tbl3")]
5430 )
5431
5432 (define_insn "aarch64_qtbx3<mode>"
5433   [(set (match_operand:VB 0 "register_operand" "=w")
5434         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5435                       (match_operand:CI 2 "register_operand" "w")
5436                       (match_operand:VB 3 "register_operand" "w")]
5437                       UNSPEC_TBX))]
5438   "TARGET_SIMD"
5439   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5440   [(set_attr "type" "neon_tbl3")]
5441 )
5442
5443 ;; Four source registers.
5444
5445 (define_insn "aarch64_qtbl4<mode>"
5446   [(set (match_operand:VB 0 "register_operand" "=w")
5447         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5448                       (match_operand:VB 2 "register_operand" "w")]
5449                       UNSPEC_TBL))]
5450   "TARGET_SIMD"
5451   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5452   [(set_attr "type" "neon_tbl4")]
5453 )
5454
5455 (define_insn "aarch64_qtbx4<mode>"
5456   [(set (match_operand:VB 0 "register_operand" "=w")
5457         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5458                       (match_operand:XI 2 "register_operand" "w")
5459                       (match_operand:VB 3 "register_operand" "w")]
5460                       UNSPEC_TBX))]
5461   "TARGET_SIMD"
5462   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5463   [(set_attr "type" "neon_tbl4")]
5464 )
5465
5466 (define_insn_and_split "aarch64_combinev16qi"
5467   [(set (match_operand:OI 0 "register_operand" "=w")
5468         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5469                     (match_operand:V16QI 2 "register_operand" "w")]
5470                    UNSPEC_CONCAT))]
5471   "TARGET_SIMD"
5472   "#"
5473   "&& reload_completed"
5474   [(const_int 0)]
5475 {
5476   aarch64_split_combinev16qi (operands);
5477   DONE;
5478 }
5479 [(set_attr "type" "multiple")]
5480 )
5481
5482 ;; This instruction's pattern is generated directly by
5483 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5484 ;; need corresponding changes there.
5485 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5486   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5487         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5488                           (match_operand:VALL_F16 2 "register_operand" "w")]
5489          PERMUTE))]
5490   "TARGET_SIMD"
5491   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5492   [(set_attr "type" "neon_permute<q>")]
5493 )
5494
5495 ;; This instruction's pattern is generated directly by
5496 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5497 ;; need corresponding changes there.  Note that the immediate (third)
5498 ;; operand is a lane index not a byte index.
5499 (define_insn "aarch64_ext<mode>"
5500   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5501         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5502                           (match_operand:VALL_F16 2 "register_operand" "w")
5503                           (match_operand:SI 3 "immediate_operand" "i")]
5504          UNSPEC_EXT))]
5505   "TARGET_SIMD"
5506 {
5507   operands[3] = GEN_INT (INTVAL (operands[3])
5508       * GET_MODE_UNIT_SIZE (<MODE>mode));
5509   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5510 }
5511   [(set_attr "type" "neon_ext<q>")]
5512 )
5513
5514 ;; This instruction's pattern is generated directly by
5515 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5516 ;; need corresponding changes there.
5517 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5518   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5519         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5520                     REVERSE))]
5521   "TARGET_SIMD"
5522   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5523   [(set_attr "type" "neon_rev<q>")]
5524 )
5525
5526 (define_insn "aarch64_st2<mode>_dreg"
5527   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5528         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5529                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530                    UNSPEC_ST2))]
5531   "TARGET_SIMD"
5532   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5533   [(set_attr "type" "neon_store2_2reg")]
5534 )
5535
5536 (define_insn "aarch64_st2<mode>_dreg"
5537   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5538         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5539                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540                    UNSPEC_ST2))]
5541   "TARGET_SIMD"
5542   "st1\\t{%S1.1d - %T1.1d}, %0"
5543   [(set_attr "type" "neon_store1_2reg")]
5544 )
5545
5546 (define_insn "aarch64_st3<mode>_dreg"
5547   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5548         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5549                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5550                    UNSPEC_ST3))]
5551   "TARGET_SIMD"
5552   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5553   [(set_attr "type" "neon_store3_3reg")]
5554 )
5555
5556 (define_insn "aarch64_st3<mode>_dreg"
5557   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5558         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5559                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5560                    UNSPEC_ST3))]
5561   "TARGET_SIMD"
5562   "st1\\t{%S1.1d - %U1.1d}, %0"
5563   [(set_attr "type" "neon_store1_3reg")]
5564 )
5565
5566 (define_insn "aarch64_st4<mode>_dreg"
5567   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5568         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5569                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5570                    UNSPEC_ST4))]
5571   "TARGET_SIMD"
5572   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5573   [(set_attr "type" "neon_store4_4reg")]
5574 )
5575
5576 (define_insn "aarch64_st4<mode>_dreg"
5577   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5578         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5579                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5580                    UNSPEC_ST4))]
5581   "TARGET_SIMD"
5582   "st1\\t{%S1.1d - %V1.1d}, %0"
5583   [(set_attr "type" "neon_store1_4reg")]
5584 )
5585
5586 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5587  [(match_operand:DI 0 "register_operand" "r")
5588   (match_operand:VSTRUCT 1 "register_operand" "w")
5589   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590   "TARGET_SIMD"
5591 {
5592   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5593   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5594
5595   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5596   DONE;
5597 })
5598
5599 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5600  [(match_operand:DI 0 "register_operand" "r")
5601   (match_operand:VSTRUCT 1 "register_operand" "w")
5602   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5603   "TARGET_SIMD"
5604 {
5605   machine_mode mode = <VSTRUCT:MODE>mode;
5606   rtx mem = gen_rtx_MEM (mode, operands[0]);
5607
5608   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5609   DONE;
5610 })
5611
5612 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5613  [(match_operand:DI 0 "register_operand" "r")
5614   (match_operand:VSTRUCT 1 "register_operand" "w")
5615   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5616   (match_operand:SI 2 "immediate_operand")]
5617   "TARGET_SIMD"
5618 {
5619   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5620   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5621                      * <VSTRUCT:nregs>);
5622
5623   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5624                 mem, operands[1], operands[2]));
5625   DONE;
5626 })
5627
5628 (define_expand "aarch64_st1<VALL_F16:mode>"
5629  [(match_operand:DI 0 "register_operand")
5630   (match_operand:VALL_F16 1 "register_operand")]
5631   "TARGET_SIMD"
5632 {
5633   machine_mode mode = <VALL_F16:MODE>mode;
5634   rtx mem = gen_rtx_MEM (mode, operands[0]);
5635
5636   if (BYTES_BIG_ENDIAN)
5637     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5638   else
5639     emit_move_insn (mem, operands[1]);
5640   DONE;
5641 })
5642
5643 ;; Expander for builtins to insert vector registers into large
5644 ;; opaque integer modes.
5645
5646 ;; Q-register list.  We don't need a D-reg inserter as we zero
5647 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5648
5649 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5650  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5651   (match_operand:VSTRUCT 1 "register_operand" "0")
5652   (match_operand:VQ 2 "register_operand" "w")
5653   (match_operand:SI 3 "immediate_operand" "i")]
5654   "TARGET_SIMD"
5655 {
5656   int part = INTVAL (operands[3]);
5657   int offset = part * 16;
5658
5659   emit_move_insn (operands[0], operands[1]);
5660   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5661                   operands[2]);
5662   DONE;
5663 })
5664
5665 ;; Standard pattern name vec_init<mode><Vel>.
5666
5667 (define_expand "vec_init<mode><Vel>"
5668   [(match_operand:VALL_F16 0 "register_operand" "")
5669    (match_operand 1 "" "")]
5670   "TARGET_SIMD"
5671 {
5672   aarch64_expand_vector_init (operands[0], operands[1]);
5673   DONE;
5674 })
5675
5676 (define_insn "*aarch64_simd_ld1r<mode>"
5677   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5678         (vec_duplicate:VALL_F16
5679           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5680   "TARGET_SIMD"
5681   "ld1r\\t{%0.<Vtype>}, %1"
5682   [(set_attr "type" "neon_load1_all_lanes")]
5683 )
5684
5685 (define_insn "aarch64_frecpe<mode>"
5686   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5687         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5688          UNSPEC_FRECPE))]
5689   "TARGET_SIMD"
5690   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5691   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5692 )
5693
5694 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5695   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5696         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5697          FRECP))]
5698   "TARGET_SIMD"
5699   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5700   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5701 )
5702
5703 (define_insn "aarch64_frecps<mode>"
5704   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5705         (unspec:VHSDF_HSDF
5706           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5707           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5708           UNSPEC_FRECPS))]
5709   "TARGET_SIMD"
5710   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5711   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5712 )
5713
5714 (define_insn "aarch64_urecpe<mode>"
5715   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5716         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5717                 UNSPEC_URECPE))]
5718  "TARGET_SIMD"
5719  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5720   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5721
5722 ;; Standard pattern name vec_extract<mode><Vel>.
5723
5724 (define_expand "vec_extract<mode><Vel>"
5725   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5726    (match_operand:VALL_F16 1 "register_operand" "")
5727    (match_operand:SI 2 "immediate_operand" "")]
5728   "TARGET_SIMD"
5729 {
5730     emit_insn
5731       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5732     DONE;
5733 })
5734
5735 ;; aes
5736
5737 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5738   [(set (match_operand:V16QI 0 "register_operand" "=w")
5739         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5740                        (match_operand:V16QI 2 "register_operand" "w")]
5741          CRYPTO_AES))]
5742   "TARGET_SIMD && TARGET_CRYPTO"
5743   "aes<aes_op>\\t%0.16b, %2.16b"
5744   [(set_attr "type" "crypto_aese")]
5745 )
5746
5747 ;; When AES/AESMC fusion is enabled we want the register allocation to
5748 ;; look like:
5749 ;;    AESE Vn, _
5750 ;;    AESMC Vn, Vn
5751 ;; So prefer to tie operand 1 to operand 0 when fusing.
5752
5753 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5754   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5755         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5756          CRYPTO_AESMC))]
5757   "TARGET_SIMD && TARGET_CRYPTO"
5758   "aes<aesmc_op>\\t%0.16b, %1.16b"
5759   [(set_attr "type" "crypto_aesmc")
5760    (set_attr_alternative "enabled"
5761      [(if_then_else (match_test
5762                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5763                      (const_string "yes" )
5764                      (const_string "no"))
5765       (const_string "yes")])]
5766 )
5767
5768 ;; sha1
5769
5770 (define_insn "aarch64_crypto_sha1hsi"
5771   [(set (match_operand:SI 0 "register_operand" "=w")
5772         (unspec:SI [(match_operand:SI 1
5773                        "register_operand" "w")]
5774          UNSPEC_SHA1H))]
5775   "TARGET_SIMD && TARGET_CRYPTO"
5776   "sha1h\\t%s0, %s1"
5777   [(set_attr "type" "crypto_sha1_fast")]
5778 )
5779
5780 (define_insn "aarch64_crypto_sha1hv4si"
5781   [(set (match_operand:SI 0 "register_operand" "=w")
5782         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5783                      (parallel [(const_int 0)]))]
5784          UNSPEC_SHA1H))]
5785   "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5786   "sha1h\\t%s0, %s1"
5787   [(set_attr "type" "crypto_sha1_fast")]
5788 )
5789
5790 (define_insn "aarch64_be_crypto_sha1hv4si"
5791   [(set (match_operand:SI 0 "register_operand" "=w")
5792         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5793                      (parallel [(const_int 3)]))]
5794          UNSPEC_SHA1H))]
5795   "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5796   "sha1h\\t%s0, %s1"
5797   [(set_attr "type" "crypto_sha1_fast")]
5798 )
5799
5800 (define_insn "aarch64_crypto_sha1su1v4si"
5801   [(set (match_operand:V4SI 0 "register_operand" "=w")
5802         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5803                       (match_operand:V4SI 2 "register_operand" "w")]
5804          UNSPEC_SHA1SU1))]
5805   "TARGET_SIMD && TARGET_CRYPTO"
5806   "sha1su1\\t%0.4s, %2.4s"
5807   [(set_attr "type" "crypto_sha1_fast")]
5808 )
5809
5810 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5811   [(set (match_operand:V4SI 0 "register_operand" "=w")
5812         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5813                       (match_operand:SI 2 "register_operand" "w")
5814                       (match_operand:V4SI 3 "register_operand" "w")]
5815          CRYPTO_SHA1))]
5816   "TARGET_SIMD && TARGET_CRYPTO"
5817   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5818   [(set_attr "type" "crypto_sha1_slow")]
5819 )
5820
5821 (define_insn "aarch64_crypto_sha1su0v4si"
5822   [(set (match_operand:V4SI 0 "register_operand" "=w")
5823         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5824                       (match_operand:V4SI 2 "register_operand" "w")
5825                       (match_operand:V4SI 3 "register_operand" "w")]
5826          UNSPEC_SHA1SU0))]
5827   "TARGET_SIMD && TARGET_CRYPTO"
5828   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5829   [(set_attr "type" "crypto_sha1_xor")]
5830 )
5831
5832 ;; sha256
5833
5834 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5835   [(set (match_operand:V4SI 0 "register_operand" "=w")
5836         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5837                       (match_operand:V4SI 2 "register_operand" "w")
5838                       (match_operand:V4SI 3 "register_operand" "w")]
5839          CRYPTO_SHA256))]
5840   "TARGET_SIMD && TARGET_CRYPTO"
5841   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5842   [(set_attr "type" "crypto_sha256_slow")]
5843 )
5844
5845 (define_insn "aarch64_crypto_sha256su0v4si"
5846   [(set (match_operand:V4SI 0 "register_operand" "=w")
5847         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5848                       (match_operand:V4SI 2 "register_operand" "w")]
5849          UNSPEC_SHA256SU0))]
5850   "TARGET_SIMD &&TARGET_CRYPTO"
5851   "sha256su0\\t%0.4s, %2.4s"
5852   [(set_attr "type" "crypto_sha256_fast")]
5853 )
5854
5855 (define_insn "aarch64_crypto_sha256su1v4si"
5856   [(set (match_operand:V4SI 0 "register_operand" "=w")
5857         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5858                       (match_operand:V4SI 2 "register_operand" "w")
5859                       (match_operand:V4SI 3 "register_operand" "w")]
5860          UNSPEC_SHA256SU1))]
5861   "TARGET_SIMD &&TARGET_CRYPTO"
5862   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5863   [(set_attr "type" "crypto_sha256_slow")]
5864 )
5865
5866 ;; pmull
5867
5868 (define_insn "aarch64_crypto_pmulldi"
5869   [(set (match_operand:TI 0 "register_operand" "=w")
5870         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
5871                      (match_operand:DI 2 "register_operand" "w")]
5872                     UNSPEC_PMULL))]
5873  "TARGET_SIMD && TARGET_CRYPTO"
5874  "pmull\\t%0.1q, %1.1d, %2.1d"
5875   [(set_attr "type" "crypto_pmull")]
5876 )
5877
5878 (define_insn "aarch64_crypto_pmullv2di"
5879  [(set (match_operand:TI 0 "register_operand" "=w")
5880        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5881                    (match_operand:V2DI 2 "register_operand" "w")]
5882                   UNSPEC_PMULL2))]
5883   "TARGET_SIMD && TARGET_CRYPTO"
5884   "pmull2\\t%0.1q, %1.2d, %2.2d"
5885   [(set_attr "type" "crypto_pmull")]
5886 )