gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26   /* Force the operand into a register if it is not an
  27      immediate whose use can be replaced with xzr.
  28      If the mode is 16 bytes wide, then we will be doing
  29      a stp in DI mode, so we check the validity of that.
  30      If the mode is 8 bytes wide, then we will do doing a
  31      normal str, so the check need not apply.  */
  32   if (GET_CODE (operands[0]) == MEM
  33       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  34            && ((GET_MODE_SIZE (<MODE>mode) == 16
  35                 && aarch64_mem_pair_operand (operands[0], DImode))
  36                || GET_MODE_SIZE (<MODE>mode) == 8)))
  37       operands[1] = force_reg (<MODE>mode, operands[1]);
  38   "
  39 )
  40
  41 (define_expand "movmisalign<mode>"
  42   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  43         (match_operand:VALL 1 "general_operand" ""))]
  44   "TARGET_SIMD"
  45 {
  46   /* This pattern is not permitted to fail during expansion: if both arguments
  47      are non-registers (e.g. memory := constant, which can be created by the
  48      auto-vectorizer), force operand 1 into a register.  */
  49   if (!register_operand (operands[0], <MODE>mode)
  50       && !register_operand (operands[1], <MODE>mode))
  51     operands[1] = force_reg (<MODE>mode, operands[1]);
  52 })
  53
  54 (define_insn "aarch64_simd_dup<mode>"
  55   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  56         (vec_duplicate:VDQ_I
  57           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  58   "TARGET_SIMD"
  59   "@
  60    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  61    dup\\t%0.<Vtype>, %<vw>1"
  62   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  63 )
  64
  65 (define_insn "aarch64_simd_dup<mode>"
  66   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  67         (vec_duplicate:VDQF_F16
  68           (match_operand:<VEL> 1 "register_operand" "w")))]
  69   "TARGET_SIMD"
  70   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  71   [(set_attr "type" "neon_dup<q>")]
  72 )
  73
  74 (define_insn "aarch64_dup_lane<mode>"
  75   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  76         (vec_duplicate:VALL_F16
  77           (vec_select:<VEL>
  78             (match_operand:VALL_F16 1 "register_operand" "w")
  79             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  80           )))]
  81   "TARGET_SIMD"
  82   {
  83     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  84     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  85   }
  86   [(set_attr "type" "neon_dup<q>")]
  87 )
  88
  89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  90   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  91         (vec_duplicate:VALL_F16_NO_V2Q
  92           (vec_select:<VEL>
  93             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  94             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  95           )))]
  96   "TARGET_SIMD"
  97   {
  98     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
  99     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 100   }
 101   [(set_attr "type" "neon_dup<q>")]
 102 )
 103
 104 (define_insn "*aarch64_simd_mov<VD:mode>"
 105   [(set (match_operand:VD 0 "nonimmediate_operand"
 106                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 107         (match_operand:VD 1 "general_operand"
 108                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 109   "TARGET_SIMD
 110    && (register_operand (operands[0], <MODE>mode)
 111        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 112 {
 113    switch (which_alternative)
 114      {
 115      case 0: return "ldr\t%d0, %1";
 116      case 1: return "str\txzr, %0";
 117      case 2: return "str\t%d1, %0";
 118      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 119      case 4: return "umov\t%0, %1.d[0]";
 120      case 5: return "fmov\t%d0, %1";
 121      case 6: return "mov\t%0, %1";
 122      case 7:
 123         return aarch64_output_simd_mov_immediate (operands[1],
 124                                                   <MODE>mode, 64);
 125      default: gcc_unreachable ();
 126      }
 127 }
 128   [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
 129                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 130                      mov_reg, neon_move<q>")]
 131 )
 132
 133 (define_insn "*aarch64_simd_mov<VQ:mode>"
 134   [(set (match_operand:VQ 0 "nonimmediate_operand"
 135                 "=w, Umq,  m,  w, ?r, ?w, ?r, w")
 136         (match_operand:VQ 1 "general_operand"
 137                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 138   "TARGET_SIMD
 139    && (register_operand (operands[0], <MODE>mode)
 140        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 141 {
 142   switch (which_alternative)
 143     {
 144     case 0:
 145         return "ldr\t%q0, %1";
 146     case 1:
 147         return "stp\txzr, xzr, %0";
 148     case 2:
 149         return "str\t%q1, %0";
 150     case 3:
 151         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 152     case 4:
 153     case 5:
 154     case 6:
 155         return "#";
 156     case 7:
 157         return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
 158     default:
 159         gcc_unreachable ();
 160     }
 161 }
 162   [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
 163                      neon_logic<q>, multiple, multiple,\
 164                      multiple, neon_move<q>")
 165    (set_attr "length" "4,4,4,4,8,8,8,4")]
 166 )
 167
 168 ;; When storing lane zero we can use the normal STR and its more permissive
 169 ;; addressing modes.
 170
 171 (define_insn "aarch64_store_lane0<mode>"
 172   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 173         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 174                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 175   "TARGET_SIMD
 176    && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
 177   "str\\t%<Vetype>1, %0"
 178   [(set_attr "type" "neon_store1_1reg<q>")]
 179 )
 180
 181 (define_insn "load_pair<mode>"
 182   [(set (match_operand:VD 0 "register_operand" "=w")
 183         (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
 184    (set (match_operand:VD 2 "register_operand" "=w")
 185         (match_operand:VD 3 "memory_operand" "m"))]
 186   "TARGET_SIMD
 187    && rtx_equal_p (XEXP (operands[3], 0),
 188                    plus_constant (Pmode,
 189                                   XEXP (operands[1], 0),
 190                                   GET_MODE_SIZE (<MODE>mode)))"
 191   "ldp\\t%d0, %d2, %1"
 192   [(set_attr "type" "neon_ldp")]
 193 )
 194
 195 (define_insn "store_pair<mode>"
 196   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
 197         (match_operand:VD 1 "register_operand" "w"))
 198    (set (match_operand:VD 2 "memory_operand" "=m")
 199         (match_operand:VD 3 "register_operand" "w"))]
 200   "TARGET_SIMD
 201    && rtx_equal_p (XEXP (operands[2], 0),
 202                    plus_constant (Pmode,
 203                                   XEXP (operands[0], 0),
 204                                   GET_MODE_SIZE (<MODE>mode)))"
 205   "stp\\t%d1, %d3, %0"
 206   [(set_attr "type" "neon_stp")]
 207 )
 208
 209 (define_split
 210   [(set (match_operand:VQ 0 "register_operand" "")
 211       (match_operand:VQ 1 "register_operand" ""))]
 212   "TARGET_SIMD && reload_completed
 213    && GP_REGNUM_P (REGNO (operands[0]))
 214    && GP_REGNUM_P (REGNO (operands[1]))"
 215   [(const_int 0)]
 216 {
 217   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 218   DONE;
 219 })
 220
 221 (define_split
 222   [(set (match_operand:VQ 0 "register_operand" "")
 223         (match_operand:VQ 1 "register_operand" ""))]
 224   "TARGET_SIMD && reload_completed
 225    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 226        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 227   [(const_int 0)]
 228 {
 229   aarch64_split_simd_move (operands[0], operands[1]);
 230   DONE;
 231 })
 232
 233 (define_expand "aarch64_split_simd_mov<mode>"
 234   [(set (match_operand:VQ 0)
 235         (match_operand:VQ 1))]
 236   "TARGET_SIMD"
 237   {
 238     rtx dst = operands[0];
 239     rtx src = operands[1];
 240
 241     if (GP_REGNUM_P (REGNO (src)))
 242       {
 243         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 244         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 245
 246         emit_insn
 247           (gen_move_lo_quad_<mode> (dst, src_low_part));
 248         emit_insn
 249           (gen_move_hi_quad_<mode> (dst, src_high_part));
 250       }
 251
 252     else
 253       {
 254         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 255         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 256         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 257         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 258
 259         emit_insn
 260           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 261         emit_insn
 262           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 263       }
 264     DONE;
 265   }
 266 )
 267
 268 (define_insn "aarch64_simd_mov_from_<mode>low"
 269   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 270         (vec_select:<VHALF>
 271           (match_operand:VQ 1 "register_operand" "w")
 272           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 273   "TARGET_SIMD && reload_completed"
 274   "umov\t%0, %1.d[0]"
 275   [(set_attr "type" "neon_to_gp<q>")
 276    (set_attr "length" "4")
 277   ])
 278
 279 (define_insn "aarch64_simd_mov_from_<mode>high"
 280   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 281         (vec_select:<VHALF>
 282           (match_operand:VQ 1 "register_operand" "w")
 283           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 284   "TARGET_SIMD && reload_completed"
 285   "umov\t%0, %1.d[1]"
 286   [(set_attr "type" "neon_to_gp<q>")
 287    (set_attr "length" "4")
 288   ])
 289
 290 (define_insn "orn<mode>3"
 291  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 292        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 293                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 294  "TARGET_SIMD"
 295  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 296   [(set_attr "type" "neon_logic<q>")]
 297 )
 298
 299 (define_insn "bic<mode>3"
 300  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 301        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 302                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 303  "TARGET_SIMD"
 304  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 305   [(set_attr "type" "neon_logic<q>")]
 306 )
 307
 308 (define_insn "add<mode>3"
 309   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 310         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 311                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 312   "TARGET_SIMD"
 313   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 314   [(set_attr "type" "neon_add<q>")]
 315 )
 316
 317 (define_insn "sub<mode>3"
 318   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 319         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 320                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 321   "TARGET_SIMD"
 322   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 323   [(set_attr "type" "neon_sub<q>")]
 324 )
 325
 326 (define_insn "mul<mode>3"
 327   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 328         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 329                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 330   "TARGET_SIMD"
 331   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 332   [(set_attr "type" "neon_mul_<Vetype><q>")]
 333 )
 334
 335 (define_insn "bswap<mode>2"
 336   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 337         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 338   "TARGET_SIMD"
 339   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 340   [(set_attr "type" "neon_rev<q>")]
 341 )
 342
 343 (define_insn "aarch64_rbit<mode>"
 344   [(set (match_operand:VB 0 "register_operand" "=w")
 345         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 346                    UNSPEC_RBIT))]
 347   "TARGET_SIMD"
 348   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 349   [(set_attr "type" "neon_rbit")]
 350 )
 351
 352 (define_expand "ctz<mode>2"
 353   [(set (match_operand:VS 0 "register_operand")
 354         (ctz:VS (match_operand:VS 1 "register_operand")))]
 355   "TARGET_SIMD"
 356   {
 357      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 358      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 359                                              <MODE>mode, 0);
 360      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 361      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 362      DONE;
 363   }
 364 )
 365
 366 (define_expand "xorsign<mode>3"
 367   [(match_operand:VHSDF 0 "register_operand")
 368    (match_operand:VHSDF 1 "register_operand")
 369    (match_operand:VHSDF 2 "register_operand")]
 370   "TARGET_SIMD"
 371 {
 372
 373   machine_mode imode = <V_INT_EQUIV>mode;
 374   rtx v_bitmask = gen_reg_rtx (imode);
 375   rtx op1x = gen_reg_rtx (imode);
 376   rtx op2x = gen_reg_rtx (imode);
 377
 378   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 379   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 380
 381   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 382
 383   emit_move_insn (v_bitmask,
 384                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 385                                                      HOST_WIDE_INT_M1U << bits));
 386
 387   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 388   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 389   emit_move_insn (operands[0],
 390                   lowpart_subreg (<MODE>mode, op1x, imode));
 391   DONE;
 392 }
 393 )
 394
 395 ;; These instructions map to the __builtins for the Dot Product operations.
 396 (define_insn "aarch64_<sur>dot<vsi2qi>"
 397   [(set (match_operand:VS 0 "register_operand" "=w")
 398         (plus:VS (match_operand:VS 1 "register_operand" "0")
 399                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 400                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
 401                 DOTPROD)))]
 402   "TARGET_DOTPROD"
 403   "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
 404   [(set_attr "type" "neon_dot")]
 405 )
 406
 407 ;; These expands map to the Dot Product optab the vectorizer checks for.
 408 ;; The auto-vectorizer expects a dot product builtin that also does an
 409 ;; accumulation into the provided register.
 410 ;; Given the following pattern
 411 ;;
 412 ;; for (i=0; i<len; i++) {
 413 ;;     c = a[i] * b[i];
 414 ;;     r += c;
 415 ;; }
 416 ;; return result;
 417 ;;
 418 ;; This can be auto-vectorized to
 419 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 420 ;;
 421 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 422 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 423 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 424 ;; ...
 425 ;;
 426 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 427 (define_expand "<sur>dot_prod<vsi2qi>"
 428   [(set (match_operand:VS 0 "register_operand")
 429         (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
 430                             (match_operand:<VSI2QI> 2 "register_operand")]
 431                  DOTPROD)
 432                 (match_operand:VS 3 "register_operand")))]
 433   "TARGET_DOTPROD"
 434 {
 435   emit_insn (
 436     gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
 437                                     operands[2]));
 438   emit_insn (gen_rtx_SET (operands[0], operands[3]));
 439   DONE;
 440 })
 441
 442 ;; These instructions map to the __builtins for the Dot Product
 443 ;; indexed operations.
 444 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
 445   [(set (match_operand:VS 0 "register_operand" "=w")
 446         (plus:VS (match_operand:VS 1 "register_operand" "0")
 447                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 448                             (match_operand:V8QI 3 "register_operand" "<h_con>")
 449                             (match_operand:SI 4 "immediate_operand" "i")]
 450                 DOTPROD)))]
 451   "TARGET_DOTPROD"
 452   {
 453     operands[4]
 454       = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
 455     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 456   }
 457   [(set_attr "type" "neon_dot")]
 458 )
 459
 460 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
 461   [(set (match_operand:VS 0 "register_operand" "=w")
 462         (plus:VS (match_operand:VS 1 "register_operand" "0")
 463                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 464                             (match_operand:V16QI 3 "register_operand" "<h_con>")
 465                             (match_operand:SI 4 "immediate_operand" "i")]
 466                 DOTPROD)))]
 467   "TARGET_DOTPROD"
 468   {
 469     operands[4]
 470       = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
 471     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 472   }
 473   [(set_attr "type" "neon_dot")]
 474 )
 475
 476 (define_expand "copysign<mode>3"
 477   [(match_operand:VHSDF 0 "register_operand")
 478    (match_operand:VHSDF 1 "register_operand")
 479    (match_operand:VHSDF 2 "register_operand")]
 480   "TARGET_FLOAT && TARGET_SIMD"
 481 {
 482   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 483   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 484
 485   emit_move_insn (v_bitmask,
 486                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 487                                                      HOST_WIDE_INT_M1U << bits));
 488   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 489                                          operands[2], operands[1]));
 490   DONE;
 491 }
 492 )
 493
 494 (define_insn "*aarch64_mul3_elt<mode>"
 495  [(set (match_operand:VMUL 0 "register_operand" "=w")
 496     (mult:VMUL
 497       (vec_duplicate:VMUL
 498           (vec_select:<VEL>
 499             (match_operand:VMUL 1 "register_operand" "<h_con>")
 500             (parallel [(match_operand:SI 2 "immediate_operand")])))
 501       (match_operand:VMUL 3 "register_operand" "w")))]
 502   "TARGET_SIMD"
 503   {
 504     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 505     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 506   }
 507   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 508 )
 509
 510 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 511   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 512      (mult:VMUL_CHANGE_NLANES
 513        (vec_duplicate:VMUL_CHANGE_NLANES
 514           (vec_select:<VEL>
 515             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 516             (parallel [(match_operand:SI 2 "immediate_operand")])))
 517       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 518   "TARGET_SIMD"
 519   {
 520     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 521     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 522   }
 523   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 524 )
 525
 526 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 527  [(set (match_operand:VMUL 0 "register_operand" "=w")
 528     (mult:VMUL
 529       (vec_duplicate:VMUL
 530             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 531       (match_operand:VMUL 2 "register_operand" "w")))]
 532   "TARGET_SIMD"
 533   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 534   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 535 )
 536
 537 (define_insn "aarch64_rsqrte<mode>"
 538   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 539         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 540                      UNSPEC_RSQRTE))]
 541   "TARGET_SIMD"
 542   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 543   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 544
 545 (define_insn "aarch64_rsqrts<mode>"
 546   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 547         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 548                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 549          UNSPEC_RSQRTS))]
 550   "TARGET_SIMD"
 551   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 552   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 553
 554 (define_expand "rsqrt<mode>2"
 555   [(set (match_operand:VALLF 0 "register_operand" "=w")
 556         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 557                      UNSPEC_RSQRT))]
 558   "TARGET_SIMD"
 559 {
 560   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 561   DONE;
 562 })
 563
 564 (define_insn "*aarch64_mul3_elt_to_64v2df"
 565   [(set (match_operand:DF 0 "register_operand" "=w")
 566      (mult:DF
 567        (vec_select:DF
 568          (match_operand:V2DF 1 "register_operand" "w")
 569          (parallel [(match_operand:SI 2 "immediate_operand")]))
 570        (match_operand:DF 3 "register_operand" "w")))]
 571   "TARGET_SIMD"
 572   {
 573     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 574     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 575   }
 576   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 577 )
 578
 579 (define_insn "neg<mode>2"
 580   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 581         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 582   "TARGET_SIMD"
 583   "neg\t%0.<Vtype>, %1.<Vtype>"
 584   [(set_attr "type" "neon_neg<q>")]
 585 )
 586
 587 (define_insn "abs<mode>2"
 588   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 589         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 590   "TARGET_SIMD"
 591   "abs\t%0.<Vtype>, %1.<Vtype>"
 592   [(set_attr "type" "neon_abs<q>")]
 593 )
 594
 595 ;; The intrinsic version of integer ABS must not be allowed to
 596 ;; combine with any operation with an integerated ABS step, such
 597 ;; as SABD.
 598 (define_insn "aarch64_abs<mode>"
 599   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 600           (unspec:VSDQ_I_DI
 601             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 602            UNSPEC_ABS))]
 603   "TARGET_SIMD"
 604   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 605   [(set_attr "type" "neon_abs<q>")]
 606 )
 607
 608 (define_insn "abd<mode>_3"
 609   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 610         (abs:VDQ_BHSI (minus:VDQ_BHSI
 611                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 612                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 613   "TARGET_SIMD"
 614   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 615   [(set_attr "type" "neon_abd<q>")]
 616 )
 617
 618 (define_insn "aba<mode>_3"
 619   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 620         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 621                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 622                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 623                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 624   "TARGET_SIMD"
 625   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 626   [(set_attr "type" "neon_arith_acc<q>")]
 627 )
 628
 629 (define_insn "fabd<mode>3"
 630   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 631         (abs:VHSDF_HSDF
 632           (minus:VHSDF_HSDF
 633             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 634             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 635   "TARGET_SIMD"
 636   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 637   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 638 )
 639
 640 ;; For AND (vector, register) and BIC (vector, immediate)
 641 (define_insn "and<mode>3"
 642   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 643         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 644                    (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
 645   "TARGET_SIMD"
 646   {
 647     switch (which_alternative)
 648       {
 649       case 0:
 650         return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 651       case 1:
 652         return aarch64_output_simd_mov_immediate (operands[2],
 653            <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
 654       default:
 655         gcc_unreachable ();
 656       }
 657   }
 658   [(set_attr "type" "neon_logic<q>")]
 659 )
 660
 661 ;; For ORR (vector, register) and ORR (vector, immediate)
 662 (define_insn "ior<mode>3"
 663   [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
 664         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
 665                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
 666   "TARGET_SIMD"
 667   {
 668     switch (which_alternative)
 669       {
 670       case 0:
 671         return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
 672       case 1:
 673         return aarch64_output_simd_mov_immediate (operands[2],
 674                 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
 675       default:
 676         gcc_unreachable ();
 677       }
 678   }
 679   [(set_attr "type" "neon_logic<q>")]
 680 )
 681
 682 (define_insn "xor<mode>3"
 683   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 684         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 685                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 686   "TARGET_SIMD"
 687   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 688   [(set_attr "type" "neon_logic<q>")]
 689 )
 690
 691 (define_insn "one_cmpl<mode>2"
 692   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 693         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 694   "TARGET_SIMD"
 695   "not\t%0.<Vbtype>, %1.<Vbtype>"
 696   [(set_attr "type" "neon_logic<q>")]
 697 )
 698
 699 (define_insn "aarch64_simd_vec_set<mode>"
 700   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
 701         (vec_merge:VDQ_BHSI
 702             (vec_duplicate:VDQ_BHSI
 703                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
 704             (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
 705             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 706   "TARGET_SIMD"
 707   {
 708    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
 709    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 710    switch (which_alternative)
 711      {
 712      case 0:
 713         return "ins\\t%0.<Vetype>[%p2], %w1";
 714      case 1:
 715         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 716      case 2:
 717         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 718      default:
 719         gcc_unreachable ();
 720      }
 721   }
 722   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
 723 )
 724
 725 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 726   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 727         (vec_merge:VALL_F16
 728             (vec_duplicate:VALL_F16
 729               (vec_select:<VEL>
 730                 (match_operand:VALL_F16 3 "register_operand" "w")
 731                 (parallel
 732                   [(match_operand:SI 4 "immediate_operand" "i")])))
 733             (match_operand:VALL_F16 1 "register_operand" "0")
 734             (match_operand:SI 2 "immediate_operand" "i")))]
 735   "TARGET_SIMD"
 736   {
 737     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
 738     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 739     operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
 740
 741     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 742   }
 743   [(set_attr "type" "neon_ins<q>")]
 744 )
 745
 746 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 747   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 748         (vec_merge:VALL_F16_NO_V2Q
 749             (vec_duplicate:VALL_F16_NO_V2Q
 750               (vec_select:<VEL>
 751                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 752                 (parallel
 753                   [(match_operand:SI 4 "immediate_operand" "i")])))
 754             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 755             (match_operand:SI 2 "immediate_operand" "i")))]
 756   "TARGET_SIMD"
 757   {
 758     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
 759     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 760     operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
 761                            INTVAL (operands[4])));
 762
 763     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 764   }
 765   [(set_attr "type" "neon_ins<q>")]
 766 )
 767
 768 (define_insn "aarch64_simd_lshr<mode>"
 769  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 770        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 771                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 772  "TARGET_SIMD"
 773  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 774   [(set_attr "type" "neon_shift_imm<q>")]
 775 )
 776
 777 (define_insn "aarch64_simd_ashr<mode>"
 778  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 779        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 780                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 781  "TARGET_SIMD"
 782  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 783   [(set_attr "type" "neon_shift_imm<q>")]
 784 )
 785
 786 (define_insn "aarch64_simd_imm_shl<mode>"
 787  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 788        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 789                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 790  "TARGET_SIMD"
 791   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 792   [(set_attr "type" "neon_shift_imm<q>")]
 793 )
 794
 795 (define_insn "aarch64_simd_reg_sshl<mode>"
 796  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 797        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 798                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 799  "TARGET_SIMD"
 800  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 801   [(set_attr "type" "neon_shift_reg<q>")]
 802 )
 803
 804 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 805  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 806        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 807                     (match_operand:VDQ_I 2 "register_operand" "w")]
 808                    UNSPEC_ASHIFT_UNSIGNED))]
 809  "TARGET_SIMD"
 810  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 811   [(set_attr "type" "neon_shift_reg<q>")]
 812 )
 813
 814 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 815  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 816        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 817                     (match_operand:VDQ_I 2 "register_operand" "w")]
 818                    UNSPEC_ASHIFT_SIGNED))]
 819  "TARGET_SIMD"
 820  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 821   [(set_attr "type" "neon_shift_reg<q>")]
 822 )
 823
 824 (define_expand "ashl<mode>3"
 825   [(match_operand:VDQ_I 0 "register_operand" "")
 826    (match_operand:VDQ_I 1 "register_operand" "")
 827    (match_operand:SI  2 "general_operand" "")]
 828  "TARGET_SIMD"
 829 {
 830   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 831   int shift_amount;
 832
 833   if (CONST_INT_P (operands[2]))
 834     {
 835       shift_amount = INTVAL (operands[2]);
 836       if (shift_amount >= 0 && shift_amount < bit_width)
 837         {
 838           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 839                                                        shift_amount);
 840           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 841                                                      operands[1],
 842                                                      tmp));
 843           DONE;
 844         }
 845       else
 846         {
 847           operands[2] = force_reg (SImode, operands[2]);
 848         }
 849     }
 850   else if (MEM_P (operands[2]))
 851     {
 852       operands[2] = force_reg (SImode, operands[2]);
 853     }
 854
 855   if (REG_P (operands[2]))
 856     {
 857       rtx tmp = gen_reg_rtx (<MODE>mode);
 858       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 859                                              convert_to_mode (<VEL>mode,
 860                                                               operands[2],
 861                                                               0)));
 862       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 863                                                   tmp));
 864       DONE;
 865     }
 866   else
 867     FAIL;
 868 }
 869 )
 870
 871 (define_expand "lshr<mode>3"
 872   [(match_operand:VDQ_I 0 "register_operand" "")
 873    (match_operand:VDQ_I 1 "register_operand" "")
 874    (match_operand:SI  2 "general_operand" "")]
 875  "TARGET_SIMD"
 876 {
 877   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 878   int shift_amount;
 879
 880   if (CONST_INT_P (operands[2]))
 881     {
 882       shift_amount = INTVAL (operands[2]);
 883       if (shift_amount > 0 && shift_amount <= bit_width)
 884         {
 885           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 886                                                        shift_amount);
 887           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 888                                                   operands[1],
 889                                                   tmp));
 890           DONE;
 891         }
 892       else
 893         operands[2] = force_reg (SImode, operands[2]);
 894     }
 895   else if (MEM_P (operands[2]))
 896     {
 897       operands[2] = force_reg (SImode, operands[2]);
 898     }
 899
 900   if (REG_P (operands[2]))
 901     {
 902       rtx tmp = gen_reg_rtx (SImode);
 903       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 904       emit_insn (gen_negsi2 (tmp, operands[2]));
 905       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 906                                              convert_to_mode (<VEL>mode,
 907                                                               tmp, 0)));
 908       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 909                                                           operands[1],
 910                                                           tmp1));
 911       DONE;
 912     }
 913   else
 914     FAIL;
 915 }
 916 )
 917
 918 (define_expand "ashr<mode>3"
 919   [(match_operand:VDQ_I 0 "register_operand" "")
 920    (match_operand:VDQ_I 1 "register_operand" "")
 921    (match_operand:SI  2 "general_operand" "")]
 922  "TARGET_SIMD"
 923 {
 924   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 925   int shift_amount;
 926
 927   if (CONST_INT_P (operands[2]))
 928     {
 929       shift_amount = INTVAL (operands[2]);
 930       if (shift_amount > 0 && shift_amount <= bit_width)
 931         {
 932           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 933                                                        shift_amount);
 934           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
 935                                                   operands[1],
 936                                                   tmp));
 937           DONE;
 938         }
 939       else
 940         operands[2] = force_reg (SImode, operands[2]);
 941     }
 942   else if (MEM_P (operands[2]))
 943     {
 944       operands[2] = force_reg (SImode, operands[2]);
 945     }
 946
 947   if (REG_P (operands[2]))
 948     {
 949       rtx tmp = gen_reg_rtx (SImode);
 950       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 951       emit_insn (gen_negsi2 (tmp, operands[2]));
 952       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 953                                              convert_to_mode (<VEL>mode,
 954                                                               tmp, 0)));
 955       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
 956                                                         operands[1],
 957                                                         tmp1));
 958       DONE;
 959     }
 960   else
 961     FAIL;
 962 }
 963 )
 964
 965 (define_expand "vashl<mode>3"
 966  [(match_operand:VDQ_I 0 "register_operand" "")
 967   (match_operand:VDQ_I 1 "register_operand" "")
 968   (match_operand:VDQ_I 2 "register_operand" "")]
 969  "TARGET_SIMD"
 970 {
 971   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 972                                               operands[2]));
 973   DONE;
 974 })
 975
 976 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
 977 ;; Negating individual lanes most certainly offsets the
 978 ;; gain from vectorization.
 979 (define_expand "vashr<mode>3"
 980  [(match_operand:VDQ_BHSI 0 "register_operand" "")
 981   (match_operand:VDQ_BHSI 1 "register_operand" "")
 982   (match_operand:VDQ_BHSI 2 "register_operand" "")]
 983  "TARGET_SIMD"
 984 {
 985   rtx neg = gen_reg_rtx (<MODE>mode);
 986   emit (gen_neg<mode>2 (neg, operands[2]));
 987   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
 988                                                     neg));
 989   DONE;
 990 })
 991
 992 ;; DI vector shift
 993 (define_expand "aarch64_ashr_simddi"
 994   [(match_operand:DI 0 "register_operand" "=w")
 995    (match_operand:DI 1 "register_operand" "w")
 996    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
 997   "TARGET_SIMD"
 998   {
 999     /* An arithmetic shift right by 64 fills the result with copies of the sign
1000        bit, just like asr by 63 - however the standard pattern does not handle
1001        a shift by 64.  */
1002     if (INTVAL (operands[2]) == 64)
1003       operands[2] = GEN_INT (63);
1004     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1005     DONE;
1006   }
1007 )
1008
1009 (define_expand "vlshr<mode>3"
1010  [(match_operand:VDQ_BHSI 0 "register_operand" "")
1011   (match_operand:VDQ_BHSI 1 "register_operand" "")
1012   (match_operand:VDQ_BHSI 2 "register_operand" "")]
1013  "TARGET_SIMD"
1014 {
1015   rtx neg = gen_reg_rtx (<MODE>mode);
1016   emit (gen_neg<mode>2 (neg, operands[2]));
1017   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1018                                                       neg));
1019   DONE;
1020 })
1021
1022 (define_expand "aarch64_lshr_simddi"
1023   [(match_operand:DI 0 "register_operand" "=w")
1024    (match_operand:DI 1 "register_operand" "w")
1025    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1026   "TARGET_SIMD"
1027   {
1028     if (INTVAL (operands[2]) == 64)
1029       emit_move_insn (operands[0], const0_rtx);
1030     else
1031       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1032     DONE;
1033   }
1034 )
1035
1036 (define_expand "vec_set<mode>"
1037   [(match_operand:VDQ_BHSI 0 "register_operand")
1038    (match_operand:<VEL> 1 "register_operand")
1039    (match_operand:SI 2 "immediate_operand")]
1040   "TARGET_SIMD"
1041   {
1042     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1043     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1044                                             GEN_INT (elem), operands[0]));
1045     DONE;
1046   }
1047 )
1048
1049 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1050 (define_insn "vec_shr_<mode>"
1051   [(set (match_operand:VD 0 "register_operand" "=w")
1052         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1053                     (match_operand:SI 2 "immediate_operand" "i")]
1054                    UNSPEC_VEC_SHR))]
1055   "TARGET_SIMD"
1056   {
1057     if (BYTES_BIG_ENDIAN)
1058       return "shl %d0, %d1, %2";
1059     else
1060       return "ushr %d0, %d1, %2";
1061   }
1062   [(set_attr "type" "neon_shift_imm")]
1063 )
1064
1065 (define_insn "aarch64_simd_vec_setv2di"
1066   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1067         (vec_merge:V2DI
1068             (vec_duplicate:V2DI
1069                 (match_operand:DI 1 "register_operand" "r,w"))
1070             (match_operand:V2DI 3 "register_operand" "0,0")
1071             (match_operand:SI 2 "immediate_operand" "i,i")))]
1072   "TARGET_SIMD"
1073   {
1074     int elt = ENDIAN_LANE_N (2, exact_log2 (INTVAL (operands[2])));
1075     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1076     switch (which_alternative)
1077       {
1078       case 0:
1079         return "ins\\t%0.d[%p2], %1";
1080       case 1:
1081         return "ins\\t%0.d[%p2], %1.d[0]";
1082       default:
1083         gcc_unreachable ();
1084       }
1085   }
1086   [(set_attr "type" "neon_from_gp, neon_ins_q")]
1087 )
1088
1089 (define_expand "vec_setv2di"
1090   [(match_operand:V2DI 0 "register_operand")
1091    (match_operand:DI 1 "register_operand")
1092    (match_operand:SI 2 "immediate_operand")]
1093   "TARGET_SIMD"
1094   {
1095     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1096     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1097                                           GEN_INT (elem), operands[0]));
1098     DONE;
1099   }
1100 )
1101
1102 (define_insn "aarch64_simd_vec_set<mode>"
1103   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1104         (vec_merge:VDQF_F16
1105             (vec_duplicate:VDQF_F16
1106                 (match_operand:<VEL> 1 "register_operand" "w"))
1107             (match_operand:VDQF_F16 3 "register_operand" "0")
1108             (match_operand:SI 2 "immediate_operand" "i")))]
1109   "TARGET_SIMD"
1110   {
1111     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1112
1113     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1114     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1115   }
1116   [(set_attr "type" "neon_ins<q>")]
1117 )
1118
1119 (define_expand "vec_set<mode>"
1120   [(match_operand:VDQF_F16 0 "register_operand" "+w")
1121    (match_operand:<VEL> 1 "register_operand" "w")
1122    (match_operand:SI 2 "immediate_operand" "")]
1123   "TARGET_SIMD"
1124   {
1125     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1126     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1127                                           GEN_INT (elem), operands[0]));
1128     DONE;
1129   }
1130 )
1131
1132
1133 (define_insn "aarch64_mla<mode>"
1134  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1135        (plus:VDQ_BHSI (mult:VDQ_BHSI
1136                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1137                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1138                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1139  "TARGET_SIMD"
1140  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1141   [(set_attr "type" "neon_mla_<Vetype><q>")]
1142 )
1143
1144 (define_insn "*aarch64_mla_elt<mode>"
1145  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1146        (plus:VDQHS
1147          (mult:VDQHS
1148            (vec_duplicate:VDQHS
1149               (vec_select:<VEL>
1150                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1151                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1152            (match_operand:VDQHS 3 "register_operand" "w"))
1153          (match_operand:VDQHS 4 "register_operand" "0")))]
1154  "TARGET_SIMD"
1155   {
1156     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1157     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1158   }
1159   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1160 )
1161
1162 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1163  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1164        (plus:VDQHS
1165          (mult:VDQHS
1166            (vec_duplicate:VDQHS
1167               (vec_select:<VEL>
1168                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1169                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1170            (match_operand:VDQHS 3 "register_operand" "w"))
1171          (match_operand:VDQHS 4 "register_operand" "0")))]
1172  "TARGET_SIMD"
1173   {
1174     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1175     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1176   }
1177   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1178 )
1179
1180 (define_insn "*aarch64_mla_elt_merge<mode>"
1181   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1182         (plus:VDQHS
1183           (mult:VDQHS (vec_duplicate:VDQHS
1184                   (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1185                 (match_operand:VDQHS 2 "register_operand" "w"))
1186           (match_operand:VDQHS 3 "register_operand" "0")))]
1187  "TARGET_SIMD"
1188  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1189   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1190 )
1191
1192 (define_insn "aarch64_mls<mode>"
1193  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1194        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1195                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1196                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1197  "TARGET_SIMD"
1198  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1199   [(set_attr "type" "neon_mla_<Vetype><q>")]
1200 )
1201
1202 (define_insn "*aarch64_mls_elt<mode>"
1203  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1204        (minus:VDQHS
1205          (match_operand:VDQHS 4 "register_operand" "0")
1206          (mult:VDQHS
1207            (vec_duplicate:VDQHS
1208               (vec_select:<VEL>
1209                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1210                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1211            (match_operand:VDQHS 3 "register_operand" "w"))))]
1212  "TARGET_SIMD"
1213   {
1214     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1215     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1216   }
1217   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1218 )
1219
1220 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1221  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1222        (minus:VDQHS
1223          (match_operand:VDQHS 4 "register_operand" "0")
1224          (mult:VDQHS
1225            (vec_duplicate:VDQHS
1226               (vec_select:<VEL>
1227                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1228                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1229            (match_operand:VDQHS 3 "register_operand" "w"))))]
1230  "TARGET_SIMD"
1231   {
1232     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1233     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1234   }
1235   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1236 )
1237
1238 (define_insn "*aarch64_mls_elt_merge<mode>"
1239   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1240         (minus:VDQHS
1241           (match_operand:VDQHS 1 "register_operand" "0")
1242           (mult:VDQHS (vec_duplicate:VDQHS
1243                   (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1244                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1245   "TARGET_SIMD"
1246   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1247   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1248 )
1249
1250 ;; Max/Min operations.
1251 (define_insn "<su><maxmin><mode>3"
1252  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1253        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1254                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1255  "TARGET_SIMD"
1256  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1257   [(set_attr "type" "neon_minmax<q>")]
1258 )
1259
1260 (define_expand "<su><maxmin>v2di3"
1261  [(set (match_operand:V2DI 0 "register_operand" "")
1262        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1263                     (match_operand:V2DI 2 "register_operand" "")))]
1264  "TARGET_SIMD"
1265 {
1266   enum rtx_code cmp_operator;
1267   rtx cmp_fmt;
1268
1269   switch (<CODE>)
1270     {
1271     case UMIN:
1272       cmp_operator = LTU;
1273       break;
1274     case SMIN:
1275       cmp_operator = LT;
1276       break;
1277     case UMAX:
1278       cmp_operator = GTU;
1279       break;
1280     case SMAX:
1281       cmp_operator = GT;
1282       break;
1283     default:
1284       gcc_unreachable ();
1285     }
1286
1287   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1288   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1289               operands[2], cmp_fmt, operands[1], operands[2]));
1290   DONE;
1291 })
1292
1293 ;; Pairwise Integer Max/Min operations.
1294 (define_insn "aarch64_<maxmin_uns>p<mode>"
1295  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1296        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1297                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1298                         MAXMINV))]
1299  "TARGET_SIMD"
1300  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1301   [(set_attr "type" "neon_minmax<q>")]
1302 )
1303
1304 ;; Pairwise FP Max/Min operations.
1305 (define_insn "aarch64_<maxmin_uns>p<mode>"
1306  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1307        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1308                       (match_operand:VHSDF 2 "register_operand" "w")]
1309                       FMAXMINV))]
1310  "TARGET_SIMD"
1311  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1312   [(set_attr "type" "neon_minmax<q>")]
1313 )
1314
1315 ;; vec_concat gives a new vector with the low elements from operand 1, and
1316 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1317 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1318 ;; What that means, is that the RTL descriptions of the below patterns
1319 ;; need to change depending on endianness.
1320
1321 ;; Move to the low architectural bits of the register.
1322 ;; On little-endian this is { operand, zeroes }
1323 ;; On big-endian this is { zeroes, operand }
1324
1325 (define_insn "move_lo_quad_internal_<mode>"
1326   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1327         (vec_concat:VQ_NO2E
1328           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1329           (vec_duplicate:<VHALF> (const_int 0))))]
1330   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1331   "@
1332    dup\\t%d0, %1.d[0]
1333    fmov\\t%d0, %1
1334    dup\\t%d0, %1"
1335   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1336    (set_attr "simd" "yes,*,yes")
1337    (set_attr "fp" "*,yes,*")
1338    (set_attr "length" "4")]
1339 )
1340
1341 (define_insn "move_lo_quad_internal_<mode>"
1342   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1343         (vec_concat:VQ_2E
1344           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1345           (const_int 0)))]
1346   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1347   "@
1348    dup\\t%d0, %1.d[0]
1349    fmov\\t%d0, %1
1350    dup\\t%d0, %1"
1351   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1352    (set_attr "simd" "yes,*,yes")
1353    (set_attr "fp" "*,yes,*")
1354    (set_attr "length" "4")]
1355 )
1356
1357 (define_insn "move_lo_quad_internal_be_<mode>"
1358   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1359         (vec_concat:VQ_NO2E
1360           (vec_duplicate:<VHALF> (const_int 0))
1361           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1362   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1363   "@
1364    dup\\t%d0, %1.d[0]
1365    fmov\\t%d0, %1
1366    dup\\t%d0, %1"
1367   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1368    (set_attr "simd" "yes,*,yes")
1369    (set_attr "fp" "*,yes,*")
1370    (set_attr "length" "4")]
1371 )
1372
1373 (define_insn "move_lo_quad_internal_be_<mode>"
1374   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1375         (vec_concat:VQ_2E
1376           (const_int 0)
1377           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1378   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1379   "@
1380    dup\\t%d0, %1.d[0]
1381    fmov\\t%d0, %1
1382    dup\\t%d0, %1"
1383   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1384    (set_attr "simd" "yes,*,yes")
1385    (set_attr "fp" "*,yes,*")
1386    (set_attr "length" "4")]
1387 )
1388
1389 (define_expand "move_lo_quad_<mode>"
1390   [(match_operand:VQ 0 "register_operand")
1391    (match_operand:VQ 1 "register_operand")]
1392   "TARGET_SIMD"
1393 {
1394   if (BYTES_BIG_ENDIAN)
1395     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1396   else
1397     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1398   DONE;
1399 }
1400 )
1401
1402 ;; Move operand1 to the high architectural bits of the register, keeping
1403 ;; the low architectural bits of operand2.
1404 ;; For little-endian this is { operand2, operand1 }
1405 ;; For big-endian this is { operand1, operand2 }
1406
1407 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1408   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1409         (vec_concat:VQ
1410           (vec_select:<VHALF>
1411                 (match_dup 0)
1412                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1413           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1414   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1415   "@
1416    ins\\t%0.d[1], %1.d[0]
1417    ins\\t%0.d[1], %1"
1418   [(set_attr "type" "neon_ins")]
1419 )
1420
1421 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1422   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1423         (vec_concat:VQ
1424           (match_operand:<VHALF> 1 "register_operand" "w,r")
1425           (vec_select:<VHALF>
1426                 (match_dup 0)
1427                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1428   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1429   "@
1430    ins\\t%0.d[1], %1.d[0]
1431    ins\\t%0.d[1], %1"
1432   [(set_attr "type" "neon_ins")]
1433 )
1434
1435 (define_expand "move_hi_quad_<mode>"
1436  [(match_operand:VQ 0 "register_operand" "")
1437   (match_operand:<VHALF> 1 "register_operand" "")]
1438  "TARGET_SIMD"
1439 {
1440   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1441   if (BYTES_BIG_ENDIAN)
1442     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1443                     operands[1], p));
1444   else
1445     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1446                     operands[1], p));
1447   DONE;
1448 })
1449
1450 ;; Narrowing operations.
1451
1452 ;; For doubles.
1453 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1454  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1455        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1456  "TARGET_SIMD"
1457  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1458   [(set_attr "type" "neon_shift_imm_narrow_q")]
1459 )
1460
1461 (define_expand "vec_pack_trunc_<mode>"
1462  [(match_operand:<VNARROWD> 0 "register_operand" "")
1463   (match_operand:VDN 1 "register_operand" "")
1464   (match_operand:VDN 2 "register_operand" "")]
1465  "TARGET_SIMD"
1466 {
1467   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1468   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1469   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1470
1471   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1472   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1473   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1474   DONE;
1475 })
1476
1477 ;; For quads.
1478
1479 (define_insn "vec_pack_trunc_<mode>"
1480  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1481        (vec_concat:<VNARROWQ2>
1482          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1483          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1484  "TARGET_SIMD"
1485  {
1486    if (BYTES_BIG_ENDIAN)
1487      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1488    else
1489      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1490  }
1491   [(set_attr "type" "multiple")
1492    (set_attr "length" "8")]
1493 )
1494
1495 ;; Widening operations.
1496
1497 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1498   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1499         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1500                                (match_operand:VQW 1 "register_operand" "w")
1501                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1502                             )))]
1503   "TARGET_SIMD"
1504   "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1505   [(set_attr "type" "neon_shift_imm_long")]
1506 )
1507
1508 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1509   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1510         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1511                                (match_operand:VQW 1 "register_operand" "w")
1512                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1513                             )))]
1514   "TARGET_SIMD"
1515   "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1516   [(set_attr "type" "neon_shift_imm_long")]
1517 )
1518
1519 (define_expand "vec_unpack<su>_hi_<mode>"
1520   [(match_operand:<VWIDE> 0 "register_operand" "")
1521    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1522   "TARGET_SIMD"
1523   {
1524     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1525     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1526                                                           operands[1], p));
1527     DONE;
1528   }
1529 )
1530
1531 (define_expand "vec_unpack<su>_lo_<mode>"
1532   [(match_operand:<VWIDE> 0 "register_operand" "")
1533    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1534   "TARGET_SIMD"
1535   {
1536     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1537     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1538                                                           operands[1], p));
1539     DONE;
1540   }
1541 )
1542
1543 ;; Widening arithmetic.
1544
1545 (define_insn "*aarch64_<su>mlal_lo<mode>"
1546   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1547         (plus:<VWIDE>
1548           (mult:<VWIDE>
1549               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1550                  (match_operand:VQW 2 "register_operand" "w")
1551                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1552               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1553                  (match_operand:VQW 4 "register_operand" "w")
1554                  (match_dup 3))))
1555           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1556   "TARGET_SIMD"
1557   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1558   [(set_attr "type" "neon_mla_<Vetype>_long")]
1559 )
1560
1561 (define_insn "*aarch64_<su>mlal_hi<mode>"
1562   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1563         (plus:<VWIDE>
1564           (mult:<VWIDE>
1565               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1566                  (match_operand:VQW 2 "register_operand" "w")
1567                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1568               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569                  (match_operand:VQW 4 "register_operand" "w")
1570                  (match_dup 3))))
1571           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1572   "TARGET_SIMD"
1573   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1574   [(set_attr "type" "neon_mla_<Vetype>_long")]
1575 )
1576
1577 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1578   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1579         (minus:<VWIDE>
1580           (match_operand:<VWIDE> 1 "register_operand" "0")
1581           (mult:<VWIDE>
1582               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1583                  (match_operand:VQW 2 "register_operand" "w")
1584                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1585               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1586                  (match_operand:VQW 4 "register_operand" "w")
1587                  (match_dup 3))))))]
1588   "TARGET_SIMD"
1589   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1590   [(set_attr "type" "neon_mla_<Vetype>_long")]
1591 )
1592
1593 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1594   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1595         (minus:<VWIDE>
1596           (match_operand:<VWIDE> 1 "register_operand" "0")
1597           (mult:<VWIDE>
1598               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599                  (match_operand:VQW 2 "register_operand" "w")
1600                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1601               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602                  (match_operand:VQW 4 "register_operand" "w")
1603                  (match_dup 3))))))]
1604   "TARGET_SIMD"
1605   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1606   [(set_attr "type" "neon_mla_<Vetype>_long")]
1607 )
1608
1609 (define_insn "*aarch64_<su>mlal<mode>"
1610   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1611         (plus:<VWIDE>
1612           (mult:<VWIDE>
1613             (ANY_EXTEND:<VWIDE>
1614               (match_operand:VD_BHSI 1 "register_operand" "w"))
1615             (ANY_EXTEND:<VWIDE>
1616               (match_operand:VD_BHSI 2 "register_operand" "w")))
1617           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1618   "TARGET_SIMD"
1619   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1620   [(set_attr "type" "neon_mla_<Vetype>_long")]
1621 )
1622
1623 (define_insn "*aarch64_<su>mlsl<mode>"
1624   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1625         (minus:<VWIDE>
1626           (match_operand:<VWIDE> 1 "register_operand" "0")
1627           (mult:<VWIDE>
1628             (ANY_EXTEND:<VWIDE>
1629               (match_operand:VD_BHSI 2 "register_operand" "w"))
1630             (ANY_EXTEND:<VWIDE>
1631               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1632   "TARGET_SIMD"
1633   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1634   [(set_attr "type" "neon_mla_<Vetype>_long")]
1635 )
1636
1637 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1638  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1639        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1640                            (match_operand:VQW 1 "register_operand" "w")
1641                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1642                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643                            (match_operand:VQW 2 "register_operand" "w")
1644                            (match_dup 3)))))]
1645   "TARGET_SIMD"
1646   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1647   [(set_attr "type" "neon_mul_<Vetype>_long")]
1648 )
1649
1650 (define_expand "vec_widen_<su>mult_lo_<mode>"
1651   [(match_operand:<VWIDE> 0 "register_operand" "")
1652    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1653    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1654  "TARGET_SIMD"
1655  {
1656    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1657    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1658                                                        operands[1],
1659                                                        operands[2], p));
1660    DONE;
1661  }
1662 )
1663
1664 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1665  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1666       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1667                             (match_operand:VQW 1 "register_operand" "w")
1668                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1669                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1670                             (match_operand:VQW 2 "register_operand" "w")
1671                             (match_dup 3)))))]
1672   "TARGET_SIMD"
1673   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1674   [(set_attr "type" "neon_mul_<Vetype>_long")]
1675 )
1676
1677 (define_expand "vec_widen_<su>mult_hi_<mode>"
1678   [(match_operand:<VWIDE> 0 "register_operand" "")
1679    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1680    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1681  "TARGET_SIMD"
1682  {
1683    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1684    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1685                                                        operands[1],
1686                                                        operands[2], p));
1687    DONE;
1688
1689  }
1690 )
1691
1692 ;; FP vector operations.
1693 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1694 ;; double-precision (64-bit) floating-point data types and arithmetic as
1695 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1696 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1697 ;;
1698 ;; Floating-point operations can raise an exception.  Vectorizing such
1699 ;; operations are safe because of reasons explained below.
1700 ;;
1701 ;; ARMv8 permits an extension to enable trapped floating-point
1702 ;; exception handling, however this is an optional feature.  In the
1703 ;; event of a floating-point exception being raised by vectorised
1704 ;; code then:
1705 ;; 1.  If trapped floating-point exceptions are available, then a trap
1706 ;;     will be taken when any lane raises an enabled exception.  A trap
1707 ;;     handler may determine which lane raised the exception.
1708 ;; 2.  Alternatively a sticky exception flag is set in the
1709 ;;     floating-point status register (FPSR).  Software may explicitly
1710 ;;     test the exception flags, in which case the tests will either
1711 ;;     prevent vectorisation, allowing precise identification of the
1712 ;;     failing operation, or if tested outside of vectorisable regions
1713 ;;     then the specific operation and lane are not of interest.
1714
1715 ;; FP arithmetic operations.
1716
1717 (define_insn "add<mode>3"
1718  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720                    (match_operand:VHSDF 2 "register_operand" "w")))]
1721  "TARGET_SIMD"
1722  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 )
1725
1726 (define_insn "sub<mode>3"
1727  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729                     (match_operand:VHSDF 2 "register_operand" "w")))]
1730  "TARGET_SIMD"
1731  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1733 )
1734
1735 (define_insn "mul<mode>3"
1736  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738                    (match_operand:VHSDF 2 "register_operand" "w")))]
1739  "TARGET_SIMD"
1740  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1741   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1742 )
1743
1744 (define_expand "div<mode>3"
1745  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1746        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1747                   (match_operand:VHSDF 2 "register_operand" "w")))]
1748  "TARGET_SIMD"
1749 {
1750   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1751     DONE;
1752
1753   operands[1] = force_reg (<MODE>mode, operands[1]);
1754 })
1755
1756 (define_insn "*div<mode>3"
1757  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1759                  (match_operand:VHSDF 2 "register_operand" "w")))]
1760  "TARGET_SIMD"
1761  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1762   [(set_attr "type" "neon_fp_div_<stype><q>")]
1763 )
1764
1765 (define_insn "neg<mode>2"
1766  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1767        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1768  "TARGET_SIMD"
1769  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1770   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1771 )
1772
1773 (define_insn "abs<mode>2"
1774  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1775        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1776  "TARGET_SIMD"
1777  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1778   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1779 )
1780
1781 (define_insn "fma<mode>4"
1782   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1783        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1784                   (match_operand:VHSDF 2 "register_operand" "w")
1785                   (match_operand:VHSDF 3 "register_operand" "0")))]
1786   "TARGET_SIMD"
1787  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1788   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1789 )
1790
1791 (define_insn "*aarch64_fma4_elt<mode>"
1792   [(set (match_operand:VDQF 0 "register_operand" "=w")
1793     (fma:VDQF
1794       (vec_duplicate:VDQF
1795         (vec_select:<VEL>
1796           (match_operand:VDQF 1 "register_operand" "<h_con>")
1797           (parallel [(match_operand:SI 2 "immediate_operand")])))
1798       (match_operand:VDQF 3 "register_operand" "w")
1799       (match_operand:VDQF 4 "register_operand" "0")))]
1800   "TARGET_SIMD"
1801   {
1802     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1803     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1804   }
1805   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1806 )
1807
1808 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1809   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1810     (fma:VDQSF
1811       (vec_duplicate:VDQSF
1812         (vec_select:<VEL>
1813           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1814           (parallel [(match_operand:SI 2 "immediate_operand")])))
1815       (match_operand:VDQSF 3 "register_operand" "w")
1816       (match_operand:VDQSF 4 "register_operand" "0")))]
1817   "TARGET_SIMD"
1818   {
1819     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1820     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1821   }
1822   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1823 )
1824
1825 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1826   [(set (match_operand:VMUL 0 "register_operand" "=w")
1827     (fma:VMUL
1828       (vec_duplicate:VMUL
1829           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1830       (match_operand:VMUL 2 "register_operand" "w")
1831       (match_operand:VMUL 3 "register_operand" "0")))]
1832   "TARGET_SIMD"
1833   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1834   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1835 )
1836
1837 (define_insn "*aarch64_fma4_elt_to_64v2df"
1838   [(set (match_operand:DF 0 "register_operand" "=w")
1839     (fma:DF
1840         (vec_select:DF
1841           (match_operand:V2DF 1 "register_operand" "w")
1842           (parallel [(match_operand:SI 2 "immediate_operand")]))
1843       (match_operand:DF 3 "register_operand" "w")
1844       (match_operand:DF 4 "register_operand" "0")))]
1845   "TARGET_SIMD"
1846   {
1847     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1848     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1849   }
1850   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1851 )
1852
1853 (define_insn "fnma<mode>4"
1854   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1855         (fma:VHSDF
1856           (match_operand:VHSDF 1 "register_operand" "w")
1857           (neg:VHSDF
1858             (match_operand:VHSDF 2 "register_operand" "w"))
1859           (match_operand:VHSDF 3 "register_operand" "0")))]
1860   "TARGET_SIMD"
1861   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1862   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1863 )
1864
1865 (define_insn "*aarch64_fnma4_elt<mode>"
1866   [(set (match_operand:VDQF 0 "register_operand" "=w")
1867     (fma:VDQF
1868       (neg:VDQF
1869         (match_operand:VDQF 3 "register_operand" "w"))
1870       (vec_duplicate:VDQF
1871         (vec_select:<VEL>
1872           (match_operand:VDQF 1 "register_operand" "<h_con>")
1873           (parallel [(match_operand:SI 2 "immediate_operand")])))
1874       (match_operand:VDQF 4 "register_operand" "0")))]
1875   "TARGET_SIMD"
1876   {
1877     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1878     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1879   }
1880   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1881 )
1882
1883 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1884   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1885     (fma:VDQSF
1886       (neg:VDQSF
1887         (match_operand:VDQSF 3 "register_operand" "w"))
1888       (vec_duplicate:VDQSF
1889         (vec_select:<VEL>
1890           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1891           (parallel [(match_operand:SI 2 "immediate_operand")])))
1892       (match_operand:VDQSF 4 "register_operand" "0")))]
1893   "TARGET_SIMD"
1894   {
1895     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1896     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1897   }
1898   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1899 )
1900
1901 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1902   [(set (match_operand:VMUL 0 "register_operand" "=w")
1903     (fma:VMUL
1904       (neg:VMUL
1905         (match_operand:VMUL 2 "register_operand" "w"))
1906       (vec_duplicate:VMUL
1907         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1908       (match_operand:VMUL 3 "register_operand" "0")))]
1909   "TARGET_SIMD"
1910   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1911   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1912 )
1913
1914 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1915   [(set (match_operand:DF 0 "register_operand" "=w")
1916     (fma:DF
1917       (vec_select:DF
1918         (match_operand:V2DF 1 "register_operand" "w")
1919         (parallel [(match_operand:SI 2 "immediate_operand")]))
1920       (neg:DF
1921         (match_operand:DF 3 "register_operand" "w"))
1922       (match_operand:DF 4 "register_operand" "0")))]
1923   "TARGET_SIMD"
1924   {
1925     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1926     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1927   }
1928   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1929 )
1930
1931 ;; Vector versions of the floating-point frint patterns.
1932 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1933 (define_insn "<frint_pattern><mode>2"
1934   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1935         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1936                        FRINT))]
1937   "TARGET_SIMD"
1938   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1939   [(set_attr "type" "neon_fp_round_<stype><q>")]
1940 )
1941
1942 ;; Vector versions of the fcvt standard patterns.
1943 ;; Expands to lbtrunc, lround, lceil, lfloor
1944 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1945   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1946         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1947                                [(match_operand:VHSDF 1 "register_operand" "w")]
1948                                FCVT)))]
1949   "TARGET_SIMD"
1950   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1951   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1952 )
1953
1954 ;; HF Scalar variants of related SIMD instructions.
1955 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1956   [(set (match_operand:HI 0 "register_operand" "=w")
1957         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1958                       FCVT)))]
1959   "TARGET_SIMD_F16INST"
1960   "fcvt<frint_suffix><su>\t%h0, %h1"
1961   [(set_attr "type" "neon_fp_to_int_s")]
1962 )
1963
1964 (define_insn "<optab>_trunchfhi2"
1965   [(set (match_operand:HI 0 "register_operand" "=w")
1966         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1967   "TARGET_SIMD_F16INST"
1968   "fcvtz<su>\t%h0, %h1"
1969   [(set_attr "type" "neon_fp_to_int_s")]
1970 )
1971
1972 (define_insn "<optab>hihf2"
1973   [(set (match_operand:HF 0 "register_operand" "=w")
1974         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1975   "TARGET_SIMD_F16INST"
1976   "<su_optab>cvtf\t%h0, %h1"
1977   [(set_attr "type" "neon_int_to_fp_s")]
1978 )
1979
1980 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1981   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1982         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1983                                [(mult:VDQF
1984          (match_operand:VDQF 1 "register_operand" "w")
1985          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1986                                UNSPEC_FRINTZ)))]
1987   "TARGET_SIMD
1988    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1989                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1990   {
1991     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1992     char buf[64];
1993     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1994     output_asm_insn (buf, operands);
1995     return "";
1996   }
1997   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1998 )
1999
2000 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2001   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2002         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2003                                [(match_operand:VHSDF 1 "register_operand")]
2004                                 UNSPEC_FRINTZ)))]
2005   "TARGET_SIMD"
2006   {})
2007
2008 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2009   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2010         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2011                                [(match_operand:VHSDF 1 "register_operand")]
2012                                 UNSPEC_FRINTZ)))]
2013   "TARGET_SIMD"
2014   {})
2015
2016 (define_expand "ftrunc<VHSDF:mode>2"
2017   [(set (match_operand:VHSDF 0 "register_operand")
2018         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2019                        UNSPEC_FRINTZ))]
2020   "TARGET_SIMD"
2021   {})
2022
2023 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2024   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2025         (FLOATUORS:VHSDF
2026           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2027   "TARGET_SIMD"
2028   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2029   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2030 )
2031
2032 ;; Conversions between vectors of floats and doubles.
2033 ;; Contains a mix of patterns to match standard pattern names
2034 ;; and those for intrinsics.
2035
2036 ;; Float widening operations.
2037
2038 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2039   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2040         (float_extend:<VWIDE> (vec_select:<VHALF>
2041                                (match_operand:VQ_HSF 1 "register_operand" "w")
2042                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2043                             )))]
2044   "TARGET_SIMD"
2045   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2046   [(set_attr "type" "neon_fp_cvt_widen_s")]
2047 )
2048
2049 ;; Convert between fixed-point and floating-point (vector modes)
2050
2051 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2052   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2053         (unspec:<VHSDF:FCVT_TARGET>
2054           [(match_operand:VHSDF 1 "register_operand" "w")
2055            (match_operand:SI 2 "immediate_operand" "i")]
2056          FCVT_F2FIXED))]
2057   "TARGET_SIMD"
2058   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2059   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2060 )
2061
2062 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2063   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2064         (unspec:<VDQ_HSDI:FCVT_TARGET>
2065           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2066            (match_operand:SI 2 "immediate_operand" "i")]
2067          FCVT_FIXED2F))]
2068   "TARGET_SIMD"
2069   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2070   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2071 )
2072
2073 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2074 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2075 ;; the meaning of HI and LO changes depending on the target endianness.
2076 ;; While elsewhere we map the higher numbered elements of a vector to
2077 ;; the lower architectural lanes of the vector, for these patterns we want
2078 ;; to always treat "hi" as referring to the higher architectural lanes.
2079 ;; Consequently, while the patterns below look inconsistent with our
2080 ;; other big-endian patterns their behavior is as required.
2081
2082 (define_expand "vec_unpacks_lo_<mode>"
2083   [(match_operand:<VWIDE> 0 "register_operand" "")
2084    (match_operand:VQ_HSF 1 "register_operand" "")]
2085   "TARGET_SIMD"
2086   {
2087     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2088     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2089                                                        operands[1], p));
2090     DONE;
2091   }
2092 )
2093
2094 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2095   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2096         (float_extend:<VWIDE> (vec_select:<VHALF>
2097                                (match_operand:VQ_HSF 1 "register_operand" "w")
2098                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2099                             )))]
2100   "TARGET_SIMD"
2101   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2102   [(set_attr "type" "neon_fp_cvt_widen_s")]
2103 )
2104
2105 (define_expand "vec_unpacks_hi_<mode>"
2106   [(match_operand:<VWIDE> 0 "register_operand" "")
2107    (match_operand:VQ_HSF 1 "register_operand" "")]
2108   "TARGET_SIMD"
2109   {
2110     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2111     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2112                                                        operands[1], p));
2113     DONE;
2114   }
2115 )
2116 (define_insn "aarch64_float_extend_lo_<Vwide>"
2117   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2118         (float_extend:<VWIDE>
2119           (match_operand:VDF 1 "register_operand" "w")))]
2120   "TARGET_SIMD"
2121   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2122   [(set_attr "type" "neon_fp_cvt_widen_s")]
2123 )
2124
2125 ;; Float narrowing operations.
2126
2127 (define_insn "aarch64_float_truncate_lo_<mode>"
2128   [(set (match_operand:VDF 0 "register_operand" "=w")
2129       (float_truncate:VDF
2130         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2131   "TARGET_SIMD"
2132   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2133   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2134 )
2135
2136 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2137   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2138     (vec_concat:<VDBL>
2139       (match_operand:VDF 1 "register_operand" "0")
2140       (float_truncate:VDF
2141         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2142   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2143   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2144   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2145 )
2146
2147 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2148   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2149     (vec_concat:<VDBL>
2150       (float_truncate:VDF
2151         (match_operand:<VWIDE> 2 "register_operand" "w"))
2152       (match_operand:VDF 1 "register_operand" "0")))]
2153   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2154   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2155   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2156 )
2157
2158 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2159   [(match_operand:<VDBL> 0 "register_operand" "=w")
2160    (match_operand:VDF 1 "register_operand" "0")
2161    (match_operand:<VWIDE> 2 "register_operand" "w")]
2162   "TARGET_SIMD"
2163 {
2164   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2165                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2166                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2167   emit_insn (gen (operands[0], operands[1], operands[2]));
2168   DONE;
2169 }
2170 )
2171
2172 (define_expand "vec_pack_trunc_v2df"
2173   [(set (match_operand:V4SF 0 "register_operand")
2174       (vec_concat:V4SF
2175         (float_truncate:V2SF
2176             (match_operand:V2DF 1 "register_operand"))
2177         (float_truncate:V2SF
2178             (match_operand:V2DF 2 "register_operand"))
2179           ))]
2180   "TARGET_SIMD"
2181   {
2182     rtx tmp = gen_reg_rtx (V2SFmode);
2183     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2184     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2185
2186     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2187     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2188                                                    tmp, operands[hi]));
2189     DONE;
2190   }
2191 )
2192
2193 (define_expand "vec_pack_trunc_df"
2194   [(set (match_operand:V2SF 0 "register_operand")
2195       (vec_concat:V2SF
2196         (float_truncate:SF
2197             (match_operand:DF 1 "register_operand"))
2198         (float_truncate:SF
2199             (match_operand:DF 2 "register_operand"))
2200           ))]
2201   "TARGET_SIMD"
2202   {
2203     rtx tmp = gen_reg_rtx (V2SFmode);
2204     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2205     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2206
2207     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2208     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2209     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2210     DONE;
2211   }
2212 )
2213
2214 ;; FP Max/Min
2215 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2216 ;; expression like:
2217 ;;      a = (b < c) ? b : c;
2218 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2219 ;; either explicitly or indirectly via -ffast-math.
2220 ;;
2221 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2222 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2223 ;; operand will be returned when both operands are zero (i.e. they may not
2224 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2225 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2226 ;; NaNs.
2227
2228 (define_insn "<su><maxmin><mode>3"
2229   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2230         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2231                        (match_operand:VHSDF 2 "register_operand" "w")))]
2232   "TARGET_SIMD"
2233   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2234   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2235 )
2236
2237 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2238 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2239 ;; which implement the IEEE fmax ()/fmin () functions.
2240 (define_insn "<maxmin_uns><mode>3"
2241   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2242        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2243                       (match_operand:VHSDF 2 "register_operand" "w")]
2244                       FMAXMIN_UNS))]
2245   "TARGET_SIMD"
2246   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2247   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2248 )
2249
2250 ;; 'across lanes' add.
2251
2252 (define_expand "reduc_plus_scal_<mode>"
2253   [(match_operand:<VEL> 0 "register_operand" "=w")
2254    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2255                UNSPEC_ADDV)]
2256   "TARGET_SIMD"
2257   {
2258     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2259     rtx scratch = gen_reg_rtx (<MODE>mode);
2260     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2261     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2262     DONE;
2263   }
2264 )
2265
2266 (define_insn "aarch64_faddp<mode>"
2267  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2268        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2269                       (match_operand:VHSDF 2 "register_operand" "w")]
2270         UNSPEC_FADDV))]
2271  "TARGET_SIMD"
2272  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2273   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2274 )
2275
2276 (define_insn "aarch64_reduc_plus_internal<mode>"
2277  [(set (match_operand:VDQV 0 "register_operand" "=w")
2278        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2279                     UNSPEC_ADDV))]
2280  "TARGET_SIMD"
2281  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2282   [(set_attr "type" "neon_reduc_add<q>")]
2283 )
2284
2285 (define_insn "aarch64_reduc_plus_internalv2si"
2286  [(set (match_operand:V2SI 0 "register_operand" "=w")
2287        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2288                     UNSPEC_ADDV))]
2289  "TARGET_SIMD"
2290  "addp\\t%0.2s, %1.2s, %1.2s"
2291   [(set_attr "type" "neon_reduc_add")]
2292 )
2293
2294 (define_insn "reduc_plus_scal_<mode>"
2295  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2296        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2297                    UNSPEC_FADDV))]
2298  "TARGET_SIMD"
2299  "faddp\\t%<Vetype>0, %1.<Vtype>"
2300   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2301 )
2302
2303 (define_expand "reduc_plus_scal_v4sf"
2304  [(set (match_operand:SF 0 "register_operand")
2305        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2306                     UNSPEC_FADDV))]
2307  "TARGET_SIMD"
2308 {
2309   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2310   rtx scratch = gen_reg_rtx (V4SFmode);
2311   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2312   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2313   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2314   DONE;
2315 })
2316
2317 (define_insn "clrsb<mode>2"
2318   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2319         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2320   "TARGET_SIMD"
2321   "cls\\t%0.<Vtype>, %1.<Vtype>"
2322   [(set_attr "type" "neon_cls<q>")]
2323 )
2324
2325 (define_insn "clz<mode>2"
2326  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2327        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2328  "TARGET_SIMD"
2329  "clz\\t%0.<Vtype>, %1.<Vtype>"
2330   [(set_attr "type" "neon_cls<q>")]
2331 )
2332
2333 (define_insn "popcount<mode>2"
2334   [(set (match_operand:VB 0 "register_operand" "=w")
2335         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2336   "TARGET_SIMD"
2337   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2338   [(set_attr "type" "neon_cnt<q>")]
2339 )
2340
2341 ;; 'across lanes' max and min ops.
2342
2343 ;; Template for outputting a scalar, so we can create __builtins which can be
2344 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
2345 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2346   [(match_operand:<VEL> 0 "register_operand")
2347    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2348                   FMAXMINV)]
2349   "TARGET_SIMD"
2350   {
2351     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2352     rtx scratch = gen_reg_rtx (<MODE>mode);
2353     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2354                                                               operands[1]));
2355     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2356     DONE;
2357   }
2358 )
2359
2360 ;; Likewise for integer cases, signed and unsigned.
2361 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2362   [(match_operand:<VEL> 0 "register_operand")
2363    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2364                     MAXMINV)]
2365   "TARGET_SIMD"
2366   {
2367     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2368     rtx scratch = gen_reg_rtx (<MODE>mode);
2369     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2370                                                               operands[1]));
2371     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2372     DONE;
2373   }
2374 )
2375
2376 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2377  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2378        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2379                     MAXMINV))]
2380  "TARGET_SIMD"
2381  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2382   [(set_attr "type" "neon_reduc_minmax<q>")]
2383 )
2384
2385 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2386  [(set (match_operand:V2SI 0 "register_operand" "=w")
2387        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2388                     MAXMINV))]
2389  "TARGET_SIMD"
2390  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2391   [(set_attr "type" "neon_reduc_minmax")]
2392 )
2393
2394 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2395  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2396        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2397                       FMAXMINV))]
2398  "TARGET_SIMD"
2399  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2400   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2401 )
2402
2403 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2404 ;; allocation.
2405 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2406 ;; to select.
2407 ;;
2408 ;; Thus our BSL is of the form:
2409 ;;   op0 = bsl (mask, op2, op3)
2410 ;; We can use any of:
2411 ;;
2412 ;;   if (op0 = mask)
2413 ;;     bsl mask, op1, op2
2414 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2415 ;;     bit op0, op2, mask
2416 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2417 ;;     bif op0, op1, mask
2418 ;;
2419 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2420 ;; Some forms of straight-line code may generate the equivalent form
2421 ;; in *aarch64_simd_bsl<mode>_alt.
2422
2423 (define_insn "aarch64_simd_bsl<mode>_internal"
2424   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2425         (xor:VSDQ_I_DI
2426            (and:VSDQ_I_DI
2427              (xor:VSDQ_I_DI
2428                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2429                (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2430              (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2431           (match_dup:<V_INT_EQUIV> 3)
2432         ))]
2433   "TARGET_SIMD"
2434   "@
2435   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2436   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2437   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2438   [(set_attr "type" "neon_bsl<q>")]
2439 )
2440
2441 ;; We need this form in addition to the above pattern to match the case
2442 ;; when combine tries merging three insns such that the second operand of
2443 ;; the outer XOR matches the second operand of the inner XOR rather than
2444 ;; the first.  The two are equivalent but since recog doesn't try all
2445 ;; permutations of commutative operations, we have to have a separate pattern.
2446
2447 (define_insn "*aarch64_simd_bsl<mode>_alt"
2448   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2449         (xor:VSDQ_I_DI
2450            (and:VSDQ_I_DI
2451              (xor:VSDQ_I_DI
2452                (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2453                (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2454               (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2455           (match_dup:VSDQ_I_DI 2)))]
2456   "TARGET_SIMD"
2457   "@
2458   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2459   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2460   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2461   [(set_attr "type" "neon_bsl<q>")]
2462 )
2463
2464 (define_expand "aarch64_simd_bsl<mode>"
2465   [(match_operand:VALLDIF 0 "register_operand")
2466    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2467    (match_operand:VALLDIF 2 "register_operand")
2468    (match_operand:VALLDIF 3 "register_operand")]
2469  "TARGET_SIMD"
2470 {
2471   /* We can't alias operands together if they have different modes.  */
2472   rtx tmp = operands[0];
2473   if (FLOAT_MODE_P (<MODE>mode))
2474     {
2475       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2476       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2477       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2478     }
2479   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2480   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2481                                                          operands[1],
2482                                                          operands[2],
2483                                                          operands[3]));
2484   if (tmp != operands[0])
2485     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2486
2487   DONE;
2488 })
2489
2490 (define_expand "vcond_mask_<mode><v_int_equiv>"
2491   [(match_operand:VALLDI 0 "register_operand")
2492    (match_operand:VALLDI 1 "nonmemory_operand")
2493    (match_operand:VALLDI 2 "nonmemory_operand")
2494    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2495   "TARGET_SIMD"
2496 {
2497   /* If we have (a = (P) ? -1 : 0);
2498      Then we can simply move the generated mask (result must be int).  */
2499   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2500       && operands[2] == CONST0_RTX (<MODE>mode))
2501     emit_move_insn (operands[0], operands[3]);
2502   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2503   else if (operands[1] == CONST0_RTX (<MODE>mode)
2504            && operands[2] == CONSTM1_RTX (<MODE>mode))
2505     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2506   else
2507     {
2508       if (!REG_P (operands[1]))
2509         operands[1] = force_reg (<MODE>mode, operands[1]);
2510       if (!REG_P (operands[2]))
2511         operands[2] = force_reg (<MODE>mode, operands[2]);
2512       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2513                                              operands[1], operands[2]));
2514     }
2515
2516   DONE;
2517 })
2518
2519 ;; Patterns comparing two vectors to produce a mask.
2520
2521 (define_expand "vec_cmp<mode><mode>"
2522   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2523           (match_operator 1 "comparison_operator"
2524             [(match_operand:VSDQ_I_DI 2 "register_operand")
2525              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2526   "TARGET_SIMD"
2527 {
2528   rtx mask = operands[0];
2529   enum rtx_code code = GET_CODE (operands[1]);
2530
2531   switch (code)
2532     {
2533     case NE:
2534     case LE:
2535     case LT:
2536     case GE:
2537     case GT:
2538     case EQ:
2539       if (operands[3] == CONST0_RTX (<MODE>mode))
2540         break;
2541
2542       /* Fall through.  */
2543     default:
2544       if (!REG_P (operands[3]))
2545         operands[3] = force_reg (<MODE>mode, operands[3]);
2546
2547       break;
2548     }
2549
2550   switch (code)
2551     {
2552     case LT:
2553       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2554       break;
2555
2556     case GE:
2557       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2558       break;
2559
2560     case LE:
2561       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2562       break;
2563
2564     case GT:
2565       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2566       break;
2567
2568     case LTU:
2569       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2570       break;
2571
2572     case GEU:
2573       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2574       break;
2575
2576     case LEU:
2577       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2578       break;
2579
2580     case GTU:
2581       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2582       break;
2583
2584     case NE:
2585       /* Handle NE as !EQ.  */
2586       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2587       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2588       break;
2589
2590     case EQ:
2591       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2592       break;
2593
2594     default:
2595       gcc_unreachable ();
2596     }
2597
2598   DONE;
2599 })
2600
2601 (define_expand "vec_cmp<mode><v_int_equiv>"
2602   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2603         (match_operator 1 "comparison_operator"
2604             [(match_operand:VDQF 2 "register_operand")
2605              (match_operand:VDQF 3 "nonmemory_operand")]))]
2606   "TARGET_SIMD"
2607 {
2608   int use_zero_form = 0;
2609   enum rtx_code code = GET_CODE (operands[1]);
2610   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2611
2612   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2613
2614   switch (code)
2615     {
2616     case LE:
2617     case LT:
2618     case GE:
2619     case GT:
2620     case EQ:
2621       if (operands[3] == CONST0_RTX (<MODE>mode))
2622         {
2623           use_zero_form = 1;
2624           break;
2625         }
2626       /* Fall through.  */
2627     default:
2628       if (!REG_P (operands[3]))
2629         operands[3] = force_reg (<MODE>mode, operands[3]);
2630
2631       break;
2632     }
2633
2634   switch (code)
2635     {
2636     case LT:
2637       if (use_zero_form)
2638         {
2639           comparison = gen_aarch64_cmlt<mode>;
2640           break;
2641         }
2642       /* Fall through.  */
2643     case UNGE:
2644       std::swap (operands[2], operands[3]);
2645       /* Fall through.  */
2646     case UNLE:
2647     case GT:
2648       comparison = gen_aarch64_cmgt<mode>;
2649       break;
2650     case LE:
2651       if (use_zero_form)
2652         {
2653           comparison = gen_aarch64_cmle<mode>;
2654           break;
2655         }
2656       /* Fall through.  */
2657     case UNGT:
2658       std::swap (operands[2], operands[3]);
2659       /* Fall through.  */
2660     case UNLT:
2661     case GE:
2662       comparison = gen_aarch64_cmge<mode>;
2663       break;
2664     case NE:
2665     case EQ:
2666       comparison = gen_aarch64_cmeq<mode>;
2667       break;
2668     case UNEQ:
2669     case ORDERED:
2670     case UNORDERED:
2671       break;
2672     default:
2673       gcc_unreachable ();
2674     }
2675
2676   switch (code)
2677     {
2678     case UNGE:
2679     case UNGT:
2680     case UNLE:
2681     case UNLT:
2682     case NE:
2683       /* FCM returns false for lanes which are unordered, so if we use
2684          the inverse of the comparison we actually want to emit, then
2685          invert the result, we will end up with the correct result.
2686          Note that a NE NaN and NaN NE b are true for all a, b.
2687
2688          Our transformations are:
2689          a UNGE b -> !(b GT a)
2690          a UNGT b -> !(b GE a)
2691          a UNLE b -> !(a GT b)
2692          a UNLT b -> !(a GE b)
2693          a   NE b -> !(a EQ b)  */
2694       gcc_assert (comparison != NULL);
2695       emit_insn (comparison (operands[0], operands[2], operands[3]));
2696       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2697       break;
2698
2699     case LT:
2700     case LE:
2701     case GT:
2702     case GE:
2703     case EQ:
2704       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2705          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2706          a GE b -> a GE b
2707          a GT b -> a GT b
2708          a LE b -> b GE a
2709          a LT b -> b GT a
2710          a EQ b -> a EQ b  */
2711       gcc_assert (comparison != NULL);
2712       emit_insn (comparison (operands[0], operands[2], operands[3]));
2713       break;
2714
2715     case UNEQ:
2716       /* We first check (a > b ||  b > a) which is !UNEQ, inverting
2717          this result will then give us (a == b || a UNORDERED b).  */
2718       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2719                                          operands[2], operands[3]));
2720       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2721       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2722       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2723       break;
2724
2725     case UNORDERED:
2726       /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2727          UNORDERED as !ORDERED.  */
2728       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2729       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2730                                          operands[3], operands[2]));
2731       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2732       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2733       break;
2734
2735     case ORDERED:
2736       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2737       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2738                                          operands[3], operands[2]));
2739       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2740       break;
2741
2742     default:
2743       gcc_unreachable ();
2744     }
2745
2746   DONE;
2747 })
2748
2749 (define_expand "vec_cmpu<mode><mode>"
2750   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2751           (match_operator 1 "comparison_operator"
2752             [(match_operand:VSDQ_I_DI 2 "register_operand")
2753              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2754   "TARGET_SIMD"
2755 {
2756   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2757                                       operands[2], operands[3]));
2758   DONE;
2759 })
2760
2761 (define_expand "vcond<mode><mode>"
2762   [(set (match_operand:VALLDI 0 "register_operand")
2763         (if_then_else:VALLDI
2764           (match_operator 3 "comparison_operator"
2765             [(match_operand:VALLDI 4 "register_operand")
2766              (match_operand:VALLDI 5 "nonmemory_operand")])
2767           (match_operand:VALLDI 1 "nonmemory_operand")
2768           (match_operand:VALLDI 2 "nonmemory_operand")))]
2769   "TARGET_SIMD"
2770 {
2771   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2772   enum rtx_code code = GET_CODE (operands[3]);
2773
2774   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2775      it as well as switch operands 1/2 in order to avoid the additional
2776      NOT instruction.  */
2777   if (code == NE)
2778     {
2779       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2780                                     operands[4], operands[5]);
2781       std::swap (operands[1], operands[2]);
2782     }
2783   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2784                                              operands[4], operands[5]));
2785   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2786                                                  operands[2], mask));
2787
2788   DONE;
2789 })
2790
2791 (define_expand "vcond<v_cmp_mixed><mode>"
2792   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2793         (if_then_else:<V_cmp_mixed>
2794           (match_operator 3 "comparison_operator"
2795             [(match_operand:VDQF_COND 4 "register_operand")
2796              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2797           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2798           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2799   "TARGET_SIMD"
2800 {
2801   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2802   enum rtx_code code = GET_CODE (operands[3]);
2803
2804   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2805      it as well as switch operands 1/2 in order to avoid the additional
2806      NOT instruction.  */
2807   if (code == NE)
2808     {
2809       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2810                                     operands[4], operands[5]);
2811       std::swap (operands[1], operands[2]);
2812     }
2813   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2814                                              operands[4], operands[5]));
2815   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2816                                                 operands[0], operands[1],
2817                                                 operands[2], mask));
2818
2819   DONE;
2820 })
2821
2822 (define_expand "vcondu<mode><mode>"
2823   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2824         (if_then_else:VSDQ_I_DI
2825           (match_operator 3 "comparison_operator"
2826             [(match_operand:VSDQ_I_DI 4 "register_operand")
2827              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2828           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2829           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2830   "TARGET_SIMD"
2831 {
2832   rtx mask = gen_reg_rtx (<MODE>mode);
2833   enum rtx_code code = GET_CODE (operands[3]);
2834
2835   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2836      it as well as switch operands 1/2 in order to avoid the additional
2837      NOT instruction.  */
2838   if (code == NE)
2839     {
2840       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2841                                     operands[4], operands[5]);
2842       std::swap (operands[1], operands[2]);
2843     }
2844   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2845                                       operands[4], operands[5]));
2846   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2847                                                  operands[2], mask));
2848   DONE;
2849 })
2850
2851 (define_expand "vcondu<mode><v_cmp_mixed>"
2852   [(set (match_operand:VDQF 0 "register_operand")
2853         (if_then_else:VDQF
2854           (match_operator 3 "comparison_operator"
2855             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2856              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2857           (match_operand:VDQF 1 "nonmemory_operand")
2858           (match_operand:VDQF 2 "nonmemory_operand")))]
2859   "TARGET_SIMD"
2860 {
2861   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2862   enum rtx_code code = GET_CODE (operands[3]);
2863
2864   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2865      it as well as switch operands 1/2 in order to avoid the additional
2866      NOT instruction.  */
2867   if (code == NE)
2868     {
2869       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2870                                     operands[4], operands[5]);
2871       std::swap (operands[1], operands[2]);
2872     }
2873   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2874                                                   mask, operands[3],
2875                                                   operands[4], operands[5]));
2876   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2877                                                  operands[2], mask));
2878   DONE;
2879 })
2880
2881 ;; Patterns for AArch64 SIMD Intrinsics.
2882
2883 ;; Lane extraction with sign extension to general purpose register.
2884 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2885   [(set (match_operand:GPI 0 "register_operand" "=r")
2886         (sign_extend:GPI
2887           (vec_select:<VEL>
2888             (match_operand:VDQQH 1 "register_operand" "w")
2889             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2890   "TARGET_SIMD"
2891   {
2892     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2893     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2894   }
2895   [(set_attr "type" "neon_to_gp<q>")]
2896 )
2897
2898 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2899   [(set (match_operand:SI 0 "register_operand" "=r")
2900         (zero_extend:SI
2901           (vec_select:<VEL>
2902             (match_operand:VDQQH 1 "register_operand" "w")
2903             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2904   "TARGET_SIMD"
2905   {
2906     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2907     return "umov\\t%w0, %1.<Vetype>[%2]";
2908   }
2909   [(set_attr "type" "neon_to_gp<q>")]
2910 )
2911
2912 ;; Lane extraction of a value, neither sign nor zero extension
2913 ;; is guaranteed so upper bits should be considered undefined.
2914 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2915 (define_insn "aarch64_get_lane<mode>"
2916   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2917         (vec_select:<VEL>
2918           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2919           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2920   "TARGET_SIMD"
2921   {
2922     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2923     switch (which_alternative)
2924       {
2925         case 0:
2926           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2927         case 1:
2928           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2929         case 2:
2930           return "st1\\t{%1.<Vetype>}[%2], %0";
2931         default:
2932           gcc_unreachable ();
2933       }
2934   }
2935   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2936 )
2937
2938 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2939 ;; dest vector.
2940
2941 (define_insn "*aarch64_combinez<mode>"
2942   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2943         (vec_concat:<VDBL>
2944            (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2945            (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2946   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2947   "@
2948    mov\\t%0.8b, %1.8b
2949    fmov\t%d0, %1
2950    ldr\\t%d0, %1"
2951   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2952    (set_attr "simd" "yes,*,yes")
2953    (set_attr "fp" "*,yes,*")]
2954 )
2955
2956 (define_insn "*aarch64_combinez_be<mode>"
2957   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2958         (vec_concat:<VDBL>
2959            (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2960            (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2961   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2962   "@
2963    mov\\t%0.8b, %1.8b
2964    fmov\t%d0, %1
2965    ldr\\t%d0, %1"
2966   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2967    (set_attr "simd" "yes,*,yes")
2968    (set_attr "fp" "*,yes,*")]
2969 )
2970
2971 (define_expand "aarch64_combine<mode>"
2972   [(match_operand:<VDBL> 0 "register_operand")
2973    (match_operand:VDC 1 "register_operand")
2974    (match_operand:VDC 2 "register_operand")]
2975   "TARGET_SIMD"
2976 {
2977   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2978
2979   DONE;
2980 }
2981 )
2982
2983 (define_expand "aarch64_simd_combine<mode>"
2984   [(match_operand:<VDBL> 0 "register_operand")
2985    (match_operand:VDC 1 "register_operand")
2986    (match_operand:VDC 2 "register_operand")]
2987   "TARGET_SIMD"
2988   {
2989     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2990     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2991     DONE;
2992   }
2993 [(set_attr "type" "multiple")]
2994 )
2995
2996 ;; <su><addsub>l<q>.
2997
2998 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2999  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3000        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3001                            (match_operand:VQW 1 "register_operand" "w")
3002                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3003                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3004                            (match_operand:VQW 2 "register_operand" "w")
3005                            (match_dup 3)))))]
3006   "TARGET_SIMD"
3007   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3008   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3009 )
3010
3011 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3012  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3013        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3014                            (match_operand:VQW 1 "register_operand" "w")
3015                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3016                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3017                            (match_operand:VQW 2 "register_operand" "w")
3018                            (match_dup 3)))))]
3019   "TARGET_SIMD"
3020   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3021   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3022 )
3023
3024
3025 (define_expand "aarch64_saddl2<mode>"
3026   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3027    (match_operand:VQW 1 "register_operand" "w")
3028    (match_operand:VQW 2 "register_operand" "w")]
3029   "TARGET_SIMD"
3030 {
3031   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3032   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3033                                                   operands[2], p));
3034   DONE;
3035 })
3036
3037 (define_expand "aarch64_uaddl2<mode>"
3038   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3039    (match_operand:VQW 1 "register_operand" "w")
3040    (match_operand:VQW 2 "register_operand" "w")]
3041   "TARGET_SIMD"
3042 {
3043   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3044   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3045                                                   operands[2], p));
3046   DONE;
3047 })
3048
3049 (define_expand "aarch64_ssubl2<mode>"
3050   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3051    (match_operand:VQW 1 "register_operand" "w")
3052    (match_operand:VQW 2 "register_operand" "w")]
3053   "TARGET_SIMD"
3054 {
3055   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3056   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3057                                                 operands[2], p));
3058   DONE;
3059 })
3060
3061 (define_expand "aarch64_usubl2<mode>"
3062   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3063    (match_operand:VQW 1 "register_operand" "w")
3064    (match_operand:VQW 2 "register_operand" "w")]
3065   "TARGET_SIMD"
3066 {
3067   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3068   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3069                                                 operands[2], p));
3070   DONE;
3071 })
3072
3073 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3074  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3075        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3076                            (match_operand:VD_BHSI 1 "register_operand" "w"))
3077                        (ANY_EXTEND:<VWIDE>
3078                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3079   "TARGET_SIMD"
3080   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3081   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3082 )
3083
3084 ;; <su><addsub>w<q>.
3085
3086 (define_expand "widen_ssum<mode>3"
3087   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3088         (plus:<VDBLW> (sign_extend:<VDBLW>
3089                         (match_operand:VQW 1 "register_operand" ""))
3090                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3091   "TARGET_SIMD"
3092   {
3093     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3094     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3095
3096     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3097                                                 operands[1], p));
3098     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3099     DONE;
3100   }
3101 )
3102
3103 (define_expand "widen_ssum<mode>3"
3104   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3105         (plus:<VWIDE> (sign_extend:<VWIDE>
3106                         (match_operand:VD_BHSI 1 "register_operand" ""))
3107                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3108   "TARGET_SIMD"
3109 {
3110   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3111   DONE;
3112 })
3113
3114 (define_expand "widen_usum<mode>3"
3115   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3116         (plus:<VDBLW> (zero_extend:<VDBLW>
3117                         (match_operand:VQW 1 "register_operand" ""))
3118                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3119   "TARGET_SIMD"
3120   {
3121     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3122     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3123
3124     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3125                                                  operands[1], p));
3126     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3127     DONE;
3128   }
3129 )
3130
3131 (define_expand "widen_usum<mode>3"
3132   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3133         (plus:<VWIDE> (zero_extend:<VWIDE>
3134                         (match_operand:VD_BHSI 1 "register_operand" ""))
3135                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3136   "TARGET_SIMD"
3137 {
3138   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3139   DONE;
3140 })
3141
3142 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3143   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3144         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3145                         (ANY_EXTEND:<VWIDE>
3146                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3147   "TARGET_SIMD"
3148   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3149   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3150 )
3151
3152 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3153   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3155                         (ANY_EXTEND:<VWIDE>
3156                           (vec_select:<VHALF>
3157                            (match_operand:VQW 2 "register_operand" "w")
3158                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3159   "TARGET_SIMD"
3160   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3161   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3162 )
3163
3164 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3165   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3166         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3167                         (ANY_EXTEND:<VWIDE>
3168                           (vec_select:<VHALF>
3169                            (match_operand:VQW 2 "register_operand" "w")
3170                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3171   "TARGET_SIMD"
3172   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3173   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3174 )
3175
3176 (define_expand "aarch64_saddw2<mode>"
3177   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3178    (match_operand:<VWIDE> 1 "register_operand" "w")
3179    (match_operand:VQW 2 "register_operand" "w")]
3180   "TARGET_SIMD"
3181 {
3182   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3183   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3184                                                 operands[2], p));
3185   DONE;
3186 })
3187
3188 (define_expand "aarch64_uaddw2<mode>"
3189   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3190    (match_operand:<VWIDE> 1 "register_operand" "w")
3191    (match_operand:VQW 2 "register_operand" "w")]
3192   "TARGET_SIMD"
3193 {
3194   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3195   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3196                                                 operands[2], p));
3197   DONE;
3198 })
3199
3200
3201 (define_expand "aarch64_ssubw2<mode>"
3202   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203    (match_operand:<VWIDE> 1 "register_operand" "w")
3204    (match_operand:VQW 2 "register_operand" "w")]
3205   "TARGET_SIMD"
3206 {
3207   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3209                                                 operands[2], p));
3210   DONE;
3211 })
3212
3213 (define_expand "aarch64_usubw2<mode>"
3214   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215    (match_operand:<VWIDE> 1 "register_operand" "w")
3216    (match_operand:VQW 2 "register_operand" "w")]
3217   "TARGET_SIMD"
3218 {
3219   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3221                                                 operands[2], p));
3222   DONE;
3223 })
3224
3225 ;; <su><r>h<addsub>.
3226
3227 (define_insn "aarch64_<sur>h<addsub><mode>"
3228   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3229         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3230                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3231                      HADDSUB))]
3232   "TARGET_SIMD"
3233   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3234   [(set_attr "type" "neon_<addsub>_halve<q>")]
3235 )
3236
3237 ;; <r><addsub>hn<q>.
3238
3239 (define_insn "aarch64_<sur><addsub>hn<mode>"
3240   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3241         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3242                             (match_operand:VQN 2 "register_operand" "w")]
3243                            ADDSUBHN))]
3244   "TARGET_SIMD"
3245   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3246   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3247 )
3248
3249 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3250   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3251         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3252                              (match_operand:VQN 2 "register_operand" "w")
3253                              (match_operand:VQN 3 "register_operand" "w")]
3254                             ADDSUBHN2))]
3255   "TARGET_SIMD"
3256   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3257   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3258 )
3259
3260 ;; pmul.
3261
3262 (define_insn "aarch64_pmul<mode>"
3263   [(set (match_operand:VB 0 "register_operand" "=w")
3264         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3265                     (match_operand:VB 2 "register_operand" "w")]
3266                    UNSPEC_PMUL))]
3267  "TARGET_SIMD"
3268  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3269   [(set_attr "type" "neon_mul_<Vetype><q>")]
3270 )
3271
3272 ;; fmulx.
3273
3274 (define_insn "aarch64_fmulx<mode>"
3275   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3276         (unspec:VHSDF_HSDF
3277           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3278            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3279            UNSPEC_FMULX))]
3280  "TARGET_SIMD"
3281  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3282  [(set_attr "type" "neon_fp_mul_<stype>")]
3283 )
3284
3285 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3286
3287 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3288   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3289         (unspec:VDQSF
3290          [(match_operand:VDQSF 1 "register_operand" "w")
3291           (vec_duplicate:VDQSF
3292            (vec_select:<VEL>
3293             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3294             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3295          UNSPEC_FMULX))]
3296   "TARGET_SIMD"
3297   {
3298     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3299     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3300   }
3301   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3302 )
3303
3304 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3305
3306 (define_insn "*aarch64_mulx_elt<mode>"
3307   [(set (match_operand:VDQF 0 "register_operand" "=w")
3308         (unspec:VDQF
3309          [(match_operand:VDQF 1 "register_operand" "w")
3310           (vec_duplicate:VDQF
3311            (vec_select:<VEL>
3312             (match_operand:VDQF 2 "register_operand" "w")
3313             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3314          UNSPEC_FMULX))]
3315   "TARGET_SIMD"
3316   {
3317     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3318     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3319   }
3320   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3321 )
3322
3323 ;; vmulxq_lane
3324
3325 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3326   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3327         (unspec:VHSDF
3328          [(match_operand:VHSDF 1 "register_operand" "w")
3329           (vec_duplicate:VHSDF
3330             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3331          UNSPEC_FMULX))]
3332   "TARGET_SIMD"
3333   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3334   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3335 )
3336
3337 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3338 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3339 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3340
3341 (define_insn "*aarch64_vgetfmulx<mode>"
3342   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3343         (unspec:<VEL>
3344          [(match_operand:<VEL> 1 "register_operand" "w")
3345           (vec_select:<VEL>
3346            (match_operand:VDQF 2 "register_operand" "w")
3347             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3348          UNSPEC_FMULX))]
3349   "TARGET_SIMD"
3350   {
3351     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3352     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3353   }
3354   [(set_attr "type" "fmul<Vetype>")]
3355 )
3356 ;; <su>q<addsub>
3357
3358 (define_insn "aarch64_<su_optab><optab><mode>"
3359   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3360         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3361                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3362   "TARGET_SIMD"
3363   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3364   [(set_attr "type" "neon_<optab><q>")]
3365 )
3366
3367 ;; suqadd and usqadd
3368
3369 (define_insn "aarch64_<sur>qadd<mode>"
3370   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3371         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3372                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3373                        USSUQADD))]
3374   "TARGET_SIMD"
3375   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3376   [(set_attr "type" "neon_qadd<q>")]
3377 )
3378
3379 ;; sqmovun
3380
3381 (define_insn "aarch64_sqmovun<mode>"
3382   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3383         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3384                             UNSPEC_SQXTUN))]
3385    "TARGET_SIMD"
3386    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3387    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3388 )
3389
3390 ;; sqmovn and uqmovn
3391
3392 (define_insn "aarch64_<sur>qmovn<mode>"
3393   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3394         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3395                             SUQMOVN))]
3396   "TARGET_SIMD"
3397   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3398    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3399 )
3400
3401 ;; <su>q<absneg>
3402
3403 (define_insn "aarch64_s<optab><mode>"
3404   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3405         (UNQOPS:VSDQ_I
3406           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3407   "TARGET_SIMD"
3408   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3409   [(set_attr "type" "neon_<optab><q>")]
3410 )
3411
3412 ;; sq<r>dmulh.
3413
3414 (define_insn "aarch64_sq<r>dmulh<mode>"
3415   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3416         (unspec:VSDQ_HSI
3417           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3418            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3419          VQDMULH))]
3420   "TARGET_SIMD"
3421   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3422   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3423 )
3424
3425 ;; sq<r>dmulh_lane
3426
3427 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3428   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3429         (unspec:VDQHS
3430           [(match_operand:VDQHS 1 "register_operand" "w")
3431            (vec_select:<VEL>
3432              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3433              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3434          VQDMULH))]
3435   "TARGET_SIMD"
3436   "*
3437    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3438    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3439   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3440 )
3441
3442 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3443   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3444         (unspec:VDQHS
3445           [(match_operand:VDQHS 1 "register_operand" "w")
3446            (vec_select:<VEL>
3447              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3448              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3449          VQDMULH))]
3450   "TARGET_SIMD"
3451   "*
3452    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3453    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3454   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3455 )
3456
3457 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3458   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3459         (unspec:SD_HSI
3460           [(match_operand:SD_HSI 1 "register_operand" "w")
3461            (vec_select:<VEL>
3462              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3463              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3464          VQDMULH))]
3465   "TARGET_SIMD"
3466   "*
3467    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3468    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3469   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3470 )
3471
3472 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3473   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3474         (unspec:SD_HSI
3475           [(match_operand:SD_HSI 1 "register_operand" "w")
3476            (vec_select:<VEL>
3477              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3478              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3479          VQDMULH))]
3480   "TARGET_SIMD"
3481   "*
3482    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3483    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3484   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3485 )
3486
3487 ;; sqrdml[as]h.
3488
3489 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3490   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3491         (unspec:VSDQ_HSI
3492           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3493            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3494            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3495           SQRDMLH_AS))]
3496    "TARGET_SIMD_RDMA"
3497    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3498    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3499 )
3500
3501 ;; sqrdml[as]h_lane.
3502
3503 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3504   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3505         (unspec:VDQHS
3506           [(match_operand:VDQHS 1 "register_operand" "0")
3507            (match_operand:VDQHS 2 "register_operand" "w")
3508            (vec_select:<VEL>
3509              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3510              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3511           SQRDMLH_AS))]
3512    "TARGET_SIMD_RDMA"
3513    {
3514      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3515      return
3516       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3517    }
3518    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3519 )
3520
3521 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3522   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3523         (unspec:SD_HSI
3524           [(match_operand:SD_HSI 1 "register_operand" "0")
3525            (match_operand:SD_HSI 2 "register_operand" "w")
3526            (vec_select:<VEL>
3527              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3528              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3529           SQRDMLH_AS))]
3530    "TARGET_SIMD_RDMA"
3531    {
3532      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3533      return
3534       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3535    }
3536    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3537 )
3538
3539 ;; sqrdml[as]h_laneq.
3540
3541 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3542   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3543         (unspec:VDQHS
3544           [(match_operand:VDQHS 1 "register_operand" "0")
3545            (match_operand:VDQHS 2 "register_operand" "w")
3546            (vec_select:<VEL>
3547              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3548              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3549           SQRDMLH_AS))]
3550    "TARGET_SIMD_RDMA"
3551    {
3552      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3553      return
3554       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3555    }
3556    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3557 )
3558
3559 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3560   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3561         (unspec:SD_HSI
3562           [(match_operand:SD_HSI 1 "register_operand" "0")
3563            (match_operand:SD_HSI 2 "register_operand" "w")
3564            (vec_select:<VEL>
3565              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3566              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3567           SQRDMLH_AS))]
3568    "TARGET_SIMD_RDMA"
3569    {
3570      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3571      return
3572       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3573    }
3574    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3575 )
3576
3577 ;; vqdml[sa]l
3578
3579 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3580   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3581         (SBINQOPS:<VWIDE>
3582           (match_operand:<VWIDE> 1 "register_operand" "0")
3583           (ss_ashift:<VWIDE>
3584               (mult:<VWIDE>
3585                 (sign_extend:<VWIDE>
3586                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3587                 (sign_extend:<VWIDE>
3588                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3589               (const_int 1))))]
3590   "TARGET_SIMD"
3591   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3592   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3593 )
3594
3595 ;; vqdml[sa]l_lane
3596
3597 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3598   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3599         (SBINQOPS:<VWIDE>
3600           (match_operand:<VWIDE> 1 "register_operand" "0")
3601           (ss_ashift:<VWIDE>
3602             (mult:<VWIDE>
3603               (sign_extend:<VWIDE>
3604                 (match_operand:VD_HSI 2 "register_operand" "w"))
3605               (sign_extend:<VWIDE>
3606                 (vec_duplicate:VD_HSI
3607                   (vec_select:<VEL>
3608                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3609                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3610               ))
3611             (const_int 1))))]
3612   "TARGET_SIMD"
3613   {
3614     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3615     return
3616       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3617   }
3618   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3619 )
3620
3621 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3622   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3623         (SBINQOPS:<VWIDE>
3624           (match_operand:<VWIDE> 1 "register_operand" "0")
3625           (ss_ashift:<VWIDE>
3626             (mult:<VWIDE>
3627               (sign_extend:<VWIDE>
3628                 (match_operand:VD_HSI 2 "register_operand" "w"))
3629               (sign_extend:<VWIDE>
3630                 (vec_duplicate:VD_HSI
3631                   (vec_select:<VEL>
3632                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3633                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3634               ))
3635             (const_int 1))))]
3636   "TARGET_SIMD"
3637   {
3638     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3639     return
3640       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3641   }
3642   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3643 )
3644
3645 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3646   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3647         (SBINQOPS:<VWIDE>
3648           (match_operand:<VWIDE> 1 "register_operand" "0")
3649           (ss_ashift:<VWIDE>
3650             (mult:<VWIDE>
3651               (sign_extend:<VWIDE>
3652                 (match_operand:SD_HSI 2 "register_operand" "w"))
3653               (sign_extend:<VWIDE>
3654                 (vec_select:<VEL>
3655                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3656                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3657               )
3658             (const_int 1))))]
3659   "TARGET_SIMD"
3660   {
3661     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3662     return
3663       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3664   }
3665   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3666 )
3667
3668 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3669   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3670         (SBINQOPS:<VWIDE>
3671           (match_operand:<VWIDE> 1 "register_operand" "0")
3672           (ss_ashift:<VWIDE>
3673             (mult:<VWIDE>
3674               (sign_extend:<VWIDE>
3675                 (match_operand:SD_HSI 2 "register_operand" "w"))
3676               (sign_extend:<VWIDE>
3677                 (vec_select:<VEL>
3678                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3679                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3680               )
3681             (const_int 1))))]
3682   "TARGET_SIMD"
3683   {
3684     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3685     return
3686       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3687   }
3688   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3689 )
3690
3691 ;; vqdml[sa]l_n
3692
3693 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3694   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3695         (SBINQOPS:<VWIDE>
3696           (match_operand:<VWIDE> 1 "register_operand" "0")
3697           (ss_ashift:<VWIDE>
3698               (mult:<VWIDE>
3699                 (sign_extend:<VWIDE>
3700                       (match_operand:VD_HSI 2 "register_operand" "w"))
3701                 (sign_extend:<VWIDE>
3702                   (vec_duplicate:VD_HSI
3703                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3704               (const_int 1))))]
3705   "TARGET_SIMD"
3706   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3707   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3708 )
3709
3710 ;; sqdml[as]l2
3711
3712 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3713   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3714         (SBINQOPS:<VWIDE>
3715          (match_operand:<VWIDE> 1 "register_operand" "0")
3716          (ss_ashift:<VWIDE>
3717              (mult:<VWIDE>
3718                (sign_extend:<VWIDE>
3719                  (vec_select:<VHALF>
3720                      (match_operand:VQ_HSI 2 "register_operand" "w")
3721                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3722                (sign_extend:<VWIDE>
3723                  (vec_select:<VHALF>
3724                      (match_operand:VQ_HSI 3 "register_operand" "w")
3725                      (match_dup 4))))
3726              (const_int 1))))]
3727   "TARGET_SIMD"
3728   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3729   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3730 )
3731
3732 (define_expand "aarch64_sqdmlal2<mode>"
3733   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3734    (match_operand:<VWIDE> 1 "register_operand" "w")
3735    (match_operand:VQ_HSI 2 "register_operand" "w")
3736    (match_operand:VQ_HSI 3 "register_operand" "w")]
3737   "TARGET_SIMD"
3738 {
3739   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3740   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3741                                                   operands[2], operands[3], p));
3742   DONE;
3743 })
3744
3745 (define_expand "aarch64_sqdmlsl2<mode>"
3746   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3747    (match_operand:<VWIDE> 1 "register_operand" "w")
3748    (match_operand:VQ_HSI 2 "register_operand" "w")
3749    (match_operand:VQ_HSI 3 "register_operand" "w")]
3750   "TARGET_SIMD"
3751 {
3752   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3753   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3754                                                   operands[2], operands[3], p));
3755   DONE;
3756 })
3757
3758 ;; vqdml[sa]l2_lane
3759
3760 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3761   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3762         (SBINQOPS:<VWIDE>
3763           (match_operand:<VWIDE> 1 "register_operand" "0")
3764           (ss_ashift:<VWIDE>
3765               (mult:<VWIDE>
3766                 (sign_extend:<VWIDE>
3767                   (vec_select:<VHALF>
3768                     (match_operand:VQ_HSI 2 "register_operand" "w")
3769                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3770                 (sign_extend:<VWIDE>
3771                   (vec_duplicate:<VHALF>
3772                     (vec_select:<VEL>
3773                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3774                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3775                     ))))
3776               (const_int 1))))]
3777   "TARGET_SIMD"
3778   {
3779     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3780     return
3781      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3782   }
3783   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
3786 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3787   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3788         (SBINQOPS:<VWIDE>
3789           (match_operand:<VWIDE> 1 "register_operand" "0")
3790           (ss_ashift:<VWIDE>
3791               (mult:<VWIDE>
3792                 (sign_extend:<VWIDE>
3793                   (vec_select:<VHALF>
3794                     (match_operand:VQ_HSI 2 "register_operand" "w")
3795                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3796                 (sign_extend:<VWIDE>
3797                   (vec_duplicate:<VHALF>
3798                     (vec_select:<VEL>
3799                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3800                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3801                     ))))
3802               (const_int 1))))]
3803   "TARGET_SIMD"
3804   {
3805     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3806     return
3807      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3808   }
3809   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3810 )
3811
3812 (define_expand "aarch64_sqdmlal2_lane<mode>"
3813   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3814    (match_operand:<VWIDE> 1 "register_operand" "w")
3815    (match_operand:VQ_HSI 2 "register_operand" "w")
3816    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3817    (match_operand:SI 4 "immediate_operand" "i")]
3818   "TARGET_SIMD"
3819 {
3820   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3821   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3822                                                        operands[2], operands[3],
3823                                                        operands[4], p));
3824   DONE;
3825 })
3826
3827 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3828   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3829    (match_operand:<VWIDE> 1 "register_operand" "w")
3830    (match_operand:VQ_HSI 2 "register_operand" "w")
3831    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3832    (match_operand:SI 4 "immediate_operand" "i")]
3833   "TARGET_SIMD"
3834 {
3835   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3836   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3837                                                        operands[2], operands[3],
3838                                                        operands[4], p));
3839   DONE;
3840 })
3841
3842 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3843   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3844    (match_operand:<VWIDE> 1 "register_operand" "w")
3845    (match_operand:VQ_HSI 2 "register_operand" "w")
3846    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3847    (match_operand:SI 4 "immediate_operand" "i")]
3848   "TARGET_SIMD"
3849 {
3850   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3851   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3852                                                        operands[2], operands[3],
3853                                                        operands[4], p));
3854   DONE;
3855 })
3856
3857 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3858   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3859    (match_operand:<VWIDE> 1 "register_operand" "w")
3860    (match_operand:VQ_HSI 2 "register_operand" "w")
3861    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3862    (match_operand:SI 4 "immediate_operand" "i")]
3863   "TARGET_SIMD"
3864 {
3865   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3866   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3867                                                        operands[2], operands[3],
3868                                                        operands[4], p));
3869   DONE;
3870 })
3871
3872 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3873   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3874         (SBINQOPS:<VWIDE>
3875           (match_operand:<VWIDE> 1 "register_operand" "0")
3876           (ss_ashift:<VWIDE>
3877             (mult:<VWIDE>
3878               (sign_extend:<VWIDE>
3879                 (vec_select:<VHALF>
3880                   (match_operand:VQ_HSI 2 "register_operand" "w")
3881                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3882               (sign_extend:<VWIDE>
3883                 (vec_duplicate:<VHALF>
3884                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3885             (const_int 1))))]
3886   "TARGET_SIMD"
3887   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3888   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3889 )
3890
3891 (define_expand "aarch64_sqdmlal2_n<mode>"
3892   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3893    (match_operand:<VWIDE> 1 "register_operand" "w")
3894    (match_operand:VQ_HSI 2 "register_operand" "w")
3895    (match_operand:<VEL> 3 "register_operand" "w")]
3896   "TARGET_SIMD"
3897 {
3898   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3899   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3900                                                     operands[2], operands[3],
3901                                                     p));
3902   DONE;
3903 })
3904
3905 (define_expand "aarch64_sqdmlsl2_n<mode>"
3906   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3907    (match_operand:<VWIDE> 1 "register_operand" "w")
3908    (match_operand:VQ_HSI 2 "register_operand" "w")
3909    (match_operand:<VEL> 3 "register_operand" "w")]
3910   "TARGET_SIMD"
3911 {
3912   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3913   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3914                                                     operands[2], operands[3],
3915                                                     p));
3916   DONE;
3917 })
3918
3919 ;; vqdmull
3920
3921 (define_insn "aarch64_sqdmull<mode>"
3922   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923         (ss_ashift:<VWIDE>
3924              (mult:<VWIDE>
3925                (sign_extend:<VWIDE>
3926                      (match_operand:VSD_HSI 1 "register_operand" "w"))
3927                (sign_extend:<VWIDE>
3928                      (match_operand:VSD_HSI 2 "register_operand" "w")))
3929              (const_int 1)))]
3930   "TARGET_SIMD"
3931   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3932   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3933 )
3934
3935 ;; vqdmull_lane
3936
3937 (define_insn "aarch64_sqdmull_lane<mode>"
3938   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3939         (ss_ashift:<VWIDE>
3940              (mult:<VWIDE>
3941                (sign_extend:<VWIDE>
3942                  (match_operand:VD_HSI 1 "register_operand" "w"))
3943                (sign_extend:<VWIDE>
3944                  (vec_duplicate:VD_HSI
3945                    (vec_select:<VEL>
3946                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3947                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3948                ))
3949              (const_int 1)))]
3950   "TARGET_SIMD"
3951   {
3952     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3953     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3954   }
3955   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3956 )
3957
3958 (define_insn "aarch64_sqdmull_laneq<mode>"
3959   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3960         (ss_ashift:<VWIDE>
3961              (mult:<VWIDE>
3962                (sign_extend:<VWIDE>
3963                  (match_operand:VD_HSI 1 "register_operand" "w"))
3964                (sign_extend:<VWIDE>
3965                  (vec_duplicate:VD_HSI
3966                    (vec_select:<VEL>
3967                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3968                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3969                ))
3970              (const_int 1)))]
3971   "TARGET_SIMD"
3972   {
3973     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3974     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3975   }
3976   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3977 )
3978
3979 (define_insn "aarch64_sqdmull_lane<mode>"
3980   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3981         (ss_ashift:<VWIDE>
3982              (mult:<VWIDE>
3983                (sign_extend:<VWIDE>
3984                  (match_operand:SD_HSI 1 "register_operand" "w"))
3985                (sign_extend:<VWIDE>
3986                  (vec_select:<VEL>
3987                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3988                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3989                ))
3990              (const_int 1)))]
3991   "TARGET_SIMD"
3992   {
3993     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3994     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3995   }
3996   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3997 )
3998
3999 (define_insn "aarch64_sqdmull_laneq<mode>"
4000   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4001         (ss_ashift:<VWIDE>
4002              (mult:<VWIDE>
4003                (sign_extend:<VWIDE>
4004                  (match_operand:SD_HSI 1 "register_operand" "w"))
4005                (sign_extend:<VWIDE>
4006                  (vec_select:<VEL>
4007                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4008                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4009                ))
4010              (const_int 1)))]
4011   "TARGET_SIMD"
4012   {
4013     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4014     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4015   }
4016   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4017 )
4018
4019 ;; vqdmull_n
4020
4021 (define_insn "aarch64_sqdmull_n<mode>"
4022   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4023         (ss_ashift:<VWIDE>
4024              (mult:<VWIDE>
4025                (sign_extend:<VWIDE>
4026                  (match_operand:VD_HSI 1 "register_operand" "w"))
4027                (sign_extend:<VWIDE>
4028                  (vec_duplicate:VD_HSI
4029                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4030                )
4031              (const_int 1)))]
4032   "TARGET_SIMD"
4033   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4034   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4035 )
4036
4037 ;; vqdmull2
4038
4039
4040
4041 (define_insn "aarch64_sqdmull2<mode>_internal"
4042   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4043         (ss_ashift:<VWIDE>
4044              (mult:<VWIDE>
4045                (sign_extend:<VWIDE>
4046                  (vec_select:<VHALF>
4047                    (match_operand:VQ_HSI 1 "register_operand" "w")
4048                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4049                (sign_extend:<VWIDE>
4050                  (vec_select:<VHALF>
4051                    (match_operand:VQ_HSI 2 "register_operand" "w")
4052                    (match_dup 3)))
4053                )
4054              (const_int 1)))]
4055   "TARGET_SIMD"
4056   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4057   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4058 )
4059
4060 (define_expand "aarch64_sqdmull2<mode>"
4061   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062    (match_operand:VQ_HSI 1 "register_operand" "w")
4063    (match_operand:VQ_HSI 2 "register_operand" "w")]
4064   "TARGET_SIMD"
4065 {
4066   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4067   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4068                                                   operands[2], p));
4069   DONE;
4070 })
4071
4072 ;; vqdmull2_lane
4073
4074 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4075   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4076         (ss_ashift:<VWIDE>
4077              (mult:<VWIDE>
4078                (sign_extend:<VWIDE>
4079                  (vec_select:<VHALF>
4080                    (match_operand:VQ_HSI 1 "register_operand" "w")
4081                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4082                (sign_extend:<VWIDE>
4083                  (vec_duplicate:<VHALF>
4084                    (vec_select:<VEL>
4085                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4086                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4087                ))
4088              (const_int 1)))]
4089   "TARGET_SIMD"
4090   {
4091     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4092     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4093   }
4094   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4095 )
4096
4097 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4098   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4099         (ss_ashift:<VWIDE>
4100              (mult:<VWIDE>
4101                (sign_extend:<VWIDE>
4102                  (vec_select:<VHALF>
4103                    (match_operand:VQ_HSI 1 "register_operand" "w")
4104                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4105                (sign_extend:<VWIDE>
4106                  (vec_duplicate:<VHALF>
4107                    (vec_select:<VEL>
4108                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4109                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4110                ))
4111              (const_int 1)))]
4112   "TARGET_SIMD"
4113   {
4114     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4115     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4116   }
4117   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4118 )
4119
4120 (define_expand "aarch64_sqdmull2_lane<mode>"
4121   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4122    (match_operand:VQ_HSI 1 "register_operand" "w")
4123    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4124    (match_operand:SI 3 "immediate_operand" "i")]
4125   "TARGET_SIMD"
4126 {
4127   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4128   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4129                                                        operands[2], operands[3],
4130                                                        p));
4131   DONE;
4132 })
4133
4134 (define_expand "aarch64_sqdmull2_laneq<mode>"
4135   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4136    (match_operand:VQ_HSI 1 "register_operand" "w")
4137    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4138    (match_operand:SI 3 "immediate_operand" "i")]
4139   "TARGET_SIMD"
4140 {
4141   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4142   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4143                                                        operands[2], operands[3],
4144                                                        p));
4145   DONE;
4146 })
4147
4148 ;; vqdmull2_n
4149
4150 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4151   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4152         (ss_ashift:<VWIDE>
4153              (mult:<VWIDE>
4154                (sign_extend:<VWIDE>
4155                  (vec_select:<VHALF>
4156                    (match_operand:VQ_HSI 1 "register_operand" "w")
4157                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4158                (sign_extend:<VWIDE>
4159                  (vec_duplicate:<VHALF>
4160                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4161                )
4162              (const_int 1)))]
4163   "TARGET_SIMD"
4164   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4165   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4166 )
4167
4168 (define_expand "aarch64_sqdmull2_n<mode>"
4169   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4170    (match_operand:VQ_HSI 1 "register_operand" "w")
4171    (match_operand:<VEL> 2 "register_operand" "w")]
4172   "TARGET_SIMD"
4173 {
4174   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4175   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4176                                                     operands[2], p));
4177   DONE;
4178 })
4179
4180 ;; vshl
4181
4182 (define_insn "aarch64_<sur>shl<mode>"
4183   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4184         (unspec:VSDQ_I_DI
4185           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4186            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4187          VSHL))]
4188   "TARGET_SIMD"
4189   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4190   [(set_attr "type" "neon_shift_reg<q>")]
4191 )
4192
4193
4194 ;; vqshl
4195
4196 (define_insn "aarch64_<sur>q<r>shl<mode>"
4197   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4198         (unspec:VSDQ_I
4199           [(match_operand:VSDQ_I 1 "register_operand" "w")
4200            (match_operand:VSDQ_I 2 "register_operand" "w")]
4201          VQSHL))]
4202   "TARGET_SIMD"
4203   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4204   [(set_attr "type" "neon_sat_shift_reg<q>")]
4205 )
4206
4207 ;; vshll_n
4208
4209 (define_insn "aarch64_<sur>shll_n<mode>"
4210   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4212                          (match_operand:SI 2
4213                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4214                          VSHLL))]
4215   "TARGET_SIMD"
4216   {
4217     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4218       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4219     else
4220       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4221   }
4222   [(set_attr "type" "neon_shift_imm_long")]
4223 )
4224
4225 ;; vshll_high_n
4226
4227 (define_insn "aarch64_<sur>shll2_n<mode>"
4228   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4229         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4230                          (match_operand:SI 2 "immediate_operand" "i")]
4231                          VSHLL))]
4232   "TARGET_SIMD"
4233   {
4234     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4235       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4236     else
4237       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4238   }
4239   [(set_attr "type" "neon_shift_imm_long")]
4240 )
4241
4242 ;; vrshr_n
4243
4244 (define_insn "aarch64_<sur>shr_n<mode>"
4245   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4246         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4247                            (match_operand:SI 2
4248                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4249                           VRSHR_N))]
4250   "TARGET_SIMD"
4251   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4252   [(set_attr "type" "neon_sat_shift_imm<q>")]
4253 )
4254
4255 ;; v(r)sra_n
4256
4257 (define_insn "aarch64_<sur>sra_n<mode>"
4258   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4259         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4260                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4261                        (match_operand:SI 3
4262                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4263                       VSRA))]
4264   "TARGET_SIMD"
4265   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4266   [(set_attr "type" "neon_shift_acc<q>")]
4267 )
4268
4269 ;; vs<lr>i_n
4270
4271 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4272   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4273         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4274                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4275                        (match_operand:SI 3
4276                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4277                       VSLRI))]
4278   "TARGET_SIMD"
4279   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4280   [(set_attr "type" "neon_shift_imm<q>")]
4281 )
4282
4283 ;; vqshl(u)
4284
4285 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4286   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4287         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4288                        (match_operand:SI 2
4289                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4290                       VQSHL_N))]
4291   "TARGET_SIMD"
4292   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4293   [(set_attr "type" "neon_sat_shift_imm<q>")]
4294 )
4295
4296
4297 ;; vq(r)shr(u)n_n
4298
4299 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4300   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4301         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4302                             (match_operand:SI 2
4303                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4304                            VQSHRN_N))]
4305   "TARGET_SIMD"
4306   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4307   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4308 )
4309
4310
4311 ;; cm(eq|ge|gt|lt|le)
4312 ;; Note, we have constraints for Dz and Z as different expanders
4313 ;; have different ideas of what should be passed to this pattern.
4314
4315 (define_insn "aarch64_cm<optab><mode>"
4316   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4317         (neg:<V_INT_EQUIV>
4318           (COMPARISONS:<V_INT_EQUIV>
4319             (match_operand:VDQ_I 1 "register_operand" "w,w")
4320             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4321           )))]
4322   "TARGET_SIMD"
4323   "@
4324   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4325   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4326   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4327 )
4328
4329 (define_insn_and_split "aarch64_cm<optab>di"
4330   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4331         (neg:DI
4332           (COMPARISONS:DI
4333             (match_operand:DI 1 "register_operand" "w,w,r")
4334             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4335           )))
4336      (clobber (reg:CC CC_REGNUM))]
4337   "TARGET_SIMD"
4338   "#"
4339   "reload_completed"
4340   [(set (match_operand:DI 0 "register_operand")
4341         (neg:DI
4342           (COMPARISONS:DI
4343             (match_operand:DI 1 "register_operand")
4344             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4345           )))]
4346   {
4347     /* If we are in the general purpose register file,
4348        we split to a sequence of comparison and store.  */
4349     if (GP_REGNUM_P (REGNO (operands[0]))
4350         && GP_REGNUM_P (REGNO (operands[1])))
4351       {
4352         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4353         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4354         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4355         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4356         DONE;
4357       }
4358     /* Otherwise, we expand to a similar pattern which does not
4359        clobber CC_REGNUM.  */
4360   }
4361   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4362 )
4363
4364 (define_insn "*aarch64_cm<optab>di"
4365   [(set (match_operand:DI 0 "register_operand" "=w,w")
4366         (neg:DI
4367           (COMPARISONS:DI
4368             (match_operand:DI 1 "register_operand" "w,w")
4369             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4370           )))]
4371   "TARGET_SIMD && reload_completed"
4372   "@
4373   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4374   cm<optab>\t%d0, %d1, #0"
4375   [(set_attr "type" "neon_compare, neon_compare_zero")]
4376 )
4377
4378 ;; cm(hs|hi)
4379
4380 (define_insn "aarch64_cm<optab><mode>"
4381   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4382         (neg:<V_INT_EQUIV>
4383           (UCOMPARISONS:<V_INT_EQUIV>
4384             (match_operand:VDQ_I 1 "register_operand" "w")
4385             (match_operand:VDQ_I 2 "register_operand" "w")
4386           )))]
4387   "TARGET_SIMD"
4388   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4389   [(set_attr "type" "neon_compare<q>")]
4390 )
4391
4392 (define_insn_and_split "aarch64_cm<optab>di"
4393   [(set (match_operand:DI 0 "register_operand" "=w,r")
4394         (neg:DI
4395           (UCOMPARISONS:DI
4396             (match_operand:DI 1 "register_operand" "w,r")
4397             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4398           )))
4399     (clobber (reg:CC CC_REGNUM))]
4400   "TARGET_SIMD"
4401   "#"
4402   "reload_completed"
4403   [(set (match_operand:DI 0 "register_operand")
4404         (neg:DI
4405           (UCOMPARISONS:DI
4406             (match_operand:DI 1 "register_operand")
4407             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4408           )))]
4409   {
4410     /* If we are in the general purpose register file,
4411        we split to a sequence of comparison and store.  */
4412     if (GP_REGNUM_P (REGNO (operands[0]))
4413         && GP_REGNUM_P (REGNO (operands[1])))
4414       {
4415         machine_mode mode = CCmode;
4416         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4417         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4418         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4419         DONE;
4420       }
4421     /* Otherwise, we expand to a similar pattern which does not
4422        clobber CC_REGNUM.  */
4423   }
4424   [(set_attr "type" "neon_compare,multiple")]
4425 )
4426
4427 (define_insn "*aarch64_cm<optab>di"
4428   [(set (match_operand:DI 0 "register_operand" "=w")
4429         (neg:DI
4430           (UCOMPARISONS:DI
4431             (match_operand:DI 1 "register_operand" "w")
4432             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4433           )))]
4434   "TARGET_SIMD && reload_completed"
4435   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4436   [(set_attr "type" "neon_compare")]
4437 )
4438
4439 ;; cmtst
4440
4441 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4442 ;; we don't have any insns using ne, and aarch64_vcond outputs
4443 ;; not (neg (eq (and x y) 0))
4444 ;; which is rewritten by simplify_rtx as
4445 ;; plus (eq (and x y) 0) -1.
4446
4447 (define_insn "aarch64_cmtst<mode>"
4448   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4449         (plus:<V_INT_EQUIV>
4450           (eq:<V_INT_EQUIV>
4451             (and:VDQ_I
4452               (match_operand:VDQ_I 1 "register_operand" "w")
4453               (match_operand:VDQ_I 2 "register_operand" "w"))
4454             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4455           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4456   ]
4457   "TARGET_SIMD"
4458   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4459   [(set_attr "type" "neon_tst<q>")]
4460 )
4461
4462 (define_insn_and_split "aarch64_cmtstdi"
4463   [(set (match_operand:DI 0 "register_operand" "=w,r")
4464         (neg:DI
4465           (ne:DI
4466             (and:DI
4467               (match_operand:DI 1 "register_operand" "w,r")
4468               (match_operand:DI 2 "register_operand" "w,r"))
4469             (const_int 0))))
4470     (clobber (reg:CC CC_REGNUM))]
4471   "TARGET_SIMD"
4472   "#"
4473   "reload_completed"
4474   [(set (match_operand:DI 0 "register_operand")
4475         (neg:DI
4476           (ne:DI
4477             (and:DI
4478               (match_operand:DI 1 "register_operand")
4479               (match_operand:DI 2 "register_operand"))
4480             (const_int 0))))]
4481   {
4482     /* If we are in the general purpose register file,
4483        we split to a sequence of comparison and store.  */
4484     if (GP_REGNUM_P (REGNO (operands[0]))
4485         && GP_REGNUM_P (REGNO (operands[1])))
4486       {
4487         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4488         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4489         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4490         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4491         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4492         DONE;
4493       }
4494     /* Otherwise, we expand to a similar pattern which does not
4495        clobber CC_REGNUM.  */
4496   }
4497   [(set_attr "type" "neon_tst,multiple")]
4498 )
4499
4500 (define_insn "*aarch64_cmtstdi"
4501   [(set (match_operand:DI 0 "register_operand" "=w")
4502         (neg:DI
4503           (ne:DI
4504             (and:DI
4505               (match_operand:DI 1 "register_operand" "w")
4506               (match_operand:DI 2 "register_operand" "w"))
4507             (const_int 0))))]
4508   "TARGET_SIMD"
4509   "cmtst\t%d0, %d1, %d2"
4510   [(set_attr "type" "neon_tst")]
4511 )
4512
4513 ;; fcm(eq|ge|gt|le|lt)
4514
4515 (define_insn "aarch64_cm<optab><mode>"
4516   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4517         (neg:<V_INT_EQUIV>
4518           (COMPARISONS:<V_INT_EQUIV>
4519             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4520             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4521           )))]
4522   "TARGET_SIMD"
4523   "@
4524   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4525   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4526   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4527 )
4528
4529 ;; fac(ge|gt)
4530 ;; Note we can also handle what would be fac(le|lt) by
4531 ;; generating fac(ge|gt).
4532
4533 (define_insn "aarch64_fac<optab><mode>"
4534   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4535         (neg:<V_INT_EQUIV>
4536           (FAC_COMPARISONS:<V_INT_EQUIV>
4537             (abs:VHSDF_HSDF
4538               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4539             (abs:VHSDF_HSDF
4540               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4541   )))]
4542   "TARGET_SIMD"
4543   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4544   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4545 )
4546
4547 ;; addp
4548
4549 (define_insn "aarch64_addp<mode>"
4550   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4551         (unspec:VD_BHSI
4552           [(match_operand:VD_BHSI 1 "register_operand" "w")
4553            (match_operand:VD_BHSI 2 "register_operand" "w")]
4554           UNSPEC_ADDP))]
4555   "TARGET_SIMD"
4556   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4557   [(set_attr "type" "neon_reduc_add<q>")]
4558 )
4559
4560 (define_insn "aarch64_addpdi"
4561   [(set (match_operand:DI 0 "register_operand" "=w")
4562         (unspec:DI
4563           [(match_operand:V2DI 1 "register_operand" "w")]
4564           UNSPEC_ADDP))]
4565   "TARGET_SIMD"
4566   "addp\t%d0, %1.2d"
4567   [(set_attr "type" "neon_reduc_add")]
4568 )
4569
4570 ;; sqrt
4571
4572 (define_expand "sqrt<mode>2"
4573   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4574         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4575   "TARGET_SIMD"
4576 {
4577   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4578     DONE;
4579 })
4580
4581 (define_insn "*sqrt<mode>2"
4582   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4583         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4584   "TARGET_SIMD"
4585   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4586   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4587 )
4588
4589 ;; Patterns for vector struct loads and stores.
4590
4591 (define_insn "aarch64_simd_ld2<mode>"
4592   [(set (match_operand:OI 0 "register_operand" "=w")
4593         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4594                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4595                    UNSPEC_LD2))]
4596   "TARGET_SIMD"
4597   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4598   [(set_attr "type" "neon_load2_2reg<q>")]
4599 )
4600
4601 (define_insn "aarch64_simd_ld2r<mode>"
4602   [(set (match_operand:OI 0 "register_operand" "=w")
4603        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4604                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4605                   UNSPEC_LD2_DUP))]
4606   "TARGET_SIMD"
4607   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4608   [(set_attr "type" "neon_load2_all_lanes<q>")]
4609 )
4610
4611 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4612   [(set (match_operand:OI 0 "register_operand" "=w")
4613         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4614                     (match_operand:OI 2 "register_operand" "0")
4615                     (match_operand:SI 3 "immediate_operand" "i")
4616                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4617                    UNSPEC_LD2_LANE))]
4618   "TARGET_SIMD"
4619   {
4620     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4621     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4622   }
4623   [(set_attr "type" "neon_load2_one_lane")]
4624 )
4625
4626 (define_expand "vec_load_lanesoi<mode>"
4627   [(set (match_operand:OI 0 "register_operand" "=w")
4628         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4629                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4630                    UNSPEC_LD2))]
4631   "TARGET_SIMD"
4632 {
4633   if (BYTES_BIG_ENDIAN)
4634     {
4635       rtx tmp = gen_reg_rtx (OImode);
4636       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4637       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4638       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4639     }
4640   else
4641     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4642   DONE;
4643 })
4644
4645 (define_insn "aarch64_simd_st2<mode>"
4646   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4647         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4648                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4649                    UNSPEC_ST2))]
4650   "TARGET_SIMD"
4651   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4652   [(set_attr "type" "neon_store2_2reg<q>")]
4653 )
4654
4655 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4656 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4657   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4658         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4659                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4660                     (match_operand:SI 2 "immediate_operand" "i")]
4661                    UNSPEC_ST2_LANE))]
4662   "TARGET_SIMD"
4663   {
4664     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4665     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4666   }
4667   [(set_attr "type" "neon_store2_one_lane<q>")]
4668 )
4669
4670 (define_expand "vec_store_lanesoi<mode>"
4671   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4672         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4673                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4674                    UNSPEC_ST2))]
4675   "TARGET_SIMD"
4676 {
4677   if (BYTES_BIG_ENDIAN)
4678     {
4679       rtx tmp = gen_reg_rtx (OImode);
4680       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4681       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4682       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4683     }
4684   else
4685     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4686   DONE;
4687 })
4688
4689 (define_insn "aarch64_simd_ld3<mode>"
4690   [(set (match_operand:CI 0 "register_operand" "=w")
4691         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4692                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4693                    UNSPEC_LD3))]
4694   "TARGET_SIMD"
4695   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4696   [(set_attr "type" "neon_load3_3reg<q>")]
4697 )
4698
4699 (define_insn "aarch64_simd_ld3r<mode>"
4700   [(set (match_operand:CI 0 "register_operand" "=w")
4701        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4702                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4703                   UNSPEC_LD3_DUP))]
4704   "TARGET_SIMD"
4705   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4706   [(set_attr "type" "neon_load3_all_lanes<q>")]
4707 )
4708
4709 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4710   [(set (match_operand:CI 0 "register_operand" "=w")
4711         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4712                     (match_operand:CI 2 "register_operand" "0")
4713                     (match_operand:SI 3 "immediate_operand" "i")
4714                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4715                    UNSPEC_LD3_LANE))]
4716   "TARGET_SIMD"
4717 {
4718     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4719     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4720 }
4721   [(set_attr "type" "neon_load3_one_lane")]
4722 )
4723
4724 (define_expand "vec_load_lanesci<mode>"
4725   [(set (match_operand:CI 0 "register_operand" "=w")
4726         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4727                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4728                    UNSPEC_LD3))]
4729   "TARGET_SIMD"
4730 {
4731   if (BYTES_BIG_ENDIAN)
4732     {
4733       rtx tmp = gen_reg_rtx (CImode);
4734       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4735       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4736       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4737     }
4738   else
4739     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4740   DONE;
4741 })
4742
4743 (define_insn "aarch64_simd_st3<mode>"
4744   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4745         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4746                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4747                    UNSPEC_ST3))]
4748   "TARGET_SIMD"
4749   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4750   [(set_attr "type" "neon_store3_3reg<q>")]
4751 )
4752
4753 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4754 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4755   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4756         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4757                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4758                      (match_operand:SI 2 "immediate_operand" "i")]
4759                     UNSPEC_ST3_LANE))]
4760   "TARGET_SIMD"
4761   {
4762     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4763     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4764   }
4765   [(set_attr "type" "neon_store3_one_lane<q>")]
4766 )
4767
4768 (define_expand "vec_store_lanesci<mode>"
4769   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4770         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4771                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4772                    UNSPEC_ST3))]
4773   "TARGET_SIMD"
4774 {
4775   if (BYTES_BIG_ENDIAN)
4776     {
4777       rtx tmp = gen_reg_rtx (CImode);
4778       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4779       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4780       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4781     }
4782   else
4783     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4784   DONE;
4785 })
4786
4787 (define_insn "aarch64_simd_ld4<mode>"
4788   [(set (match_operand:XI 0 "register_operand" "=w")
4789         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4790                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4791                    UNSPEC_LD4))]
4792   "TARGET_SIMD"
4793   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4794   [(set_attr "type" "neon_load4_4reg<q>")]
4795 )
4796
4797 (define_insn "aarch64_simd_ld4r<mode>"
4798   [(set (match_operand:XI 0 "register_operand" "=w")
4799        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4800                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4801                   UNSPEC_LD4_DUP))]
4802   "TARGET_SIMD"
4803   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4804   [(set_attr "type" "neon_load4_all_lanes<q>")]
4805 )
4806
4807 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4808   [(set (match_operand:XI 0 "register_operand" "=w")
4809         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4810                     (match_operand:XI 2 "register_operand" "0")
4811                     (match_operand:SI 3 "immediate_operand" "i")
4812                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4813                    UNSPEC_LD4_LANE))]
4814   "TARGET_SIMD"
4815 {
4816     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4817     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4818 }
4819   [(set_attr "type" "neon_load4_one_lane")]
4820 )
4821
4822 (define_expand "vec_load_lanesxi<mode>"
4823   [(set (match_operand:XI 0 "register_operand" "=w")
4824         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4825                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4826                    UNSPEC_LD4))]
4827   "TARGET_SIMD"
4828 {
4829   if (BYTES_BIG_ENDIAN)
4830     {
4831       rtx tmp = gen_reg_rtx (XImode);
4832       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4833       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4834       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4835     }
4836   else
4837     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4838   DONE;
4839 })
4840
4841 (define_insn "aarch64_simd_st4<mode>"
4842   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4843         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4844                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4845                    UNSPEC_ST4))]
4846   "TARGET_SIMD"
4847   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4848   [(set_attr "type" "neon_store4_4reg<q>")]
4849 )
4850
4851 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4852 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4853   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4854         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4855                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4856                      (match_operand:SI 2 "immediate_operand" "i")]
4857                     UNSPEC_ST4_LANE))]
4858   "TARGET_SIMD"
4859   {
4860     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4861     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4862   }
4863   [(set_attr "type" "neon_store4_one_lane<q>")]
4864 )
4865
4866 (define_expand "vec_store_lanesxi<mode>"
4867   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4868         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4869                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4870                    UNSPEC_ST4))]
4871   "TARGET_SIMD"
4872 {
4873   if (BYTES_BIG_ENDIAN)
4874     {
4875       rtx tmp = gen_reg_rtx (XImode);
4876       rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4877       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4878       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4879     }
4880   else
4881     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4882   DONE;
4883 })
4884
4885 (define_insn_and_split "aarch64_rev_reglist<mode>"
4886 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4887         (unspec:VSTRUCT
4888                    [(match_operand:VSTRUCT 1 "register_operand" "w")
4889                     (match_operand:V16QI 2 "register_operand" "w")]
4890                    UNSPEC_REV_REGLIST))]
4891   "TARGET_SIMD"
4892   "#"
4893   "&& reload_completed"
4894   [(const_int 0)]
4895 {
4896   int i;
4897   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4898   for (i = 0; i < nregs; i++)
4899     {
4900       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4901       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4902       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4903     }
4904   DONE;
4905 }
4906   [(set_attr "type" "neon_tbl1_q")
4907    (set_attr "length" "<insn_count>")]
4908 )
4909
4910 ;; Reload patterns for AdvSIMD register list operands.
4911
4912 (define_expand "mov<mode>"
4913   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4914         (match_operand:VSTRUCT 1 "general_operand" ""))]
4915   "TARGET_SIMD"
4916 {
4917   if (can_create_pseudo_p ())
4918     {
4919       if (GET_CODE (operands[0]) != REG)
4920         operands[1] = force_reg (<MODE>mode, operands[1]);
4921     }
4922 })
4923
4924 (define_insn "*aarch64_mov<mode>"
4925   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4926         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4927   "TARGET_SIMD && !BYTES_BIG_ENDIAN
4928    && (register_operand (operands[0], <MODE>mode)
4929        || register_operand (operands[1], <MODE>mode))"
4930   "@
4931    #
4932    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4933    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4934   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4935                      neon_load<nregs>_<nregs>reg_q")
4936    (set_attr "length" "<insn_count>,4,4")]
4937 )
4938
4939 (define_insn "aarch64_be_ld1<mode>"
4940   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
4941         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4942                              "aarch64_simd_struct_operand" "Utv")]
4943         UNSPEC_LD1))]
4944   "TARGET_SIMD"
4945   "ld1\\t{%0<Vmtype>}, %1"
4946   [(set_attr "type" "neon_load1_1reg<q>")]
4947 )
4948
4949 (define_insn "aarch64_be_st1<mode>"
4950   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4951         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4952         UNSPEC_ST1))]
4953   "TARGET_SIMD"
4954   "st1\\t{%1<Vmtype>}, %0"
4955   [(set_attr "type" "neon_store1_1reg<q>")]
4956 )
4957
4958 (define_insn "*aarch64_be_movoi"
4959   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4960         (match_operand:OI 1 "general_operand"      " w,w,m"))]
4961   "TARGET_SIMD && BYTES_BIG_ENDIAN
4962    && (register_operand (operands[0], OImode)
4963        || register_operand (operands[1], OImode))"
4964   "@
4965    #
4966    stp\\t%q1, %R1, %0
4967    ldp\\t%q0, %R0, %1"
4968   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4969    (set_attr "length" "8,4,4")]
4970 )
4971
4972 (define_insn "*aarch64_be_movci"
4973   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4974         (match_operand:CI 1 "general_operand"      " w,w,o"))]
4975   "TARGET_SIMD && BYTES_BIG_ENDIAN
4976    && (register_operand (operands[0], CImode)
4977        || register_operand (operands[1], CImode))"
4978   "#"
4979   [(set_attr "type" "multiple")
4980    (set_attr "length" "12,4,4")]
4981 )
4982
4983 (define_insn "*aarch64_be_movxi"
4984   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4985         (match_operand:XI 1 "general_operand"      " w,w,o"))]
4986   "TARGET_SIMD && BYTES_BIG_ENDIAN
4987    && (register_operand (operands[0], XImode)
4988        || register_operand (operands[1], XImode))"
4989   "#"
4990   [(set_attr "type" "multiple")
4991    (set_attr "length" "16,4,4")]
4992 )
4993
4994 (define_split
4995   [(set (match_operand:OI 0 "register_operand")
4996         (match_operand:OI 1 "register_operand"))]
4997   "TARGET_SIMD && reload_completed"
4998   [(const_int 0)]
4999 {
5000   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5001   DONE;
5002 })
5003
5004 (define_split
5005   [(set (match_operand:CI 0 "nonimmediate_operand")
5006         (match_operand:CI 1 "general_operand"))]
5007   "TARGET_SIMD && reload_completed"
5008   [(const_int 0)]
5009 {
5010   if (register_operand (operands[0], CImode)
5011       && register_operand (operands[1], CImode))
5012     {
5013       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5014       DONE;
5015     }
5016   else if (BYTES_BIG_ENDIAN)
5017     {
5018       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5019                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
5020       emit_move_insn (gen_lowpart (V16QImode,
5021                                    simplify_gen_subreg (TImode, operands[0],
5022                                                         CImode, 32)),
5023                       gen_lowpart (V16QImode,
5024                                    simplify_gen_subreg (TImode, operands[1],
5025                                                         CImode, 32)));
5026       DONE;
5027     }
5028   else
5029     FAIL;
5030 })
5031
5032 (define_split
5033   [(set (match_operand:XI 0 "nonimmediate_operand")
5034         (match_operand:XI 1 "general_operand"))]
5035   "TARGET_SIMD && reload_completed"
5036   [(const_int 0)]
5037 {
5038   if (register_operand (operands[0], XImode)
5039       && register_operand (operands[1], XImode))
5040     {
5041       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5042       DONE;
5043     }
5044   else if (BYTES_BIG_ENDIAN)
5045     {
5046       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5047                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
5048       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5049                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
5050       DONE;
5051     }
5052   else
5053     FAIL;
5054 })
5055
5056 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5057   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5058    (match_operand:DI 1 "register_operand" "w")
5059    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5060   "TARGET_SIMD"
5061 {
5062   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5063   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5064                      * <VSTRUCT:nregs>);
5065
5066   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5067                                                                 mem));
5068   DONE;
5069 })
5070
5071 (define_insn "aarch64_ld2<mode>_dreg"
5072   [(set (match_operand:OI 0 "register_operand" "=w")
5073         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5074                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075                    UNSPEC_LD2_DREG))]
5076   "TARGET_SIMD"
5077   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5078   [(set_attr "type" "neon_load2_2reg<q>")]
5079 )
5080
5081 (define_insn "aarch64_ld2<mode>_dreg"
5082   [(set (match_operand:OI 0 "register_operand" "=w")
5083         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5084                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5085                    UNSPEC_LD2_DREG))]
5086   "TARGET_SIMD"
5087   "ld1\\t{%S0.1d - %T0.1d}, %1"
5088   [(set_attr "type" "neon_load1_2reg<q>")]
5089 )
5090
5091 (define_insn "aarch64_ld3<mode>_dreg"
5092   [(set (match_operand:CI 0 "register_operand" "=w")
5093         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5094                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5095                    UNSPEC_LD3_DREG))]
5096   "TARGET_SIMD"
5097   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5098   [(set_attr "type" "neon_load3_3reg<q>")]
5099 )
5100
5101 (define_insn "aarch64_ld3<mode>_dreg"
5102   [(set (match_operand:CI 0 "register_operand" "=w")
5103         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5104                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5105                    UNSPEC_LD3_DREG))]
5106   "TARGET_SIMD"
5107   "ld1\\t{%S0.1d - %U0.1d}, %1"
5108   [(set_attr "type" "neon_load1_3reg<q>")]
5109 )
5110
5111 (define_insn "aarch64_ld4<mode>_dreg"
5112   [(set (match_operand:XI 0 "register_operand" "=w")
5113         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5114                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5115                    UNSPEC_LD4_DREG))]
5116   "TARGET_SIMD"
5117   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5118   [(set_attr "type" "neon_load4_4reg<q>")]
5119 )
5120
5121 (define_insn "aarch64_ld4<mode>_dreg"
5122   [(set (match_operand:XI 0 "register_operand" "=w")
5123         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5124                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125                    UNSPEC_LD4_DREG))]
5126   "TARGET_SIMD"
5127   "ld1\\t{%S0.1d - %V0.1d}, %1"
5128   [(set_attr "type" "neon_load1_4reg<q>")]
5129 )
5130
5131 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5132  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5133   (match_operand:DI 1 "register_operand" "r")
5134   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5135   "TARGET_SIMD"
5136 {
5137   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5138   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5139
5140   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5141   DONE;
5142 })
5143
5144 (define_expand "aarch64_ld1<VALL_F16:mode>"
5145  [(match_operand:VALL_F16 0 "register_operand")
5146   (match_operand:DI 1 "register_operand")]
5147   "TARGET_SIMD"
5148 {
5149   machine_mode mode = <VALL_F16:MODE>mode;
5150   rtx mem = gen_rtx_MEM (mode, operands[1]);
5151
5152   if (BYTES_BIG_ENDIAN)
5153     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5154   else
5155     emit_move_insn (operands[0], mem);
5156   DONE;
5157 })
5158
5159 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5160  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5161   (match_operand:DI 1 "register_operand" "r")
5162   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5163   "TARGET_SIMD"
5164 {
5165   machine_mode mode = <VSTRUCT:MODE>mode;
5166   rtx mem = gen_rtx_MEM (mode, operands[1]);
5167
5168   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5169   DONE;
5170 })
5171
5172 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5173   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5174         (match_operand:DI 1 "register_operand" "w")
5175         (match_operand:VSTRUCT 2 "register_operand" "0")
5176         (match_operand:SI 3 "immediate_operand" "i")
5177         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5178   "TARGET_SIMD"
5179 {
5180   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5181   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5182                      * <VSTRUCT:nregs>);
5183
5184   aarch64_simd_lane_bounds (operands[3], 0,
5185                             GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5186                             NULL);
5187   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5188         operands[0], mem, operands[2], operands[3]));
5189   DONE;
5190 })
5191
5192 ;; Expanders for builtins to extract vector registers from large
5193 ;; opaque integer modes.
5194
5195 ;; D-register list.
5196
5197 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5198  [(match_operand:VDC 0 "register_operand" "=w")
5199   (match_operand:VSTRUCT 1 "register_operand" "w")
5200   (match_operand:SI 2 "immediate_operand" "i")]
5201   "TARGET_SIMD"
5202 {
5203   int part = INTVAL (operands[2]);
5204   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5205   int offset = part * 16;
5206
5207   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5208   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5209   DONE;
5210 })
5211
5212 ;; Q-register list.
5213
5214 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5215  [(match_operand:VQ 0 "register_operand" "=w")
5216   (match_operand:VSTRUCT 1 "register_operand" "w")
5217   (match_operand:SI 2 "immediate_operand" "i")]
5218   "TARGET_SIMD"
5219 {
5220   int part = INTVAL (operands[2]);
5221   int offset = part * 16;
5222
5223   emit_move_insn (operands[0],
5224                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5225   DONE;
5226 })
5227
5228 ;; Permuted-store expanders for neon intrinsics.
5229
5230 ;; Permute instructions
5231
5232 ;; vec_perm support
5233
5234 (define_expand "vec_perm_const<mode>"
5235   [(match_operand:VALL_F16 0 "register_operand")
5236    (match_operand:VALL_F16 1 "register_operand")
5237    (match_operand:VALL_F16 2 "register_operand")
5238    (match_operand:<V_INT_EQUIV> 3)]
5239   "TARGET_SIMD"
5240 {
5241   if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5242                                      operands[2], operands[3]))
5243     DONE;
5244   else
5245     FAIL;
5246 })
5247
5248 (define_expand "vec_perm<mode>"
5249   [(match_operand:VB 0 "register_operand")
5250    (match_operand:VB 1 "register_operand")
5251    (match_operand:VB 2 "register_operand")
5252    (match_operand:VB 3 "register_operand")]
5253   "TARGET_SIMD"
5254 {
5255   aarch64_expand_vec_perm (operands[0], operands[1],
5256                            operands[2], operands[3]);
5257   DONE;
5258 })
5259
5260 (define_insn "aarch64_tbl1<mode>"
5261   [(set (match_operand:VB 0 "register_operand" "=w")
5262         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5263                     (match_operand:VB 2 "register_operand" "w")]
5264                    UNSPEC_TBL))]
5265   "TARGET_SIMD"
5266   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5267   [(set_attr "type" "neon_tbl1<q>")]
5268 )
5269
5270 ;; Two source registers.
5271
5272 (define_insn "aarch64_tbl2v16qi"
5273   [(set (match_operand:V16QI 0 "register_operand" "=w")
5274         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5275                        (match_operand:V16QI 2 "register_operand" "w")]
5276                       UNSPEC_TBL))]
5277   "TARGET_SIMD"
5278   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5279   [(set_attr "type" "neon_tbl2_q")]
5280 )
5281
5282 (define_insn "aarch64_tbl3<mode>"
5283   [(set (match_operand:VB 0 "register_operand" "=w")
5284         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5285                       (match_operand:VB 2 "register_operand" "w")]
5286                       UNSPEC_TBL))]
5287   "TARGET_SIMD"
5288   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5289   [(set_attr "type" "neon_tbl3")]
5290 )
5291
5292 (define_insn "aarch64_tbx4<mode>"
5293   [(set (match_operand:VB 0 "register_operand" "=w")
5294         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5295                       (match_operand:OI 2 "register_operand" "w")
5296                       (match_operand:VB 3 "register_operand" "w")]
5297                       UNSPEC_TBX))]
5298   "TARGET_SIMD"
5299   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5300   [(set_attr "type" "neon_tbl4")]
5301 )
5302
5303 ;; Three source registers.
5304
5305 (define_insn "aarch64_qtbl3<mode>"
5306   [(set (match_operand:VB 0 "register_operand" "=w")
5307         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5308                       (match_operand:VB 2 "register_operand" "w")]
5309                       UNSPEC_TBL))]
5310   "TARGET_SIMD"
5311   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5312   [(set_attr "type" "neon_tbl3")]
5313 )
5314
5315 (define_insn "aarch64_qtbx3<mode>"
5316   [(set (match_operand:VB 0 "register_operand" "=w")
5317         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5318                       (match_operand:CI 2 "register_operand" "w")
5319                       (match_operand:VB 3 "register_operand" "w")]
5320                       UNSPEC_TBX))]
5321   "TARGET_SIMD"
5322   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5323   [(set_attr "type" "neon_tbl3")]
5324 )
5325
5326 ;; Four source registers.
5327
5328 (define_insn "aarch64_qtbl4<mode>"
5329   [(set (match_operand:VB 0 "register_operand" "=w")
5330         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5331                       (match_operand:VB 2 "register_operand" "w")]
5332                       UNSPEC_TBL))]
5333   "TARGET_SIMD"
5334   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5335   [(set_attr "type" "neon_tbl4")]
5336 )
5337
5338 (define_insn "aarch64_qtbx4<mode>"
5339   [(set (match_operand:VB 0 "register_operand" "=w")
5340         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5341                       (match_operand:XI 2 "register_operand" "w")
5342                       (match_operand:VB 3 "register_operand" "w")]
5343                       UNSPEC_TBX))]
5344   "TARGET_SIMD"
5345   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5346   [(set_attr "type" "neon_tbl4")]
5347 )
5348
5349 (define_insn_and_split "aarch64_combinev16qi"
5350   [(set (match_operand:OI 0 "register_operand" "=w")
5351         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5352                     (match_operand:V16QI 2 "register_operand" "w")]
5353                    UNSPEC_CONCAT))]
5354   "TARGET_SIMD"
5355   "#"
5356   "&& reload_completed"
5357   [(const_int 0)]
5358 {
5359   aarch64_split_combinev16qi (operands);
5360   DONE;
5361 }
5362 [(set_attr "type" "multiple")]
5363 )
5364
5365 ;; This instruction's pattern is generated directly by
5366 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5367 ;; need corresponding changes there.
5368 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5369   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5370         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5371                           (match_operand:VALL_F16 2 "register_operand" "w")]
5372          PERMUTE))]
5373   "TARGET_SIMD"
5374   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5375   [(set_attr "type" "neon_permute<q>")]
5376 )
5377
5378 ;; This instruction's pattern is generated directly by
5379 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5380 ;; need corresponding changes there.  Note that the immediate (third)
5381 ;; operand is a lane index not a byte index.
5382 (define_insn "aarch64_ext<mode>"
5383   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5384         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5385                           (match_operand:VALL_F16 2 "register_operand" "w")
5386                           (match_operand:SI 3 "immediate_operand" "i")]
5387          UNSPEC_EXT))]
5388   "TARGET_SIMD"
5389 {
5390   operands[3] = GEN_INT (INTVAL (operands[3])
5391       * GET_MODE_UNIT_SIZE (<MODE>mode));
5392   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5393 }
5394   [(set_attr "type" "neon_ext<q>")]
5395 )
5396
5397 ;; This instruction's pattern is generated directly by
5398 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5399 ;; need corresponding changes there.
5400 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5401   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5402         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5403                     REVERSE))]
5404   "TARGET_SIMD"
5405   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5406   [(set_attr "type" "neon_rev<q>")]
5407 )
5408
5409 (define_insn "aarch64_st2<mode>_dreg"
5410   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5411         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5412                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5413                    UNSPEC_ST2))]
5414   "TARGET_SIMD"
5415   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5416   [(set_attr "type" "neon_store2_2reg")]
5417 )
5418
5419 (define_insn "aarch64_st2<mode>_dreg"
5420   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5421         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5422                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5423                    UNSPEC_ST2))]
5424   "TARGET_SIMD"
5425   "st1\\t{%S1.1d - %T1.1d}, %0"
5426   [(set_attr "type" "neon_store1_2reg")]
5427 )
5428
5429 (define_insn "aarch64_st3<mode>_dreg"
5430   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5431         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5432                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5433                    UNSPEC_ST3))]
5434   "TARGET_SIMD"
5435   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5436   [(set_attr "type" "neon_store3_3reg")]
5437 )
5438
5439 (define_insn "aarch64_st3<mode>_dreg"
5440   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5441         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5442                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5443                    UNSPEC_ST3))]
5444   "TARGET_SIMD"
5445   "st1\\t{%S1.1d - %U1.1d}, %0"
5446   [(set_attr "type" "neon_store1_3reg")]
5447 )
5448
5449 (define_insn "aarch64_st4<mode>_dreg"
5450   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5451         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5452                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5453                    UNSPEC_ST4))]
5454   "TARGET_SIMD"
5455   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5456   [(set_attr "type" "neon_store4_4reg")]
5457 )
5458
5459 (define_insn "aarch64_st4<mode>_dreg"
5460   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5461         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5462                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5463                    UNSPEC_ST4))]
5464   "TARGET_SIMD"
5465   "st1\\t{%S1.1d - %V1.1d}, %0"
5466   [(set_attr "type" "neon_store1_4reg")]
5467 )
5468
5469 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5470  [(match_operand:DI 0 "register_operand" "r")
5471   (match_operand:VSTRUCT 1 "register_operand" "w")
5472   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5473   "TARGET_SIMD"
5474 {
5475   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5476   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5477
5478   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5479   DONE;
5480 })
5481
5482 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5483  [(match_operand:DI 0 "register_operand" "r")
5484   (match_operand:VSTRUCT 1 "register_operand" "w")
5485   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5486   "TARGET_SIMD"
5487 {
5488   machine_mode mode = <VSTRUCT:MODE>mode;
5489   rtx mem = gen_rtx_MEM (mode, operands[0]);
5490
5491   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5492   DONE;
5493 })
5494
5495 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5496  [(match_operand:DI 0 "register_operand" "r")
5497   (match_operand:VSTRUCT 1 "register_operand" "w")
5498   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5499   (match_operand:SI 2 "immediate_operand")]
5500   "TARGET_SIMD"
5501 {
5502   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5503   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5504                      * <VSTRUCT:nregs>);
5505
5506   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5507                 mem, operands[1], operands[2]));
5508   DONE;
5509 })
5510
5511 (define_expand "aarch64_st1<VALL_F16:mode>"
5512  [(match_operand:DI 0 "register_operand")
5513   (match_operand:VALL_F16 1 "register_operand")]
5514   "TARGET_SIMD"
5515 {
5516   machine_mode mode = <VALL_F16:MODE>mode;
5517   rtx mem = gen_rtx_MEM (mode, operands[0]);
5518
5519   if (BYTES_BIG_ENDIAN)
5520     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5521   else
5522     emit_move_insn (mem, operands[1]);
5523   DONE;
5524 })
5525
5526 ;; Expander for builtins to insert vector registers into large
5527 ;; opaque integer modes.
5528
5529 ;; Q-register list.  We don't need a D-reg inserter as we zero
5530 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5531
5532 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5533  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5534   (match_operand:VSTRUCT 1 "register_operand" "0")
5535   (match_operand:VQ 2 "register_operand" "w")
5536   (match_operand:SI 3 "immediate_operand" "i")]
5537   "TARGET_SIMD"
5538 {
5539   int part = INTVAL (operands[3]);
5540   int offset = part * 16;
5541
5542   emit_move_insn (operands[0], operands[1]);
5543   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5544                   operands[2]);
5545   DONE;
5546 })
5547
5548 ;; Standard pattern name vec_init<mode><Vel>.
5549
5550 (define_expand "vec_init<mode><Vel>"
5551   [(match_operand:VALL_F16 0 "register_operand" "")
5552    (match_operand 1 "" "")]
5553   "TARGET_SIMD"
5554 {
5555   aarch64_expand_vector_init (operands[0], operands[1]);
5556   DONE;
5557 })
5558
5559 (define_insn "*aarch64_simd_ld1r<mode>"
5560   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5561         (vec_duplicate:VALL_F16
5562           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5563   "TARGET_SIMD"
5564   "ld1r\\t{%0.<Vtype>}, %1"
5565   [(set_attr "type" "neon_load1_all_lanes")]
5566 )
5567
5568 (define_insn "aarch64_frecpe<mode>"
5569   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5570         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5571          UNSPEC_FRECPE))]
5572   "TARGET_SIMD"
5573   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5574   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5575 )
5576
5577 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5578   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5579         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5580          FRECP))]
5581   "TARGET_SIMD"
5582   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5583   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5584 )
5585
5586 (define_insn "aarch64_frecps<mode>"
5587   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5588         (unspec:VHSDF_HSDF
5589           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5590           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5591           UNSPEC_FRECPS))]
5592   "TARGET_SIMD"
5593   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5594   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5595 )
5596
5597 (define_insn "aarch64_urecpe<mode>"
5598   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5599         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5600                 UNSPEC_URECPE))]
5601  "TARGET_SIMD"
5602  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5603   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5604
5605 ;; Standard pattern name vec_extract<mode><Vel>.
5606
5607 (define_expand "vec_extract<mode><Vel>"
5608   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5609    (match_operand:VALL_F16 1 "register_operand" "")
5610    (match_operand:SI 2 "immediate_operand" "")]
5611   "TARGET_SIMD"
5612 {
5613     emit_insn
5614       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5615     DONE;
5616 })
5617
5618 ;; aes
5619
5620 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5621   [(set (match_operand:V16QI 0 "register_operand" "=w")
5622         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5623                        (match_operand:V16QI 2 "register_operand" "w")]
5624          CRYPTO_AES))]
5625   "TARGET_SIMD && TARGET_CRYPTO"
5626   "aes<aes_op>\\t%0.16b, %2.16b"
5627   [(set_attr "type" "crypto_aese")]
5628 )
5629
5630 ;; When AES/AESMC fusion is enabled we want the register allocation to
5631 ;; look like:
5632 ;;    AESE Vn, _
5633 ;;    AESMC Vn, Vn
5634 ;; So prefer to tie operand 1 to operand 0 when fusing.
5635
5636 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5637   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5638         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5639          CRYPTO_AESMC))]
5640   "TARGET_SIMD && TARGET_CRYPTO"
5641   "aes<aesmc_op>\\t%0.16b, %1.16b"
5642   [(set_attr "type" "crypto_aesmc")
5643    (set_attr_alternative "enabled"
5644      [(if_then_else (match_test
5645                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5646                      (const_string "yes" )
5647                      (const_string "no"))
5648       (const_string "yes")])]
5649 )
5650
5651 ;; sha1
5652
5653 (define_insn "aarch64_crypto_sha1hsi"
5654   [(set (match_operand:SI 0 "register_operand" "=w")
5655         (unspec:SI [(match_operand:SI 1
5656                        "register_operand" "w")]
5657          UNSPEC_SHA1H))]
5658   "TARGET_SIMD && TARGET_CRYPTO"
5659   "sha1h\\t%s0, %s1"
5660   [(set_attr "type" "crypto_sha1_fast")]
5661 )
5662
5663 (define_insn "aarch64_crypto_sha1hv4si"
5664   [(set (match_operand:SI 0 "register_operand" "=w")
5665         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5666                      (parallel [(const_int 0)]))]
5667          UNSPEC_SHA1H))]
5668   "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5669   "sha1h\\t%s0, %s1"
5670   [(set_attr "type" "crypto_sha1_fast")]
5671 )
5672
5673 (define_insn "aarch64_be_crypto_sha1hv4si"
5674   [(set (match_operand:SI 0 "register_operand" "=w")
5675         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5676                      (parallel [(const_int 3)]))]
5677          UNSPEC_SHA1H))]
5678   "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5679   "sha1h\\t%s0, %s1"
5680   [(set_attr "type" "crypto_sha1_fast")]
5681 )
5682
5683 (define_insn "aarch64_crypto_sha1su1v4si"
5684   [(set (match_operand:V4SI 0 "register_operand" "=w")
5685         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5686                       (match_operand:V4SI 2 "register_operand" "w")]
5687          UNSPEC_SHA1SU1))]
5688   "TARGET_SIMD && TARGET_CRYPTO"
5689   "sha1su1\\t%0.4s, %2.4s"
5690   [(set_attr "type" "crypto_sha1_fast")]
5691 )
5692
5693 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5694   [(set (match_operand:V4SI 0 "register_operand" "=w")
5695         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5696                       (match_operand:SI 2 "register_operand" "w")
5697                       (match_operand:V4SI 3 "register_operand" "w")]
5698          CRYPTO_SHA1))]
5699   "TARGET_SIMD && TARGET_CRYPTO"
5700   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5701   [(set_attr "type" "crypto_sha1_slow")]
5702 )
5703
5704 (define_insn "aarch64_crypto_sha1su0v4si"
5705   [(set (match_operand:V4SI 0 "register_operand" "=w")
5706         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5707                       (match_operand:V4SI 2 "register_operand" "w")
5708                       (match_operand:V4SI 3 "register_operand" "w")]
5709          UNSPEC_SHA1SU0))]
5710   "TARGET_SIMD && TARGET_CRYPTO"
5711   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5712   [(set_attr "type" "crypto_sha1_xor")]
5713 )
5714
5715 ;; sha256
5716
5717 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5718   [(set (match_operand:V4SI 0 "register_operand" "=w")
5719         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5720                       (match_operand:V4SI 2 "register_operand" "w")
5721                       (match_operand:V4SI 3 "register_operand" "w")]
5722          CRYPTO_SHA256))]
5723   "TARGET_SIMD && TARGET_CRYPTO"
5724   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5725   [(set_attr "type" "crypto_sha256_slow")]
5726 )
5727
5728 (define_insn "aarch64_crypto_sha256su0v4si"
5729   [(set (match_operand:V4SI 0 "register_operand" "=w")
5730         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5731                       (match_operand:V4SI 2 "register_operand" "w")]
5732          UNSPEC_SHA256SU0))]
5733   "TARGET_SIMD &&TARGET_CRYPTO"
5734   "sha256su0\\t%0.4s, %2.4s"
5735   [(set_attr "type" "crypto_sha256_fast")]
5736 )
5737
5738 (define_insn "aarch64_crypto_sha256su1v4si"
5739   [(set (match_operand:V4SI 0 "register_operand" "=w")
5740         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5741                       (match_operand:V4SI 2 "register_operand" "w")
5742                       (match_operand:V4SI 3 "register_operand" "w")]
5743          UNSPEC_SHA256SU1))]
5744   "TARGET_SIMD &&TARGET_CRYPTO"
5745   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5746   [(set_attr "type" "crypto_sha256_slow")]
5747 )
5748
5749 ;; pmull
5750
5751 (define_insn "aarch64_crypto_pmulldi"
5752   [(set (match_operand:TI 0 "register_operand" "=w")
5753         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
5754                      (match_operand:DI 2 "register_operand" "w")]
5755                     UNSPEC_PMULL))]
5756  "TARGET_SIMD && TARGET_CRYPTO"
5757  "pmull\\t%0.1q, %1.1d, %2.1d"
5758   [(set_attr "type" "crypto_pmull")]
5759 )
5760
5761 (define_insn "aarch64_crypto_pmullv2di"
5762  [(set (match_operand:TI 0 "register_operand" "=w")
5763        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5764                    (match_operand:V2DI 2 "register_operand" "w")]
5765                   UNSPEC_PMULL2))]
5766   "TARGET_SIMD && TARGET_CRYPTO"
5767   "pmull2\\t%0.1q, %1.2d, %2.2d"
5768   [(set_attr "type" "crypto_pmull")]
5769 )