gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 (define_expand "mov<mode>"
  22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
  23         (match_operand:VALL_F16 1 "general_operand" ""))]
  24   "TARGET_SIMD"
  25   "
  26     if (GET_CODE (operands[0]) == MEM
  27         && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  28              && aarch64_legitimate_address_p (<MODE>mode, operands[0],
  29                                               PARALLEL, 1)))
  30       operands[1] = force_reg (<MODE>mode, operands[1]);
  31   "
  32 )
  33
  34 (define_expand "movmisalign<mode>"
  35   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
  36         (match_operand:VALL 1 "general_operand" ""))]
  37   "TARGET_SIMD"
  38 {
  39   /* This pattern is not permitted to fail during expansion: if both arguments
  40      are non-registers (e.g. memory := constant, which can be created by the
  41      auto-vectorizer), force operand 1 into a register.  */
  42   if (!register_operand (operands[0], <MODE>mode)
  43       && !register_operand (operands[1], <MODE>mode))
  44     operands[1] = force_reg (<MODE>mode, operands[1]);
  45 })
  46
  47 (define_insn "aarch64_simd_dup<mode>"
  48   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
  49         (vec_duplicate:VDQ_I
  50           (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  51   "TARGET_SIMD"
  52   "@
  53    dup\\t%0.<Vtype>, %1.<Vetype>[0]
  54    dup\\t%0.<Vtype>, %<vw>1"
  55   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
  56 )
  57
  58 (define_insn "aarch64_simd_dup<mode>"
  59   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
  60         (vec_duplicate:VDQF_F16
  61           (match_operand:<VEL> 1 "register_operand" "w")))]
  62   "TARGET_SIMD"
  63   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  64   [(set_attr "type" "neon_dup<q>")]
  65 )
  66
  67 (define_insn "aarch64_dup_lane<mode>"
  68   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
  69         (vec_duplicate:VALL_F16
  70           (vec_select:<VEL>
  71             (match_operand:VALL_F16 1 "register_operand" "w")
  72             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  73           )))]
  74   "TARGET_SIMD"
  75   {
  76     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
  77     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  78   }
  79   [(set_attr "type" "neon_dup<q>")]
  80 )
  81
  82 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  83   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
  84         (vec_duplicate:VALL_F16_NO_V2Q
  85           (vec_select:<VEL>
  86             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
  87             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
  88           )))]
  89   "TARGET_SIMD"
  90   {
  91     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
  92                                           INTVAL (operands[2])));
  93     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  94   }
  95   [(set_attr "type" "neon_dup<q>")]
  96 )
  97
  98 (define_insn "*aarch64_simd_mov<mode>"
  99   [(set (match_operand:VD 0 "nonimmediate_operand"
 100                 "=w, m,  m,  w, ?r, ?w, ?r, w")
 101         (match_operand:VD 1 "general_operand"
 102                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 103   "TARGET_SIMD
 104    && (register_operand (operands[0], <MODE>mode)
 105        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 106 {
 107    switch (which_alternative)
 108      {
 109      case 0: return "ldr\t%d0, %1";
 110      case 1: return "str\txzr, %0";
 111      case 2: return "str\t%d1, %0";
 112      case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 113      case 4: return "umov\t%0, %1.d[0]";
 114      case 5: return "fmov\t%d0, %1";
 115      case 6: return "mov\t%0, %1";
 116      case 7:
 117         return aarch64_output_simd_mov_immediate (operands[1],
 118                                                   <MODE>mode, 64);
 119      default: gcc_unreachable ();
 120      }
 121 }
 122   [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\
 123                      neon_logic<q>, neon_to_gp<q>, f_mcr,\
 124                      mov_reg, neon_move<q>")]
 125 )
 126
 127 (define_insn "*aarch64_simd_mov<mode>"
 128   [(set (match_operand:VQ 0 "nonimmediate_operand"
 129                 "=w, Ump,  m,  w, ?r, ?w, ?r, w")
 130         (match_operand:VQ 1 "general_operand"
 131                 "m,  Dz, w,  w,  w,  r,  r, Dn"))]
 132   "TARGET_SIMD
 133    && (register_operand (operands[0], <MODE>mode)
 134        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 135 {
 136   switch (which_alternative)
 137     {
 138     case 0:
 139         return "ldr\t%q0, %1";
 140     case 1:
 141         return "stp\txzr, xzr, %0";
 142     case 2:
 143         return "str\t%q1, %0";
 144     case 3:
 145         return "mov\t%0.<Vbtype>, %1.<Vbtype>";
 146     case 4:
 147     case 5:
 148     case 6:
 149         return "#";
 150     case 7:
 151         return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
 152     default:
 153         gcc_unreachable ();
 154     }
 155 }
 156   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
 157                      neon_stp, neon_logic<q>, multiple, multiple,\
 158                      multiple, neon_move<q>")
 159    (set_attr "length" "4,4,4,4,8,8,8,4")]
 160 )
 161
 162 ;; When storing lane zero we can use the normal STR and its more permissive
 163 ;; addressing modes.
 164
 165 (define_insn "aarch64_store_lane0<mode>"
 166   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 167         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 168                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 169   "TARGET_SIMD
 170    && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
 171   "str\\t%<Vetype>1, %0"
 172   [(set_attr "type" "neon_store1_1reg<q>")]
 173 )
 174
 175 (define_insn "load_pair<mode>"
 176   [(set (match_operand:VD 0 "register_operand" "=w")
 177         (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
 178    (set (match_operand:VD 2 "register_operand" "=w")
 179         (match_operand:VD 3 "memory_operand" "m"))]
 180   "TARGET_SIMD
 181    && rtx_equal_p (XEXP (operands[3], 0),
 182                    plus_constant (Pmode,
 183                                   XEXP (operands[1], 0),
 184                                   GET_MODE_SIZE (<MODE>mode)))"
 185   "ldp\\t%d0, %d2, %1"
 186   [(set_attr "type" "neon_ldp")]
 187 )
 188
 189 (define_insn "store_pair<mode>"
 190   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
 191         (match_operand:VD 1 "register_operand" "w"))
 192    (set (match_operand:VD 2 "memory_operand" "=m")
 193         (match_operand:VD 3 "register_operand" "w"))]
 194   "TARGET_SIMD
 195    && rtx_equal_p (XEXP (operands[2], 0),
 196                    plus_constant (Pmode,
 197                                   XEXP (operands[0], 0),
 198                                   GET_MODE_SIZE (<MODE>mode)))"
 199   "stp\\t%d1, %d3, %0"
 200   [(set_attr "type" "neon_stp")]
 201 )
 202
 203 (define_split
 204   [(set (match_operand:VQ 0 "register_operand" "")
 205       (match_operand:VQ 1 "register_operand" ""))]
 206   "TARGET_SIMD && reload_completed
 207    && GP_REGNUM_P (REGNO (operands[0]))
 208    && GP_REGNUM_P (REGNO (operands[1]))"
 209   [(const_int 0)]
 210 {
 211   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 212   DONE;
 213 })
 214
 215 (define_split
 216   [(set (match_operand:VQ 0 "register_operand" "")
 217         (match_operand:VQ 1 "register_operand" ""))]
 218   "TARGET_SIMD && reload_completed
 219    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
 220        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
 221   [(const_int 0)]
 222 {
 223   aarch64_split_simd_move (operands[0], operands[1]);
 224   DONE;
 225 })
 226
 227 (define_expand "aarch64_split_simd_mov<mode>"
 228   [(set (match_operand:VQ 0)
 229         (match_operand:VQ 1))]
 230   "TARGET_SIMD"
 231   {
 232     rtx dst = operands[0];
 233     rtx src = operands[1];
 234
 235     if (GP_REGNUM_P (REGNO (src)))
 236       {
 237         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 238         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 239
 240         emit_insn
 241           (gen_move_lo_quad_<mode> (dst, src_low_part));
 242         emit_insn
 243           (gen_move_hi_quad_<mode> (dst, src_high_part));
 244       }
 245
 246     else
 247       {
 248         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 249         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 250         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
 251         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
 252
 253         emit_insn
 254           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
 255         emit_insn
 256           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
 257       }
 258     DONE;
 259   }
 260 )
 261
 262 (define_insn "aarch64_simd_mov_from_<mode>low"
 263   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 264         (vec_select:<VHALF>
 265           (match_operand:VQ 1 "register_operand" "w")
 266           (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
 267   "TARGET_SIMD && reload_completed"
 268   "umov\t%0, %1.d[0]"
 269   [(set_attr "type" "neon_to_gp<q>")
 270    (set_attr "length" "4")
 271   ])
 272
 273 (define_insn "aarch64_simd_mov_from_<mode>high"
 274   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
 275         (vec_select:<VHALF>
 276           (match_operand:VQ 1 "register_operand" "w")
 277           (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
 278   "TARGET_SIMD && reload_completed"
 279   "umov\t%0, %1.d[1]"
 280   [(set_attr "type" "neon_to_gp<q>")
 281    (set_attr "length" "4")
 282   ])
 283
 284 (define_insn "orn<mode>3"
 285  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 286        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 287                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 288  "TARGET_SIMD"
 289  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 290   [(set_attr "type" "neon_logic<q>")]
 291 )
 292
 293 (define_insn "bic<mode>3"
 294  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 295        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
 296                 (match_operand:VDQ_I 2 "register_operand" "w")))]
 297  "TARGET_SIMD"
 298  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
 299   [(set_attr "type" "neon_logic<q>")]
 300 )
 301
 302 (define_insn "add<mode>3"
 303   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 304         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 305                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 306   "TARGET_SIMD"
 307   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 308   [(set_attr "type" "neon_add<q>")]
 309 )
 310
 311 (define_insn "sub<mode>3"
 312   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 313         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 314                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 315   "TARGET_SIMD"
 316   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 317   [(set_attr "type" "neon_sub<q>")]
 318 )
 319
 320 (define_insn "mul<mode>3"
 321   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 322         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 323                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 324   "TARGET_SIMD"
 325   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 326   [(set_attr "type" "neon_mul_<Vetype><q>")]
 327 )
 328
 329 (define_insn "bswap<mode>2"
 330   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 331         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 332   "TARGET_SIMD"
 333   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 334   [(set_attr "type" "neon_rev<q>")]
 335 )
 336
 337 (define_insn "aarch64_rbit<mode>"
 338   [(set (match_operand:VB 0 "register_operand" "=w")
 339         (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
 340                    UNSPEC_RBIT))]
 341   "TARGET_SIMD"
 342   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 343   [(set_attr "type" "neon_rbit")]
 344 )
 345
 346 (define_expand "ctz<mode>2"
 347   [(set (match_operand:VS 0 "register_operand")
 348         (ctz:VS (match_operand:VS 1 "register_operand")))]
 349   "TARGET_SIMD"
 350   {
 351      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 352      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
 353                                              <MODE>mode, 0);
 354      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 355      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 356      DONE;
 357   }
 358 )
 359
 360 (define_expand "xorsign<mode>3"
 361   [(match_operand:VHSDF 0 "register_operand")
 362    (match_operand:VHSDF 1 "register_operand")
 363    (match_operand:VHSDF 2 "register_operand")]
 364   "TARGET_SIMD"
 365 {
 366
 367   machine_mode imode = <V_INT_EQUIV>mode;
 368   rtx v_bitmask = gen_reg_rtx (imode);
 369   rtx op1x = gen_reg_rtx (imode);
 370   rtx op2x = gen_reg_rtx (imode);
 371
 372   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 373   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 374
 375   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 376
 377   emit_move_insn (v_bitmask,
 378                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 379                                                      HOST_WIDE_INT_M1U << bits));
 380
 381   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 382   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 383   emit_move_insn (operands[0],
 384                   lowpart_subreg (<MODE>mode, op1x, imode));
 385   DONE;
 386 }
 387 )
 388
 389 (define_expand "copysign<mode>3"
 390   [(match_operand:VHSDF 0 "register_operand")
 391    (match_operand:VHSDF 1 "register_operand")
 392    (match_operand:VHSDF 2 "register_operand")]
 393   "TARGET_FLOAT && TARGET_SIMD"
 394 {
 395   rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
 396   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 397
 398   emit_move_insn (v_bitmask,
 399                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 400                                                      HOST_WIDE_INT_M1U << bits));
 401   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 402                                          operands[2], operands[1]));
 403   DONE;
 404 }
 405 )
 406
 407 (define_insn "*aarch64_mul3_elt<mode>"
 408  [(set (match_operand:VMUL 0 "register_operand" "=w")
 409     (mult:VMUL
 410       (vec_duplicate:VMUL
 411           (vec_select:<VEL>
 412             (match_operand:VMUL 1 "register_operand" "<h_con>")
 413             (parallel [(match_operand:SI 2 "immediate_operand")])))
 414       (match_operand:VMUL 3 "register_operand" "w")))]
 415   "TARGET_SIMD"
 416   {
 417     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
 418     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 419   }
 420   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 421 )
 422
 423 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
 424   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
 425      (mult:VMUL_CHANGE_NLANES
 426        (vec_duplicate:VMUL_CHANGE_NLANES
 427           (vec_select:<VEL>
 428             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
 429             (parallel [(match_operand:SI 2 "immediate_operand")])))
 430       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
 431   "TARGET_SIMD"
 432   {
 433     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
 434                                           INTVAL (operands[2])));
 435     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
 436   }
 437   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
 438 )
 439
 440 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
 441  [(set (match_operand:VMUL 0 "register_operand" "=w")
 442     (mult:VMUL
 443       (vec_duplicate:VMUL
 444             (match_operand:<VEL> 1 "register_operand" "<h_con>"))
 445       (match_operand:VMUL 2 "register_operand" "w")))]
 446   "TARGET_SIMD"
 447   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
 448   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 449 )
 450
 451 (define_insn "aarch64_rsqrte<mode>"
 452   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 453         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 454                      UNSPEC_RSQRTE))]
 455   "TARGET_SIMD"
 456   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 457   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 458
 459 (define_insn "aarch64_rsqrts<mode>"
 460   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 461         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 462                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 463          UNSPEC_RSQRTS))]
 464   "TARGET_SIMD"
 465   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 466   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 467
 468 (define_expand "rsqrt<mode>2"
 469   [(set (match_operand:VALLF 0 "register_operand" "=w")
 470         (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
 471                      UNSPEC_RSQRT))]
 472   "TARGET_SIMD"
 473 {
 474   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 475   DONE;
 476 })
 477
 478 (define_insn "*aarch64_mul3_elt_to_64v2df"
 479   [(set (match_operand:DF 0 "register_operand" "=w")
 480      (mult:DF
 481        (vec_select:DF
 482          (match_operand:V2DF 1 "register_operand" "w")
 483          (parallel [(match_operand:SI 2 "immediate_operand")]))
 484        (match_operand:DF 3 "register_operand" "w")))]
 485   "TARGET_SIMD"
 486   {
 487     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
 488     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 489   }
 490   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 491 )
 492
 493 (define_insn "neg<mode>2"
 494   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 495         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 496   "TARGET_SIMD"
 497   "neg\t%0.<Vtype>, %1.<Vtype>"
 498   [(set_attr "type" "neon_neg<q>")]
 499 )
 500
 501 (define_insn "abs<mode>2"
 502   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 503         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 504   "TARGET_SIMD"
 505   "abs\t%0.<Vtype>, %1.<Vtype>"
 506   [(set_attr "type" "neon_abs<q>")]
 507 )
 508
 509 ;; The intrinsic version of integer ABS must not be allowed to
 510 ;; combine with any operation with an integerated ABS step, such
 511 ;; as SABD.
 512 (define_insn "aarch64_abs<mode>"
 513   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 514           (unspec:VSDQ_I_DI
 515             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 516            UNSPEC_ABS))]
 517   "TARGET_SIMD"
 518   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 519   [(set_attr "type" "neon_abs<q>")]
 520 )
 521
 522 (define_insn "abd<mode>_3"
 523   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 524         (abs:VDQ_BHSI (minus:VDQ_BHSI
 525                        (match_operand:VDQ_BHSI 1 "register_operand" "w")
 526                        (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
 527   "TARGET_SIMD"
 528   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 529   [(set_attr "type" "neon_abd<q>")]
 530 )
 531
 532 (define_insn "aba<mode>_3"
 533   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 534         (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
 535                          (match_operand:VDQ_BHSI 1 "register_operand" "w")
 536                          (match_operand:VDQ_BHSI 2 "register_operand" "w")))
 537                        (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
 538   "TARGET_SIMD"
 539   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 540   [(set_attr "type" "neon_arith_acc<q>")]
 541 )
 542
 543 (define_insn "fabd<mode>3"
 544   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 545         (abs:VHSDF_HSDF
 546           (minus:VHSDF_HSDF
 547             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
 548             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
 549   "TARGET_SIMD"
 550   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 551   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 552 )
 553
 554 (define_insn "and<mode>3"
 555   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 556         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 557                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 558   "TARGET_SIMD"
 559   "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 560   [(set_attr "type" "neon_logic<q>")]
 561 )
 562
 563 (define_insn "ior<mode>3"
 564   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 565         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 566                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 567   "TARGET_SIMD"
 568   "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 569   [(set_attr "type" "neon_logic<q>")]
 570 )
 571
 572 (define_insn "xor<mode>3"
 573   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 574         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 575                  (match_operand:VDQ_I 2 "register_operand" "w")))]
 576   "TARGET_SIMD"
 577   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 578   [(set_attr "type" "neon_logic<q>")]
 579 )
 580
 581 (define_insn "one_cmpl<mode>2"
 582   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 583         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 584   "TARGET_SIMD"
 585   "not\t%0.<Vbtype>, %1.<Vbtype>"
 586   [(set_attr "type" "neon_logic<q>")]
 587 )
 588
 589 (define_insn "aarch64_simd_vec_set<mode>"
 590   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
 591         (vec_merge:VDQ_BHSI
 592             (vec_duplicate:VDQ_BHSI
 593                 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
 594             (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
 595             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
 596   "TARGET_SIMD"
 597   {
 598    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
 599    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 600    switch (which_alternative)
 601      {
 602      case 0:
 603         return "ins\\t%0.<Vetype>[%p2], %w1";
 604      case 1:
 605         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
 606      case 2:
 607         return "ld1\\t{%0.<Vetype>}[%p2], %1";
 608      default:
 609         gcc_unreachable ();
 610      }
 611   }
 612   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
 613 )
 614
 615 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
 616   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 617         (vec_merge:VALL_F16
 618             (vec_duplicate:VALL_F16
 619               (vec_select:<VEL>
 620                 (match_operand:VALL_F16 3 "register_operand" "w")
 621                 (parallel
 622                   [(match_operand:SI 4 "immediate_operand" "i")])))
 623             (match_operand:VALL_F16 1 "register_operand" "0")
 624             (match_operand:SI 2 "immediate_operand" "i")))]
 625   "TARGET_SIMD"
 626   {
 627     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
 628     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 629     operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
 630
 631     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 632   }
 633   [(set_attr "type" "neon_ins<q>")]
 634 )
 635
 636 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
 637   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 638         (vec_merge:VALL_F16_NO_V2Q
 639             (vec_duplicate:VALL_F16_NO_V2Q
 640               (vec_select:<VEL>
 641                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
 642                 (parallel
 643                   [(match_operand:SI 4 "immediate_operand" "i")])))
 644             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
 645             (match_operand:SI 2 "immediate_operand" "i")))]
 646   "TARGET_SIMD"
 647   {
 648     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
 649     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
 650     operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
 651                            INTVAL (operands[4])));
 652
 653     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
 654   }
 655   [(set_attr "type" "neon_ins<q>")]
 656 )
 657
 658 (define_insn "aarch64_simd_lshr<mode>"
 659  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 660        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 661                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 662  "TARGET_SIMD"
 663  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
 664   [(set_attr "type" "neon_shift_imm<q>")]
 665 )
 666
 667 (define_insn "aarch64_simd_ashr<mode>"
 668  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 669        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 670                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 671  "TARGET_SIMD"
 672  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
 673   [(set_attr "type" "neon_shift_imm<q>")]
 674 )
 675
 676 (define_insn "aarch64_simd_imm_shl<mode>"
 677  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 678        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 679                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 680  "TARGET_SIMD"
 681   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
 682   [(set_attr "type" "neon_shift_imm<q>")]
 683 )
 684
 685 (define_insn "aarch64_simd_reg_sshl<mode>"
 686  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 687        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 688                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 689  "TARGET_SIMD"
 690  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 691   [(set_attr "type" "neon_shift_reg<q>")]
 692 )
 693
 694 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 695  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 696        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 697                     (match_operand:VDQ_I 2 "register_operand" "w")]
 698                    UNSPEC_ASHIFT_UNSIGNED))]
 699  "TARGET_SIMD"
 700  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 701   [(set_attr "type" "neon_shift_reg<q>")]
 702 )
 703
 704 (define_insn "aarch64_simd_reg_shl<mode>_signed"
 705  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 706        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
 707                     (match_operand:VDQ_I 2 "register_operand" "w")]
 708                    UNSPEC_ASHIFT_SIGNED))]
 709  "TARGET_SIMD"
 710  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 711   [(set_attr "type" "neon_shift_reg<q>")]
 712 )
 713
 714 (define_expand "ashl<mode>3"
 715   [(match_operand:VDQ_I 0 "register_operand" "")
 716    (match_operand:VDQ_I 1 "register_operand" "")
 717    (match_operand:SI  2 "general_operand" "")]
 718  "TARGET_SIMD"
 719 {
 720   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 721   int shift_amount;
 722
 723   if (CONST_INT_P (operands[2]))
 724     {
 725       shift_amount = INTVAL (operands[2]);
 726       if (shift_amount >= 0 && shift_amount < bit_width)
 727         {
 728           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 729                                                        shift_amount);
 730           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
 731                                                      operands[1],
 732                                                      tmp));
 733           DONE;
 734         }
 735       else
 736         {
 737           operands[2] = force_reg (SImode, operands[2]);
 738         }
 739     }
 740   else if (MEM_P (operands[2]))
 741     {
 742       operands[2] = force_reg (SImode, operands[2]);
 743     }
 744
 745   if (REG_P (operands[2]))
 746     {
 747       rtx tmp = gen_reg_rtx (<MODE>mode);
 748       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
 749                                              convert_to_mode (<VEL>mode,
 750                                                               operands[2],
 751                                                               0)));
 752       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 753                                                   tmp));
 754       DONE;
 755     }
 756   else
 757     FAIL;
 758 }
 759 )
 760
 761 (define_expand "lshr<mode>3"
 762   [(match_operand:VDQ_I 0 "register_operand" "")
 763    (match_operand:VDQ_I 1 "register_operand" "")
 764    (match_operand:SI  2 "general_operand" "")]
 765  "TARGET_SIMD"
 766 {
 767   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 768   int shift_amount;
 769
 770   if (CONST_INT_P (operands[2]))
 771     {
 772       shift_amount = INTVAL (operands[2]);
 773       if (shift_amount > 0 && shift_amount <= bit_width)
 774         {
 775           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 776                                                        shift_amount);
 777           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
 778                                                   operands[1],
 779                                                   tmp));
 780           DONE;
 781         }
 782       else
 783         operands[2] = force_reg (SImode, operands[2]);
 784     }
 785   else if (MEM_P (operands[2]))
 786     {
 787       operands[2] = force_reg (SImode, operands[2]);
 788     }
 789
 790   if (REG_P (operands[2]))
 791     {
 792       rtx tmp = gen_reg_rtx (SImode);
 793       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 794       emit_insn (gen_negsi2 (tmp, operands[2]));
 795       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 796                                              convert_to_mode (<VEL>mode,
 797                                                               tmp, 0)));
 798       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
 799                                                           operands[1],
 800                                                           tmp1));
 801       DONE;
 802     }
 803   else
 804     FAIL;
 805 }
 806 )
 807
 808 (define_expand "ashr<mode>3"
 809   [(match_operand:VDQ_I 0 "register_operand" "")
 810    (match_operand:VDQ_I 1 "register_operand" "")
 811    (match_operand:SI  2 "general_operand" "")]
 812  "TARGET_SIMD"
 813 {
 814   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
 815   int shift_amount;
 816
 817   if (CONST_INT_P (operands[2]))
 818     {
 819       shift_amount = INTVAL (operands[2]);
 820       if (shift_amount > 0 && shift_amount <= bit_width)
 821         {
 822           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
 823                                                        shift_amount);
 824           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
 825                                                   operands[1],
 826                                                   tmp));
 827           DONE;
 828         }
 829       else
 830         operands[2] = force_reg (SImode, operands[2]);
 831     }
 832   else if (MEM_P (operands[2]))
 833     {
 834       operands[2] = force_reg (SImode, operands[2]);
 835     }
 836
 837   if (REG_P (operands[2]))
 838     {
 839       rtx tmp = gen_reg_rtx (SImode);
 840       rtx tmp1 = gen_reg_rtx (<MODE>mode);
 841       emit_insn (gen_negsi2 (tmp, operands[2]));
 842       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
 843                                              convert_to_mode (<VEL>mode,
 844                                                               tmp, 0)));
 845       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
 846                                                         operands[1],
 847                                                         tmp1));
 848       DONE;
 849     }
 850   else
 851     FAIL;
 852 }
 853 )
 854
 855 (define_expand "vashl<mode>3"
 856  [(match_operand:VDQ_I 0 "register_operand" "")
 857   (match_operand:VDQ_I 1 "register_operand" "")
 858   (match_operand:VDQ_I 2 "register_operand" "")]
 859  "TARGET_SIMD"
 860 {
 861   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
 862                                               operands[2]));
 863   DONE;
 864 })
 865
 866 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
 867 ;; Negating individual lanes most certainly offsets the
 868 ;; gain from vectorization.
 869 (define_expand "vashr<mode>3"
 870  [(match_operand:VDQ_BHSI 0 "register_operand" "")
 871   (match_operand:VDQ_BHSI 1 "register_operand" "")
 872   (match_operand:VDQ_BHSI 2 "register_operand" "")]
 873  "TARGET_SIMD"
 874 {
 875   rtx neg = gen_reg_rtx (<MODE>mode);
 876   emit (gen_neg<mode>2 (neg, operands[2]));
 877   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
 878                                                     neg));
 879   DONE;
 880 })
 881
 882 ;; DI vector shift
 883 (define_expand "aarch64_ashr_simddi"
 884   [(match_operand:DI 0 "register_operand" "=w")
 885    (match_operand:DI 1 "register_operand" "w")
 886    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
 887   "TARGET_SIMD"
 888   {
 889     /* An arithmetic shift right by 64 fills the result with copies of the sign
 890        bit, just like asr by 63 - however the standard pattern does not handle
 891        a shift by 64.  */
 892     if (INTVAL (operands[2]) == 64)
 893       operands[2] = GEN_INT (63);
 894     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
 895     DONE;
 896   }
 897 )
 898
 899 (define_expand "vlshr<mode>3"
 900  [(match_operand:VDQ_BHSI 0 "register_operand" "")
 901   (match_operand:VDQ_BHSI 1 "register_operand" "")
 902   (match_operand:VDQ_BHSI 2 "register_operand" "")]
 903  "TARGET_SIMD"
 904 {
 905   rtx neg = gen_reg_rtx (<MODE>mode);
 906   emit (gen_neg<mode>2 (neg, operands[2]));
 907   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
 908                                                       neg));
 909   DONE;
 910 })
 911
 912 (define_expand "aarch64_lshr_simddi"
 913   [(match_operand:DI 0 "register_operand" "=w")
 914    (match_operand:DI 1 "register_operand" "w")
 915    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
 916   "TARGET_SIMD"
 917   {
 918     if (INTVAL (operands[2]) == 64)
 919       emit_move_insn (operands[0], const0_rtx);
 920     else
 921       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
 922     DONE;
 923   }
 924 )
 925
 926 (define_expand "vec_set<mode>"
 927   [(match_operand:VDQ_BHSI 0 "register_operand")
 928    (match_operand:<VEL> 1 "register_operand")
 929    (match_operand:SI 2 "immediate_operand")]
 930   "TARGET_SIMD"
 931   {
 932     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
 933     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
 934                                             GEN_INT (elem), operands[0]));
 935     DONE;
 936   }
 937 )
 938
 939 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
 940 (define_insn "vec_shr_<mode>"
 941   [(set (match_operand:VD 0 "register_operand" "=w")
 942         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
 943                     (match_operand:SI 2 "immediate_operand" "i")]
 944                    UNSPEC_VEC_SHR))]
 945   "TARGET_SIMD"
 946   {
 947     if (BYTES_BIG_ENDIAN)
 948       return "shl %d0, %d1, %2";
 949     else
 950       return "ushr %d0, %d1, %2";
 951   }
 952   [(set_attr "type" "neon_shift_imm")]
 953 )
 954
 955 (define_insn "aarch64_simd_vec_setv2di"
 956   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
 957         (vec_merge:V2DI
 958             (vec_duplicate:V2DI
 959                 (match_operand:DI 1 "register_operand" "r,w"))
 960             (match_operand:V2DI 3 "register_operand" "0,0")
 961             (match_operand:SI 2 "immediate_operand" "i,i")))]
 962   "TARGET_SIMD"
 963   {
 964     int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
 965     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
 966     switch (which_alternative)
 967       {
 968       case 0:
 969         return "ins\\t%0.d[%p2], %1";
 970       case 1:
 971         return "ins\\t%0.d[%p2], %1.d[0]";
 972       default:
 973         gcc_unreachable ();
 974       }
 975   }
 976   [(set_attr "type" "neon_from_gp, neon_ins_q")]
 977 )
 978
 979 (define_expand "vec_setv2di"
 980   [(match_operand:V2DI 0 "register_operand")
 981    (match_operand:DI 1 "register_operand")
 982    (match_operand:SI 2 "immediate_operand")]
 983   "TARGET_SIMD"
 984   {
 985     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
 986     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
 987                                           GEN_INT (elem), operands[0]));
 988     DONE;
 989   }
 990 )
 991
 992 (define_insn "aarch64_simd_vec_set<mode>"
 993   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
 994         (vec_merge:VDQF_F16
 995             (vec_duplicate:VDQF_F16
 996                 (match_operand:<VEL> 1 "register_operand" "w"))
 997             (match_operand:VDQF_F16 3 "register_operand" "0")
 998             (match_operand:SI 2 "immediate_operand" "i")))]
 999   "TARGET_SIMD"
1000   {
1001     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
1002
1003     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1004     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1005   }
1006   [(set_attr "type" "neon_ins<q>")]
1007 )
1008
1009 (define_expand "vec_set<mode>"
1010   [(match_operand:VDQF_F16 0 "register_operand" "+w")
1011    (match_operand:<VEL> 1 "register_operand" "w")
1012    (match_operand:SI 2 "immediate_operand" "")]
1013   "TARGET_SIMD"
1014   {
1015     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1016     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1017                                           GEN_INT (elem), operands[0]));
1018     DONE;
1019   }
1020 )
1021
1022
1023 (define_insn "aarch64_mla<mode>"
1024  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025        (plus:VDQ_BHSI (mult:VDQ_BHSI
1026                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1028                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1029  "TARGET_SIMD"
1030  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1031   [(set_attr "type" "neon_mla_<Vetype><q>")]
1032 )
1033
1034 (define_insn "*aarch64_mla_elt<mode>"
1035  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036        (plus:VDQHS
1037          (mult:VDQHS
1038            (vec_duplicate:VDQHS
1039               (vec_select:<VEL>
1040                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1041                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1042            (match_operand:VDQHS 3 "register_operand" "w"))
1043          (match_operand:VDQHS 4 "register_operand" "0")))]
1044  "TARGET_SIMD"
1045   {
1046     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1047     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048   }
1049   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1050 )
1051
1052 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1053  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1054        (plus:VDQHS
1055          (mult:VDQHS
1056            (vec_duplicate:VDQHS
1057               (vec_select:<VEL>
1058                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1060            (match_operand:VDQHS 3 "register_operand" "w"))
1061          (match_operand:VDQHS 4 "register_operand" "0")))]
1062  "TARGET_SIMD"
1063   {
1064     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1065                                           INTVAL (operands[2])));
1066     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1067   }
1068   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1069 )
1070
1071 (define_insn "*aarch64_mla_elt_merge<mode>"
1072   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1073         (plus:VDQHS
1074           (mult:VDQHS (vec_duplicate:VDQHS
1075                   (match_operand:<VEL> 1 "register_operand" "w"))
1076                 (match_operand:VDQHS 2 "register_operand" "w"))
1077           (match_operand:VDQHS 3 "register_operand" "0")))]
1078  "TARGET_SIMD"
1079  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1080   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1081 )
1082
1083 (define_insn "aarch64_mls<mode>"
1084  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1085        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1086                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1087                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1088  "TARGET_SIMD"
1089  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1090   [(set_attr "type" "neon_mla_<Vetype><q>")]
1091 )
1092
1093 (define_insn "*aarch64_mls_elt<mode>"
1094  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1095        (minus:VDQHS
1096          (match_operand:VDQHS 4 "register_operand" "0")
1097          (mult:VDQHS
1098            (vec_duplicate:VDQHS
1099               (vec_select:<VEL>
1100                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1101                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1102            (match_operand:VDQHS 3 "register_operand" "w"))))]
1103  "TARGET_SIMD"
1104   {
1105     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1106     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1107   }
1108   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1109 )
1110
1111 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1112  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1113        (minus:VDQHS
1114          (match_operand:VDQHS 4 "register_operand" "0")
1115          (mult:VDQHS
1116            (vec_duplicate:VDQHS
1117               (vec_select:<VEL>
1118                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1119                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1120            (match_operand:VDQHS 3 "register_operand" "w"))))]
1121  "TARGET_SIMD"
1122   {
1123     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1124                                           INTVAL (operands[2])));
1125     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1126   }
1127   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1128 )
1129
1130 (define_insn "*aarch64_mls_elt_merge<mode>"
1131   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1132         (minus:VDQHS
1133           (match_operand:VDQHS 1 "register_operand" "0")
1134           (mult:VDQHS (vec_duplicate:VDQHS
1135                   (match_operand:<VEL> 2 "register_operand" "w"))
1136                 (match_operand:VDQHS 3 "register_operand" "w"))))]
1137   "TARGET_SIMD"
1138   "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1139   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1140 )
1141
1142 ;; Max/Min operations.
1143 (define_insn "<su><maxmin><mode>3"
1144  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1145        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1146                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1147  "TARGET_SIMD"
1148  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1149   [(set_attr "type" "neon_minmax<q>")]
1150 )
1151
1152 (define_expand "<su><maxmin>v2di3"
1153  [(set (match_operand:V2DI 0 "register_operand" "")
1154        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1155                     (match_operand:V2DI 2 "register_operand" "")))]
1156  "TARGET_SIMD"
1157 {
1158   enum rtx_code cmp_operator;
1159   rtx cmp_fmt;
1160
1161   switch (<CODE>)
1162     {
1163     case UMIN:
1164       cmp_operator = LTU;
1165       break;
1166     case SMIN:
1167       cmp_operator = LT;
1168       break;
1169     case UMAX:
1170       cmp_operator = GTU;
1171       break;
1172     case SMAX:
1173       cmp_operator = GT;
1174       break;
1175     default:
1176       gcc_unreachable ();
1177     }
1178
1179   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1180   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1181               operands[2], cmp_fmt, operands[1], operands[2]));
1182   DONE;
1183 })
1184
1185 ;; Pairwise Integer Max/Min operations.
1186 (define_insn "aarch64_<maxmin_uns>p<mode>"
1187  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1188        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1189                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1190                         MAXMINV))]
1191  "TARGET_SIMD"
1192  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1193   [(set_attr "type" "neon_minmax<q>")]
1194 )
1195
1196 ;; Pairwise FP Max/Min operations.
1197 (define_insn "aarch64_<maxmin_uns>p<mode>"
1198  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1199        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1200                       (match_operand:VHSDF 2 "register_operand" "w")]
1201                       FMAXMINV))]
1202  "TARGET_SIMD"
1203  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1204   [(set_attr "type" "neon_minmax<q>")]
1205 )
1206
1207 ;; vec_concat gives a new vector with the low elements from operand 1, and
1208 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1209 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1210 ;; What that means, is that the RTL descriptions of the below patterns
1211 ;; need to change depending on endianness.
1212
1213 ;; Move to the low architectural bits of the register.
1214 ;; On little-endian this is { operand, zeroes }
1215 ;; On big-endian this is { zeroes, operand }
1216
1217 (define_insn "move_lo_quad_internal_<mode>"
1218   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1219         (vec_concat:VQ_NO2E
1220           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1221           (vec_duplicate:<VHALF> (const_int 0))))]
1222   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1223   "@
1224    dup\\t%d0, %1.d[0]
1225    fmov\\t%d0, %1
1226    dup\\t%d0, %1"
1227   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1228    (set_attr "simd" "yes,*,yes")
1229    (set_attr "fp" "*,yes,*")
1230    (set_attr "length" "4")]
1231 )
1232
1233 (define_insn "move_lo_quad_internal_<mode>"
1234   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1235         (vec_concat:VQ_2E
1236           (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1237           (const_int 0)))]
1238   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1239   "@
1240    dup\\t%d0, %1.d[0]
1241    fmov\\t%d0, %1
1242    dup\\t%d0, %1"
1243   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1244    (set_attr "simd" "yes,*,yes")
1245    (set_attr "fp" "*,yes,*")
1246    (set_attr "length" "4")]
1247 )
1248
1249 (define_insn "move_lo_quad_internal_be_<mode>"
1250   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1251         (vec_concat:VQ_NO2E
1252           (vec_duplicate:<VHALF> (const_int 0))
1253           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1254   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1255   "@
1256    dup\\t%d0, %1.d[0]
1257    fmov\\t%d0, %1
1258    dup\\t%d0, %1"
1259   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1260    (set_attr "simd" "yes,*,yes")
1261    (set_attr "fp" "*,yes,*")
1262    (set_attr "length" "4")]
1263 )
1264
1265 (define_insn "move_lo_quad_internal_be_<mode>"
1266   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1267         (vec_concat:VQ_2E
1268           (const_int 0)
1269           (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1270   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1271   "@
1272    dup\\t%d0, %1.d[0]
1273    fmov\\t%d0, %1
1274    dup\\t%d0, %1"
1275   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1276    (set_attr "simd" "yes,*,yes")
1277    (set_attr "fp" "*,yes,*")
1278    (set_attr "length" "4")]
1279 )
1280
1281 (define_expand "move_lo_quad_<mode>"
1282   [(match_operand:VQ 0 "register_operand")
1283    (match_operand:VQ 1 "register_operand")]
1284   "TARGET_SIMD"
1285 {
1286   if (BYTES_BIG_ENDIAN)
1287     emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1288   else
1289     emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1290   DONE;
1291 }
1292 )
1293
1294 ;; Move operand1 to the high architectural bits of the register, keeping
1295 ;; the low architectural bits of operand2.
1296 ;; For little-endian this is { operand2, operand1 }
1297 ;; For big-endian this is { operand1, operand2 }
1298
1299 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1300   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1301         (vec_concat:VQ
1302           (vec_select:<VHALF>
1303                 (match_dup 0)
1304                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1305           (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1306   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1307   "@
1308    ins\\t%0.d[1], %1.d[0]
1309    ins\\t%0.d[1], %1"
1310   [(set_attr "type" "neon_ins")]
1311 )
1312
1313 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1314   [(set (match_operand:VQ 0 "register_operand" "+w,w")
1315         (vec_concat:VQ
1316           (match_operand:<VHALF> 1 "register_operand" "w,r")
1317           (vec_select:<VHALF>
1318                 (match_dup 0)
1319                 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1320   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1321   "@
1322    ins\\t%0.d[1], %1.d[0]
1323    ins\\t%0.d[1], %1"
1324   [(set_attr "type" "neon_ins")]
1325 )
1326
1327 (define_expand "move_hi_quad_<mode>"
1328  [(match_operand:VQ 0 "register_operand" "")
1329   (match_operand:<VHALF> 1 "register_operand" "")]
1330  "TARGET_SIMD"
1331 {
1332   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1333   if (BYTES_BIG_ENDIAN)
1334     emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1335                     operands[1], p));
1336   else
1337     emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1338                     operands[1], p));
1339   DONE;
1340 })
1341
1342 ;; Narrowing operations.
1343
1344 ;; For doubles.
1345 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1346  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1347        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1348  "TARGET_SIMD"
1349  "xtn\\t%0.<Vntype>, %1.<Vtype>"
1350   [(set_attr "type" "neon_shift_imm_narrow_q")]
1351 )
1352
1353 (define_expand "vec_pack_trunc_<mode>"
1354  [(match_operand:<VNARROWD> 0 "register_operand" "")
1355   (match_operand:VDN 1 "register_operand" "")
1356   (match_operand:VDN 2 "register_operand" "")]
1357  "TARGET_SIMD"
1358 {
1359   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1360   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1361   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1362
1363   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1364   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1365   emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1366   DONE;
1367 })
1368
1369 ;; For quads.
1370
1371 (define_insn "vec_pack_trunc_<mode>"
1372  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1373        (vec_concat:<VNARROWQ2>
1374          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1375          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1376  "TARGET_SIMD"
1377  {
1378    if (BYTES_BIG_ENDIAN)
1379      return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1380    else
1381      return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1382  }
1383   [(set_attr "type" "multiple")
1384    (set_attr "length" "8")]
1385 )
1386
1387 ;; Widening operations.
1388
1389 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1390   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1391         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1392                                (match_operand:VQW 1 "register_operand" "w")
1393                                (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1394                             )))]
1395   "TARGET_SIMD"
1396   "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1397   [(set_attr "type" "neon_shift_imm_long")]
1398 )
1399
1400 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1401   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1402         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403                                (match_operand:VQW 1 "register_operand" "w")
1404                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1405                             )))]
1406   "TARGET_SIMD"
1407   "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1408   [(set_attr "type" "neon_shift_imm_long")]
1409 )
1410
1411 (define_expand "vec_unpack<su>_hi_<mode>"
1412   [(match_operand:<VWIDE> 0 "register_operand" "")
1413    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1414   "TARGET_SIMD"
1415   {
1416     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1417     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1418                                                           operands[1], p));
1419     DONE;
1420   }
1421 )
1422
1423 (define_expand "vec_unpack<su>_lo_<mode>"
1424   [(match_operand:<VWIDE> 0 "register_operand" "")
1425    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1426   "TARGET_SIMD"
1427   {
1428     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1429     emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1430                                                           operands[1], p));
1431     DONE;
1432   }
1433 )
1434
1435 ;; Widening arithmetic.
1436
1437 (define_insn "*aarch64_<su>mlal_lo<mode>"
1438   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1439         (plus:<VWIDE>
1440           (mult:<VWIDE>
1441               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442                  (match_operand:VQW 2 "register_operand" "w")
1443                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445                  (match_operand:VQW 4 "register_operand" "w")
1446                  (match_dup 3))))
1447           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1448   "TARGET_SIMD"
1449   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1450   [(set_attr "type" "neon_mla_<Vetype>_long")]
1451 )
1452
1453 (define_insn "*aarch64_<su>mlal_hi<mode>"
1454   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1455         (plus:<VWIDE>
1456           (mult:<VWIDE>
1457               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1458                  (match_operand:VQW 2 "register_operand" "w")
1459                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1460               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1461                  (match_operand:VQW 4 "register_operand" "w")
1462                  (match_dup 3))))
1463           (match_operand:<VWIDE> 1 "register_operand" "0")))]
1464   "TARGET_SIMD"
1465   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1466   [(set_attr "type" "neon_mla_<Vetype>_long")]
1467 )
1468
1469 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1470   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1471         (minus:<VWIDE>
1472           (match_operand:<VWIDE> 1 "register_operand" "0")
1473           (mult:<VWIDE>
1474               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1475                  (match_operand:VQW 2 "register_operand" "w")
1476                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1477               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1478                  (match_operand:VQW 4 "register_operand" "w")
1479                  (match_dup 3))))))]
1480   "TARGET_SIMD"
1481   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1482   [(set_attr "type" "neon_mla_<Vetype>_long")]
1483 )
1484
1485 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1486   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1487         (minus:<VWIDE>
1488           (match_operand:<VWIDE> 1 "register_operand" "0")
1489           (mult:<VWIDE>
1490               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491                  (match_operand:VQW 2 "register_operand" "w")
1492                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1493               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1494                  (match_operand:VQW 4 "register_operand" "w")
1495                  (match_dup 3))))))]
1496   "TARGET_SIMD"
1497   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1498   [(set_attr "type" "neon_mla_<Vetype>_long")]
1499 )
1500
1501 (define_insn "*aarch64_<su>mlal<mode>"
1502   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1503         (plus:<VWIDE>
1504           (mult:<VWIDE>
1505             (ANY_EXTEND:<VWIDE>
1506               (match_operand:VD_BHSI 1 "register_operand" "w"))
1507             (ANY_EXTEND:<VWIDE>
1508               (match_operand:VD_BHSI 2 "register_operand" "w")))
1509           (match_operand:<VWIDE> 3 "register_operand" "0")))]
1510   "TARGET_SIMD"
1511   "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1512   [(set_attr "type" "neon_mla_<Vetype>_long")]
1513 )
1514
1515 (define_insn "*aarch64_<su>mlsl<mode>"
1516   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1517         (minus:<VWIDE>
1518           (match_operand:<VWIDE> 1 "register_operand" "0")
1519           (mult:<VWIDE>
1520             (ANY_EXTEND:<VWIDE>
1521               (match_operand:VD_BHSI 2 "register_operand" "w"))
1522             (ANY_EXTEND:<VWIDE>
1523               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1524   "TARGET_SIMD"
1525   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1526   [(set_attr "type" "neon_mla_<Vetype>_long")]
1527 )
1528
1529 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1530  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1531        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1532                            (match_operand:VQW 1 "register_operand" "w")
1533                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1534                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1535                            (match_operand:VQW 2 "register_operand" "w")
1536                            (match_dup 3)))))]
1537   "TARGET_SIMD"
1538   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1539   [(set_attr "type" "neon_mul_<Vetype>_long")]
1540 )
1541
1542 (define_expand "vec_widen_<su>mult_lo_<mode>"
1543   [(match_operand:<VWIDE> 0 "register_operand" "")
1544    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1545    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1546  "TARGET_SIMD"
1547  {
1548    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1549    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1550                                                        operands[1],
1551                                                        operands[2], p));
1552    DONE;
1553  }
1554 )
1555
1556 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1557  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1558       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1559                             (match_operand:VQW 1 "register_operand" "w")
1560                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1561                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1562                             (match_operand:VQW 2 "register_operand" "w")
1563                             (match_dup 3)))))]
1564   "TARGET_SIMD"
1565   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1566   [(set_attr "type" "neon_mul_<Vetype>_long")]
1567 )
1568
1569 (define_expand "vec_widen_<su>mult_hi_<mode>"
1570   [(match_operand:<VWIDE> 0 "register_operand" "")
1571    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1572    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1573  "TARGET_SIMD"
1574  {
1575    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1576    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1577                                                        operands[1],
1578                                                        operands[2], p));
1579    DONE;
1580
1581  }
1582 )
1583
1584 ;; FP vector operations.
1585 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1586 ;; double-precision (64-bit) floating-point data types and arithmetic as
1587 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
1588 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1589 ;;
1590 ;; Floating-point operations can raise an exception.  Vectorizing such
1591 ;; operations are safe because of reasons explained below.
1592 ;;
1593 ;; ARMv8 permits an extension to enable trapped floating-point
1594 ;; exception handling, however this is an optional feature.  In the
1595 ;; event of a floating-point exception being raised by vectorised
1596 ;; code then:
1597 ;; 1.  If trapped floating-point exceptions are available, then a trap
1598 ;;     will be taken when any lane raises an enabled exception.  A trap
1599 ;;     handler may determine which lane raised the exception.
1600 ;; 2.  Alternatively a sticky exception flag is set in the
1601 ;;     floating-point status register (FPSR).  Software may explicitly
1602 ;;     test the exception flags, in which case the tests will either
1603 ;;     prevent vectorisation, allowing precise identification of the
1604 ;;     failing operation, or if tested outside of vectorisable regions
1605 ;;     then the specific operation and lane are not of interest.
1606
1607 ;; FP arithmetic operations.
1608
1609 (define_insn "add<mode>3"
1610  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1611        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1612                    (match_operand:VHSDF 2 "register_operand" "w")))]
1613  "TARGET_SIMD"
1614  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1615   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1616 )
1617
1618 (define_insn "sub<mode>3"
1619  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1620        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1621                     (match_operand:VHSDF 2 "register_operand" "w")))]
1622  "TARGET_SIMD"
1623  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1624   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1625 )
1626
1627 (define_insn "mul<mode>3"
1628  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1629        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1630                    (match_operand:VHSDF 2 "register_operand" "w")))]
1631  "TARGET_SIMD"
1632  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1633   [(set_attr "type" "neon_fp_mul_<stype><q>")]
1634 )
1635
1636 (define_expand "div<mode>3"
1637  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1638        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1639                   (match_operand:VHSDF 2 "register_operand" "w")))]
1640  "TARGET_SIMD"
1641 {
1642   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1643     DONE;
1644
1645   operands[1] = force_reg (<MODE>mode, operands[1]);
1646 })
1647
1648 (define_insn "*div<mode>3"
1649  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1650        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1651                  (match_operand:VHSDF 2 "register_operand" "w")))]
1652  "TARGET_SIMD"
1653  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1654   [(set_attr "type" "neon_fp_div_<stype><q>")]
1655 )
1656
1657 (define_insn "neg<mode>2"
1658  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1659        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1660  "TARGET_SIMD"
1661  "fneg\\t%0.<Vtype>, %1.<Vtype>"
1662   [(set_attr "type" "neon_fp_neg_<stype><q>")]
1663 )
1664
1665 (define_insn "abs<mode>2"
1666  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1667        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1668  "TARGET_SIMD"
1669  "fabs\\t%0.<Vtype>, %1.<Vtype>"
1670   [(set_attr "type" "neon_fp_abs_<stype><q>")]
1671 )
1672
1673 (define_insn "fma<mode>4"
1674   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1675        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1676                   (match_operand:VHSDF 2 "register_operand" "w")
1677                   (match_operand:VHSDF 3 "register_operand" "0")))]
1678   "TARGET_SIMD"
1679  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1680   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1681 )
1682
1683 (define_insn "*aarch64_fma4_elt<mode>"
1684   [(set (match_operand:VDQF 0 "register_operand" "=w")
1685     (fma:VDQF
1686       (vec_duplicate:VDQF
1687         (vec_select:<VEL>
1688           (match_operand:VDQF 1 "register_operand" "<h_con>")
1689           (parallel [(match_operand:SI 2 "immediate_operand")])))
1690       (match_operand:VDQF 3 "register_operand" "w")
1691       (match_operand:VDQF 4 "register_operand" "0")))]
1692   "TARGET_SIMD"
1693   {
1694     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1695     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1696   }
1697   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1698 )
1699
1700 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1701   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1702     (fma:VDQSF
1703       (vec_duplicate:VDQSF
1704         (vec_select:<VEL>
1705           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1706           (parallel [(match_operand:SI 2 "immediate_operand")])))
1707       (match_operand:VDQSF 3 "register_operand" "w")
1708       (match_operand:VDQSF 4 "register_operand" "0")))]
1709   "TARGET_SIMD"
1710   {
1711     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1712                                           INTVAL (operands[2])));
1713     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1714   }
1715   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1716 )
1717
1718 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1719   [(set (match_operand:VMUL 0 "register_operand" "=w")
1720     (fma:VMUL
1721       (vec_duplicate:VMUL
1722           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1723       (match_operand:VMUL 2 "register_operand" "w")
1724       (match_operand:VMUL 3 "register_operand" "0")))]
1725   "TARGET_SIMD"
1726   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1727   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1728 )
1729
1730 (define_insn "*aarch64_fma4_elt_to_64v2df"
1731   [(set (match_operand:DF 0 "register_operand" "=w")
1732     (fma:DF
1733         (vec_select:DF
1734           (match_operand:V2DF 1 "register_operand" "w")
1735           (parallel [(match_operand:SI 2 "immediate_operand")]))
1736       (match_operand:DF 3 "register_operand" "w")
1737       (match_operand:DF 4 "register_operand" "0")))]
1738   "TARGET_SIMD"
1739   {
1740     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1741     return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1742   }
1743   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1744 )
1745
1746 (define_insn "fnma<mode>4"
1747   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1748         (fma:VHSDF
1749           (match_operand:VHSDF 1 "register_operand" "w")
1750           (neg:VHSDF
1751             (match_operand:VHSDF 2 "register_operand" "w"))
1752           (match_operand:VHSDF 3 "register_operand" "0")))]
1753   "TARGET_SIMD"
1754   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1755   [(set_attr "type" "neon_fp_mla_<stype><q>")]
1756 )
1757
1758 (define_insn "*aarch64_fnma4_elt<mode>"
1759   [(set (match_operand:VDQF 0 "register_operand" "=w")
1760     (fma:VDQF
1761       (neg:VDQF
1762         (match_operand:VDQF 3 "register_operand" "w"))
1763       (vec_duplicate:VDQF
1764         (vec_select:<VEL>
1765           (match_operand:VDQF 1 "register_operand" "<h_con>")
1766           (parallel [(match_operand:SI 2 "immediate_operand")])))
1767       (match_operand:VDQF 4 "register_operand" "0")))]
1768   "TARGET_SIMD"
1769   {
1770     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1771     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1772   }
1773   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1774 )
1775
1776 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1777   [(set (match_operand:VDQSF 0 "register_operand" "=w")
1778     (fma:VDQSF
1779       (neg:VDQSF
1780         (match_operand:VDQSF 3 "register_operand" "w"))
1781       (vec_duplicate:VDQSF
1782         (vec_select:<VEL>
1783           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1784           (parallel [(match_operand:SI 2 "immediate_operand")])))
1785       (match_operand:VDQSF 4 "register_operand" "0")))]
1786   "TARGET_SIMD"
1787   {
1788     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1789                                           INTVAL (operands[2])));
1790     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1791   }
1792   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1793 )
1794
1795 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1796   [(set (match_operand:VMUL 0 "register_operand" "=w")
1797     (fma:VMUL
1798       (neg:VMUL
1799         (match_operand:VMUL 2 "register_operand" "w"))
1800       (vec_duplicate:VMUL
1801         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1802       (match_operand:VMUL 3 "register_operand" "0")))]
1803   "TARGET_SIMD"
1804   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1805   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1806 )
1807
1808 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1809   [(set (match_operand:DF 0 "register_operand" "=w")
1810     (fma:DF
1811       (vec_select:DF
1812         (match_operand:V2DF 1 "register_operand" "w")
1813         (parallel [(match_operand:SI 2 "immediate_operand")]))
1814       (neg:DF
1815         (match_operand:DF 3 "register_operand" "w"))
1816       (match_operand:DF 4 "register_operand" "0")))]
1817   "TARGET_SIMD"
1818   {
1819     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1820     return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1821   }
1822   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1823 )
1824
1825 ;; Vector versions of the floating-point frint patterns.
1826 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1827 (define_insn "<frint_pattern><mode>2"
1828   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1829         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1830                        FRINT))]
1831   "TARGET_SIMD"
1832   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1833   [(set_attr "type" "neon_fp_round_<stype><q>")]
1834 )
1835
1836 ;; Vector versions of the fcvt standard patterns.
1837 ;; Expands to lbtrunc, lround, lceil, lfloor
1838 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1839   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1840         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1841                                [(match_operand:VHSDF 1 "register_operand" "w")]
1842                                FCVT)))]
1843   "TARGET_SIMD"
1844   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1845   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1846 )
1847
1848 ;; HF Scalar variants of related SIMD instructions.
1849 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1850   [(set (match_operand:HI 0 "register_operand" "=w")
1851         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1852                       FCVT)))]
1853   "TARGET_SIMD_F16INST"
1854   "fcvt<frint_suffix><su>\t%h0, %h1"
1855   [(set_attr "type" "neon_fp_to_int_s")]
1856 )
1857
1858 (define_insn "<optab>_trunchfhi2"
1859   [(set (match_operand:HI 0 "register_operand" "=w")
1860         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1861   "TARGET_SIMD_F16INST"
1862   "fcvtz<su>\t%h0, %h1"
1863   [(set_attr "type" "neon_fp_to_int_s")]
1864 )
1865
1866 (define_insn "<optab>hihf2"
1867   [(set (match_operand:HF 0 "register_operand" "=w")
1868         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1869   "TARGET_SIMD_F16INST"
1870   "<su_optab>cvtf\t%h0, %h1"
1871   [(set_attr "type" "neon_int_to_fp_s")]
1872 )
1873
1874 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1875   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1876         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1877                                [(mult:VDQF
1878          (match_operand:VDQF 1 "register_operand" "w")
1879          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1880                                UNSPEC_FRINTZ)))]
1881   "TARGET_SIMD
1882    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1883                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1884   {
1885     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1886     char buf[64];
1887     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1888     output_asm_insn (buf, operands);
1889     return "";
1890   }
1891   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1892 )
1893
1894 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1895   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1896         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1897                                [(match_operand:VHSDF 1 "register_operand")]
1898                                 UNSPEC_FRINTZ)))]
1899   "TARGET_SIMD"
1900   {})
1901
1902 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1903   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1904         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1905                                [(match_operand:VHSDF 1 "register_operand")]
1906                                 UNSPEC_FRINTZ)))]
1907   "TARGET_SIMD"
1908   {})
1909
1910 (define_expand "ftrunc<VHSDF:mode>2"
1911   [(set (match_operand:VHSDF 0 "register_operand")
1912         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
1913                        UNSPEC_FRINTZ))]
1914   "TARGET_SIMD"
1915   {})
1916
1917 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1918   [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919         (FLOATUORS:VHSDF
1920           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1921   "TARGET_SIMD"
1922   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1923   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1924 )
1925
1926 ;; Conversions between vectors of floats and doubles.
1927 ;; Contains a mix of patterns to match standard pattern names
1928 ;; and those for intrinsics.
1929
1930 ;; Float widening operations.
1931
1932 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1933   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1934         (float_extend:<VWIDE> (vec_select:<VHALF>
1935                                (match_operand:VQ_HSF 1 "register_operand" "w")
1936                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1937                             )))]
1938   "TARGET_SIMD"
1939   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1940   [(set_attr "type" "neon_fp_cvt_widen_s")]
1941 )
1942
1943 ;; Convert between fixed-point and floating-point (vector modes)
1944
1945 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1946   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1947         (unspec:<VHSDF:FCVT_TARGET>
1948           [(match_operand:VHSDF 1 "register_operand" "w")
1949            (match_operand:SI 2 "immediate_operand" "i")]
1950          FCVT_F2FIXED))]
1951   "TARGET_SIMD"
1952   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1953   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
1954 )
1955
1956 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1957   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1958         (unspec:<VDQ_HSDI:FCVT_TARGET>
1959           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1960            (match_operand:SI 2 "immediate_operand" "i")]
1961          FCVT_FIXED2F))]
1962   "TARGET_SIMD"
1963   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1964   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1965 )
1966
1967 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1968 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1969 ;; the meaning of HI and LO changes depending on the target endianness.
1970 ;; While elsewhere we map the higher numbered elements of a vector to
1971 ;; the lower architectural lanes of the vector, for these patterns we want
1972 ;; to always treat "hi" as referring to the higher architectural lanes.
1973 ;; Consequently, while the patterns below look inconsistent with our
1974 ;; other big-endian patterns their behavior is as required.
1975
1976 (define_expand "vec_unpacks_lo_<mode>"
1977   [(match_operand:<VWIDE> 0 "register_operand" "")
1978    (match_operand:VQ_HSF 1 "register_operand" "")]
1979   "TARGET_SIMD"
1980   {
1981     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1982     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1983                                                        operands[1], p));
1984     DONE;
1985   }
1986 )
1987
1988 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1989   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1990         (float_extend:<VWIDE> (vec_select:<VHALF>
1991                                (match_operand:VQ_HSF 1 "register_operand" "w")
1992                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1993                             )))]
1994   "TARGET_SIMD"
1995   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1996   [(set_attr "type" "neon_fp_cvt_widen_s")]
1997 )
1998
1999 (define_expand "vec_unpacks_hi_<mode>"
2000   [(match_operand:<VWIDE> 0 "register_operand" "")
2001    (match_operand:VQ_HSF 1 "register_operand" "")]
2002   "TARGET_SIMD"
2003   {
2004     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2005     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2006                                                        operands[1], p));
2007     DONE;
2008   }
2009 )
2010 (define_insn "aarch64_float_extend_lo_<Vwide>"
2011   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2012         (float_extend:<VWIDE>
2013           (match_operand:VDF 1 "register_operand" "w")))]
2014   "TARGET_SIMD"
2015   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2016   [(set_attr "type" "neon_fp_cvt_widen_s")]
2017 )
2018
2019 ;; Float narrowing operations.
2020
2021 (define_insn "aarch64_float_truncate_lo_<mode>"
2022   [(set (match_operand:VDF 0 "register_operand" "=w")
2023       (float_truncate:VDF
2024         (match_operand:<VWIDE> 1 "register_operand" "w")))]
2025   "TARGET_SIMD"
2026   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2027   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2028 )
2029
2030 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2031   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2032     (vec_concat:<VDBL>
2033       (match_operand:VDF 1 "register_operand" "0")
2034       (float_truncate:VDF
2035         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2036   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2037   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2038   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2039 )
2040
2041 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2042   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2043     (vec_concat:<VDBL>
2044       (float_truncate:VDF
2045         (match_operand:<VWIDE> 2 "register_operand" "w"))
2046       (match_operand:VDF 1 "register_operand" "0")))]
2047   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2048   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2049   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2050 )
2051
2052 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2053   [(match_operand:<VDBL> 0 "register_operand" "=w")
2054    (match_operand:VDF 1 "register_operand" "0")
2055    (match_operand:<VWIDE> 2 "register_operand" "w")]
2056   "TARGET_SIMD"
2057 {
2058   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2059                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2060                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2061   emit_insn (gen (operands[0], operands[1], operands[2]));
2062   DONE;
2063 }
2064 )
2065
2066 (define_expand "vec_pack_trunc_v2df"
2067   [(set (match_operand:V4SF 0 "register_operand")
2068       (vec_concat:V4SF
2069         (float_truncate:V2SF
2070             (match_operand:V2DF 1 "register_operand"))
2071         (float_truncate:V2SF
2072             (match_operand:V2DF 2 "register_operand"))
2073           ))]
2074   "TARGET_SIMD"
2075   {
2076     rtx tmp = gen_reg_rtx (V2SFmode);
2077     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2078     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2079
2080     emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2081     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2082                                                    tmp, operands[hi]));
2083     DONE;
2084   }
2085 )
2086
2087 (define_expand "vec_pack_trunc_df"
2088   [(set (match_operand:V2SF 0 "register_operand")
2089       (vec_concat:V2SF
2090         (float_truncate:SF
2091             (match_operand:DF 1 "register_operand"))
2092         (float_truncate:SF
2093             (match_operand:DF 2 "register_operand"))
2094           ))]
2095   "TARGET_SIMD"
2096   {
2097     rtx tmp = gen_reg_rtx (V2SFmode);
2098     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2099     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2100
2101     emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2102     emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2103     emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2104     DONE;
2105   }
2106 )
2107
2108 ;; FP Max/Min
2109 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
2110 ;; expression like:
2111 ;;      a = (b < c) ? b : c;
2112 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2113 ;; either explicitly or indirectly via -ffast-math.
2114 ;;
2115 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2116 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2117 ;; operand will be returned when both operands are zero (i.e. they may not
2118 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2119 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2120 ;; NaNs.
2121
2122 (define_insn "<su><maxmin><mode>3"
2123   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2124         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2125                        (match_operand:VHSDF 2 "register_operand" "w")))]
2126   "TARGET_SIMD"
2127   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2128   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2129 )
2130
2131 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2132 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2133 ;; which implement the IEEE fmax ()/fmin () functions.
2134 (define_insn "<maxmin_uns><mode>3"
2135   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2136        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2137                       (match_operand:VHSDF 2 "register_operand" "w")]
2138                       FMAXMIN_UNS))]
2139   "TARGET_SIMD"
2140   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2141   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2142 )
2143
2144 ;; 'across lanes' add.
2145
2146 (define_expand "reduc_plus_scal_<mode>"
2147   [(match_operand:<VEL> 0 "register_operand" "=w")
2148    (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2149                UNSPEC_ADDV)]
2150   "TARGET_SIMD"
2151   {
2152     rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2153     rtx scratch = gen_reg_rtx (<MODE>mode);
2154     emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2155     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2156     DONE;
2157   }
2158 )
2159
2160 (define_insn "aarch64_faddp<mode>"
2161  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2162        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2163                       (match_operand:VHSDF 2 "register_operand" "w")]
2164         UNSPEC_FADDV))]
2165  "TARGET_SIMD"
2166  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2167   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2168 )
2169
2170 (define_insn "aarch64_reduc_plus_internal<mode>"
2171  [(set (match_operand:VDQV 0 "register_operand" "=w")
2172        (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2173                     UNSPEC_ADDV))]
2174  "TARGET_SIMD"
2175  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2176   [(set_attr "type" "neon_reduc_add<q>")]
2177 )
2178
2179 (define_insn "aarch64_reduc_plus_internalv2si"
2180  [(set (match_operand:V2SI 0 "register_operand" "=w")
2181        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2182                     UNSPEC_ADDV))]
2183  "TARGET_SIMD"
2184  "addp\\t%0.2s, %1.2s, %1.2s"
2185   [(set_attr "type" "neon_reduc_add")]
2186 )
2187
2188 (define_insn "reduc_plus_scal_<mode>"
2189  [(set (match_operand:<VEL> 0 "register_operand" "=w")
2190        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2191                    UNSPEC_FADDV))]
2192  "TARGET_SIMD"
2193  "faddp\\t%<Vetype>0, %1.<Vtype>"
2194   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2195 )
2196
2197 (define_expand "reduc_plus_scal_v4sf"
2198  [(set (match_operand:SF 0 "register_operand")
2199        (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2200                     UNSPEC_FADDV))]
2201  "TARGET_SIMD"
2202 {
2203   rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2204   rtx scratch = gen_reg_rtx (V4SFmode);
2205   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2206   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2207   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2208   DONE;
2209 })
2210
2211 (define_insn "clrsb<mode>2"
2212   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2213         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2214   "TARGET_SIMD"
2215   "cls\\t%0.<Vtype>, %1.<Vtype>"
2216   [(set_attr "type" "neon_cls<q>")]
2217 )
2218
2219 (define_insn "clz<mode>2"
2220  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2221        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2222  "TARGET_SIMD"
2223  "clz\\t%0.<Vtype>, %1.<Vtype>"
2224   [(set_attr "type" "neon_cls<q>")]
2225 )
2226
2227 (define_insn "popcount<mode>2"
2228   [(set (match_operand:VB 0 "register_operand" "=w")
2229         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2230   "TARGET_SIMD"
2231   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2232   [(set_attr "type" "neon_cnt<q>")]
2233 )
2234
2235 ;; 'across lanes' max and min ops.
2236
2237 ;; Template for outputting a scalar, so we can create __builtins which can be
2238 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
2239 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2240   [(match_operand:<VEL> 0 "register_operand")
2241    (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2242                   FMAXMINV)]
2243   "TARGET_SIMD"
2244   {
2245     rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2246     rtx scratch = gen_reg_rtx (<MODE>mode);
2247     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2248                                                               operands[1]));
2249     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2250     DONE;
2251   }
2252 )
2253
2254 ;; Likewise for integer cases, signed and unsigned.
2255 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2256   [(match_operand:<VEL> 0 "register_operand")
2257    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2258                     MAXMINV)]
2259   "TARGET_SIMD"
2260   {
2261     rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2262     rtx scratch = gen_reg_rtx (<MODE>mode);
2263     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2264                                                               operands[1]));
2265     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2266     DONE;
2267   }
2268 )
2269
2270 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2271  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2272        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2273                     MAXMINV))]
2274  "TARGET_SIMD"
2275  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2276   [(set_attr "type" "neon_reduc_minmax<q>")]
2277 )
2278
2279 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2280  [(set (match_operand:V2SI 0 "register_operand" "=w")
2281        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2282                     MAXMINV))]
2283  "TARGET_SIMD"
2284  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2285   [(set_attr "type" "neon_reduc_minmax")]
2286 )
2287
2288 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2289  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2290        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2291                       FMAXMINV))]
2292  "TARGET_SIMD"
2293  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2294   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2295 )
2296
2297 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2298 ;; allocation.
2299 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2300 ;; to select.
2301 ;;
2302 ;; Thus our BSL is of the form:
2303 ;;   op0 = bsl (mask, op2, op3)
2304 ;; We can use any of:
2305 ;;
2306 ;;   if (op0 = mask)
2307 ;;     bsl mask, op1, op2
2308 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2309 ;;     bit op0, op2, mask
2310 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2311 ;;     bif op0, op1, mask
2312 ;;
2313 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2314 ;; Some forms of straight-line code may generate the equivalent form
2315 ;; in *aarch64_simd_bsl<mode>_alt.
2316
2317 (define_insn "aarch64_simd_bsl<mode>_internal"
2318   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2319         (xor:VSDQ_I_DI
2320            (and:VSDQ_I_DI
2321              (xor:VSDQ_I_DI
2322                (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2323                (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2324              (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2325           (match_dup:<V_INT_EQUIV> 3)
2326         ))]
2327   "TARGET_SIMD"
2328   "@
2329   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2330   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2331   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2332   [(set_attr "type" "neon_bsl<q>")]
2333 )
2334
2335 ;; We need this form in addition to the above pattern to match the case
2336 ;; when combine tries merging three insns such that the second operand of
2337 ;; the outer XOR matches the second operand of the inner XOR rather than
2338 ;; the first.  The two are equivalent but since recog doesn't try all
2339 ;; permutations of commutative operations, we have to have a separate pattern.
2340
2341 (define_insn "*aarch64_simd_bsl<mode>_alt"
2342   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2343         (xor:VSDQ_I_DI
2344            (and:VSDQ_I_DI
2345              (xor:VSDQ_I_DI
2346                (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2347                (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2348               (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2349           (match_dup:VSDQ_I_DI 2)))]
2350   "TARGET_SIMD"
2351   "@
2352   bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2353   bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2354   bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2355   [(set_attr "type" "neon_bsl<q>")]
2356 )
2357
2358 (define_expand "aarch64_simd_bsl<mode>"
2359   [(match_operand:VALLDIF 0 "register_operand")
2360    (match_operand:<V_INT_EQUIV> 1 "register_operand")
2361    (match_operand:VALLDIF 2 "register_operand")
2362    (match_operand:VALLDIF 3 "register_operand")]
2363  "TARGET_SIMD"
2364 {
2365   /* We can't alias operands together if they have different modes.  */
2366   rtx tmp = operands[0];
2367   if (FLOAT_MODE_P (<MODE>mode))
2368     {
2369       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2370       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2371       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2372     }
2373   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2374   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2375                                                          operands[1],
2376                                                          operands[2],
2377                                                          operands[3]));
2378   if (tmp != operands[0])
2379     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2380
2381   DONE;
2382 })
2383
2384 (define_expand "vcond_mask_<mode><v_int_equiv>"
2385   [(match_operand:VALLDI 0 "register_operand")
2386    (match_operand:VALLDI 1 "nonmemory_operand")
2387    (match_operand:VALLDI 2 "nonmemory_operand")
2388    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2389   "TARGET_SIMD"
2390 {
2391   /* If we have (a = (P) ? -1 : 0);
2392      Then we can simply move the generated mask (result must be int).  */
2393   if (operands[1] == CONSTM1_RTX (<MODE>mode)
2394       && operands[2] == CONST0_RTX (<MODE>mode))
2395     emit_move_insn (operands[0], operands[3]);
2396   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2397   else if (operands[1] == CONST0_RTX (<MODE>mode)
2398            && operands[2] == CONSTM1_RTX (<MODE>mode))
2399     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2400   else
2401     {
2402       if (!REG_P (operands[1]))
2403         operands[1] = force_reg (<MODE>mode, operands[1]);
2404       if (!REG_P (operands[2]))
2405         operands[2] = force_reg (<MODE>mode, operands[2]);
2406       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2407                                              operands[1], operands[2]));
2408     }
2409
2410   DONE;
2411 })
2412
2413 ;; Patterns comparing two vectors to produce a mask.
2414
2415 (define_expand "vec_cmp<mode><mode>"
2416   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2417           (match_operator 1 "comparison_operator"
2418             [(match_operand:VSDQ_I_DI 2 "register_operand")
2419              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2420   "TARGET_SIMD"
2421 {
2422   rtx mask = operands[0];
2423   enum rtx_code code = GET_CODE (operands[1]);
2424
2425   switch (code)
2426     {
2427     case NE:
2428     case LE:
2429     case LT:
2430     case GE:
2431     case GT:
2432     case EQ:
2433       if (operands[3] == CONST0_RTX (<MODE>mode))
2434         break;
2435
2436       /* Fall through.  */
2437     default:
2438       if (!REG_P (operands[3]))
2439         operands[3] = force_reg (<MODE>mode, operands[3]);
2440
2441       break;
2442     }
2443
2444   switch (code)
2445     {
2446     case LT:
2447       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2448       break;
2449
2450     case GE:
2451       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2452       break;
2453
2454     case LE:
2455       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2456       break;
2457
2458     case GT:
2459       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2460       break;
2461
2462     case LTU:
2463       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2464       break;
2465
2466     case GEU:
2467       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2468       break;
2469
2470     case LEU:
2471       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2472       break;
2473
2474     case GTU:
2475       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2476       break;
2477
2478     case NE:
2479       /* Handle NE as !EQ.  */
2480       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2481       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2482       break;
2483
2484     case EQ:
2485       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2486       break;
2487
2488     default:
2489       gcc_unreachable ();
2490     }
2491
2492   DONE;
2493 })
2494
2495 (define_expand "vec_cmp<mode><v_int_equiv>"
2496   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2497         (match_operator 1 "comparison_operator"
2498             [(match_operand:VDQF 2 "register_operand")
2499              (match_operand:VDQF 3 "nonmemory_operand")]))]
2500   "TARGET_SIMD"
2501 {
2502   int use_zero_form = 0;
2503   enum rtx_code code = GET_CODE (operands[1]);
2504   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2505
2506   rtx (*comparison) (rtx, rtx, rtx) = NULL;
2507
2508   switch (code)
2509     {
2510     case LE:
2511     case LT:
2512     case GE:
2513     case GT:
2514     case EQ:
2515       if (operands[3] == CONST0_RTX (<MODE>mode))
2516         {
2517           use_zero_form = 1;
2518           break;
2519         }
2520       /* Fall through.  */
2521     default:
2522       if (!REG_P (operands[3]))
2523         operands[3] = force_reg (<MODE>mode, operands[3]);
2524
2525       break;
2526     }
2527
2528   switch (code)
2529     {
2530     case LT:
2531       if (use_zero_form)
2532         {
2533           comparison = gen_aarch64_cmlt<mode>;
2534           break;
2535         }
2536       /* Fall through.  */
2537     case UNGE:
2538       std::swap (operands[2], operands[3]);
2539       /* Fall through.  */
2540     case UNLE:
2541     case GT:
2542       comparison = gen_aarch64_cmgt<mode>;
2543       break;
2544     case LE:
2545       if (use_zero_form)
2546         {
2547           comparison = gen_aarch64_cmle<mode>;
2548           break;
2549         }
2550       /* Fall through.  */
2551     case UNGT:
2552       std::swap (operands[2], operands[3]);
2553       /* Fall through.  */
2554     case UNLT:
2555     case GE:
2556       comparison = gen_aarch64_cmge<mode>;
2557       break;
2558     case NE:
2559     case EQ:
2560       comparison = gen_aarch64_cmeq<mode>;
2561       break;
2562     case UNEQ:
2563     case ORDERED:
2564     case UNORDERED:
2565       break;
2566     default:
2567       gcc_unreachable ();
2568     }
2569
2570   switch (code)
2571     {
2572     case UNGE:
2573     case UNGT:
2574     case UNLE:
2575     case UNLT:
2576     case NE:
2577       /* FCM returns false for lanes which are unordered, so if we use
2578          the inverse of the comparison we actually want to emit, then
2579          invert the result, we will end up with the correct result.
2580          Note that a NE NaN and NaN NE b are true for all a, b.
2581
2582          Our transformations are:
2583          a UNGE b -> !(b GT a)
2584          a UNGT b -> !(b GE a)
2585          a UNLE b -> !(a GT b)
2586          a UNLT b -> !(a GE b)
2587          a   NE b -> !(a EQ b)  */
2588       gcc_assert (comparison != NULL);
2589       emit_insn (comparison (operands[0], operands[2], operands[3]));
2590       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2591       break;
2592
2593     case LT:
2594     case LE:
2595     case GT:
2596     case GE:
2597     case EQ:
2598       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2599          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2600          a GE b -> a GE b
2601          a GT b -> a GT b
2602          a LE b -> b GE a
2603          a LT b -> b GT a
2604          a EQ b -> a EQ b  */
2605       gcc_assert (comparison != NULL);
2606       emit_insn (comparison (operands[0], operands[2], operands[3]));
2607       break;
2608
2609     case UNEQ:
2610       /* We first check (a > b ||  b > a) which is !UNEQ, inverting
2611          this result will then give us (a == b || a UNORDERED b).  */
2612       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2613                                          operands[2], operands[3]));
2614       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2615       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2616       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2617       break;
2618
2619     case UNORDERED:
2620       /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2621          UNORDERED as !ORDERED.  */
2622       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2623       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2624                                          operands[3], operands[2]));
2625       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2626       emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2627       break;
2628
2629     case ORDERED:
2630       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2631       emit_insn (gen_aarch64_cmge<mode> (operands[0],
2632                                          operands[3], operands[2]));
2633       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2634       break;
2635
2636     default:
2637       gcc_unreachable ();
2638     }
2639
2640   DONE;
2641 })
2642
2643 (define_expand "vec_cmpu<mode><mode>"
2644   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2645           (match_operator 1 "comparison_operator"
2646             [(match_operand:VSDQ_I_DI 2 "register_operand")
2647              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2648   "TARGET_SIMD"
2649 {
2650   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2651                                       operands[2], operands[3]));
2652   DONE;
2653 })
2654
2655 (define_expand "vcond<mode><mode>"
2656   [(set (match_operand:VALLDI 0 "register_operand")
2657         (if_then_else:VALLDI
2658           (match_operator 3 "comparison_operator"
2659             [(match_operand:VALLDI 4 "register_operand")
2660              (match_operand:VALLDI 5 "nonmemory_operand")])
2661           (match_operand:VALLDI 1 "nonmemory_operand")
2662           (match_operand:VALLDI 2 "nonmemory_operand")))]
2663   "TARGET_SIMD"
2664 {
2665   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2666   enum rtx_code code = GET_CODE (operands[3]);
2667
2668   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2669      it as well as switch operands 1/2 in order to avoid the additional
2670      NOT instruction.  */
2671   if (code == NE)
2672     {
2673       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2674                                     operands[4], operands[5]);
2675       std::swap (operands[1], operands[2]);
2676     }
2677   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2678                                              operands[4], operands[5]));
2679   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2680                                                  operands[2], mask));
2681
2682   DONE;
2683 })
2684
2685 (define_expand "vcond<v_cmp_mixed><mode>"
2686   [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2687         (if_then_else:<V_cmp_mixed>
2688           (match_operator 3 "comparison_operator"
2689             [(match_operand:VDQF_COND 4 "register_operand")
2690              (match_operand:VDQF_COND 5 "nonmemory_operand")])
2691           (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2692           (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2693   "TARGET_SIMD"
2694 {
2695   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2696   enum rtx_code code = GET_CODE (operands[3]);
2697
2698   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2699      it as well as switch operands 1/2 in order to avoid the additional
2700      NOT instruction.  */
2701   if (code == NE)
2702     {
2703       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2704                                     operands[4], operands[5]);
2705       std::swap (operands[1], operands[2]);
2706     }
2707   emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2708                                              operands[4], operands[5]));
2709   emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2710                                                 operands[0], operands[1],
2711                                                 operands[2], mask));
2712
2713   DONE;
2714 })
2715
2716 (define_expand "vcondu<mode><mode>"
2717   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2718         (if_then_else:VSDQ_I_DI
2719           (match_operator 3 "comparison_operator"
2720             [(match_operand:VSDQ_I_DI 4 "register_operand")
2721              (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2722           (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2723           (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2724   "TARGET_SIMD"
2725 {
2726   rtx mask = gen_reg_rtx (<MODE>mode);
2727   enum rtx_code code = GET_CODE (operands[3]);
2728
2729   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2730      it as well as switch operands 1/2 in order to avoid the additional
2731      NOT instruction.  */
2732   if (code == NE)
2733     {
2734       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2735                                     operands[4], operands[5]);
2736       std::swap (operands[1], operands[2]);
2737     }
2738   emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2739                                       operands[4], operands[5]));
2740   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2741                                                  operands[2], mask));
2742   DONE;
2743 })
2744
2745 (define_expand "vcondu<mode><v_cmp_mixed>"
2746   [(set (match_operand:VDQF 0 "register_operand")
2747         (if_then_else:VDQF
2748           (match_operator 3 "comparison_operator"
2749             [(match_operand:<V_cmp_mixed> 4 "register_operand")
2750              (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2751           (match_operand:VDQF 1 "nonmemory_operand")
2752           (match_operand:VDQF 2 "nonmemory_operand")))]
2753   "TARGET_SIMD"
2754 {
2755   rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2756   enum rtx_code code = GET_CODE (operands[3]);
2757
2758   /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2759      it as well as switch operands 1/2 in order to avoid the additional
2760      NOT instruction.  */
2761   if (code == NE)
2762     {
2763       operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2764                                     operands[4], operands[5]);
2765       std::swap (operands[1], operands[2]);
2766     }
2767   emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2768                                                   mask, operands[3],
2769                                                   operands[4], operands[5]));
2770   emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2771                                                  operands[2], mask));
2772   DONE;
2773 })
2774
2775 ;; Patterns for AArch64 SIMD Intrinsics.
2776
2777 ;; Lane extraction with sign extension to general purpose register.
2778 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2779   [(set (match_operand:GPI 0 "register_operand" "=r")
2780         (sign_extend:GPI
2781           (vec_select:<VEL>
2782             (match_operand:VDQQH 1 "register_operand" "w")
2783             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2784   "TARGET_SIMD"
2785   {
2786     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2787     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2788   }
2789   [(set_attr "type" "neon_to_gp<q>")]
2790 )
2791
2792 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2793   [(set (match_operand:SI 0 "register_operand" "=r")
2794         (zero_extend:SI
2795           (vec_select:<VEL>
2796             (match_operand:VDQQH 1 "register_operand" "w")
2797             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2798   "TARGET_SIMD"
2799   {
2800     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2801     return "umov\\t%w0, %1.<Vetype>[%2]";
2802   }
2803   [(set_attr "type" "neon_to_gp<q>")]
2804 )
2805
2806 ;; Lane extraction of a value, neither sign nor zero extension
2807 ;; is guaranteed so upper bits should be considered undefined.
2808 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2809 (define_insn "aarch64_get_lane<mode>"
2810   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2811         (vec_select:<VEL>
2812           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2813           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2814   "TARGET_SIMD"
2815   {
2816     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2817     switch (which_alternative)
2818       {
2819         case 0:
2820           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2821         case 1:
2822           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2823         case 2:
2824           return "st1\\t{%1.<Vetype>}[%2], %0";
2825         default:
2826           gcc_unreachable ();
2827       }
2828   }
2829   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2830 )
2831
2832 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2833 ;; dest vector.
2834
2835 (define_insn "*aarch64_combinez<mode>"
2836   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2837         (vec_concat:<VDBL>
2838            (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2839            (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2840   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2841   "@
2842    mov\\t%0.8b, %1.8b
2843    fmov\t%d0, %1
2844    ldr\\t%d0, %1"
2845   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2846    (set_attr "simd" "yes,*,yes")
2847    (set_attr "fp" "*,yes,*")]
2848 )
2849
2850 (define_insn "*aarch64_combinez_be<mode>"
2851   [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2852         (vec_concat:<VDBL>
2853            (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2854            (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2855   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2856   "@
2857    mov\\t%0.8b, %1.8b
2858    fmov\t%d0, %1
2859    ldr\\t%d0, %1"
2860   [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2861    (set_attr "simd" "yes,*,yes")
2862    (set_attr "fp" "*,yes,*")]
2863 )
2864
2865 (define_expand "aarch64_combine<mode>"
2866   [(match_operand:<VDBL> 0 "register_operand")
2867    (match_operand:VDC 1 "register_operand")
2868    (match_operand:VDC 2 "register_operand")]
2869   "TARGET_SIMD"
2870 {
2871   aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2872
2873   DONE;
2874 }
2875 )
2876
2877 (define_expand "aarch64_simd_combine<mode>"
2878   [(match_operand:<VDBL> 0 "register_operand")
2879    (match_operand:VDC 1 "register_operand")
2880    (match_operand:VDC 2 "register_operand")]
2881   "TARGET_SIMD"
2882   {
2883     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2884     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2885     DONE;
2886   }
2887 [(set_attr "type" "multiple")]
2888 )
2889
2890 ;; <su><addsub>l<q>.
2891
2892 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2893  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2894        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2895                            (match_operand:VQW 1 "register_operand" "w")
2896                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2897                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2898                            (match_operand:VQW 2 "register_operand" "w")
2899                            (match_dup 3)))))]
2900   "TARGET_SIMD"
2901   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2902   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2903 )
2904
2905 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2906  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2907        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2908                            (match_operand:VQW 1 "register_operand" "w")
2909                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2910                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2911                            (match_operand:VQW 2 "register_operand" "w")
2912                            (match_dup 3)))))]
2913   "TARGET_SIMD"
2914   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2915   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2916 )
2917
2918
2919 (define_expand "aarch64_saddl2<mode>"
2920   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2921    (match_operand:VQW 1 "register_operand" "w")
2922    (match_operand:VQW 2 "register_operand" "w")]
2923   "TARGET_SIMD"
2924 {
2925   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2926   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2927                                                   operands[2], p));
2928   DONE;
2929 })
2930
2931 (define_expand "aarch64_uaddl2<mode>"
2932   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2933    (match_operand:VQW 1 "register_operand" "w")
2934    (match_operand:VQW 2 "register_operand" "w")]
2935   "TARGET_SIMD"
2936 {
2937   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2938   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2939                                                   operands[2], p));
2940   DONE;
2941 })
2942
2943 (define_expand "aarch64_ssubl2<mode>"
2944   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2945    (match_operand:VQW 1 "register_operand" "w")
2946    (match_operand:VQW 2 "register_operand" "w")]
2947   "TARGET_SIMD"
2948 {
2949   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2950   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2951                                                 operands[2], p));
2952   DONE;
2953 })
2954
2955 (define_expand "aarch64_usubl2<mode>"
2956   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2957    (match_operand:VQW 1 "register_operand" "w")
2958    (match_operand:VQW 2 "register_operand" "w")]
2959   "TARGET_SIMD"
2960 {
2961   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2962   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2963                                                 operands[2], p));
2964   DONE;
2965 })
2966
2967 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2968  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2969        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2970                            (match_operand:VD_BHSI 1 "register_operand" "w"))
2971                        (ANY_EXTEND:<VWIDE>
2972                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2973   "TARGET_SIMD"
2974   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2975   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2976 )
2977
2978 ;; <su><addsub>w<q>.
2979
2980 (define_expand "widen_ssum<mode>3"
2981   [(set (match_operand:<VDBLW> 0 "register_operand" "")
2982         (plus:<VDBLW> (sign_extend:<VDBLW>
2983                         (match_operand:VQW 1 "register_operand" ""))
2984                       (match_operand:<VDBLW> 2 "register_operand" "")))]
2985   "TARGET_SIMD"
2986   {
2987     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2988     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2989
2990     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2991                                                 operands[1], p));
2992     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
2993     DONE;
2994   }
2995 )
2996
2997 (define_expand "widen_ssum<mode>3"
2998   [(set (match_operand:<VWIDE> 0 "register_operand" "")
2999         (plus:<VWIDE> (sign_extend:<VWIDE>
3000                         (match_operand:VD_BHSI 1 "register_operand" ""))
3001                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3002   "TARGET_SIMD"
3003 {
3004   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3005   DONE;
3006 })
3007
3008 (define_expand "widen_usum<mode>3"
3009   [(set (match_operand:<VDBLW> 0 "register_operand" "")
3010         (plus:<VDBLW> (zero_extend:<VDBLW>
3011                         (match_operand:VQW 1 "register_operand" ""))
3012                       (match_operand:<VDBLW> 2 "register_operand" "")))]
3013   "TARGET_SIMD"
3014   {
3015     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
3016     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3017
3018     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3019                                                  operands[1], p));
3020     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3021     DONE;
3022   }
3023 )
3024
3025 (define_expand "widen_usum<mode>3"
3026   [(set (match_operand:<VWIDE> 0 "register_operand" "")
3027         (plus:<VWIDE> (zero_extend:<VWIDE>
3028                         (match_operand:VD_BHSI 1 "register_operand" ""))
3029                       (match_operand:<VWIDE> 2 "register_operand" "")))]
3030   "TARGET_SIMD"
3031 {
3032   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3033   DONE;
3034 })
3035
3036 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3037   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3038         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3039                         (ANY_EXTEND:<VWIDE>
3040                           (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3041   "TARGET_SIMD"
3042   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3043   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3044 )
3045
3046 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3047   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3048         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3049                         (ANY_EXTEND:<VWIDE>
3050                           (vec_select:<VHALF>
3051                            (match_operand:VQW 2 "register_operand" "w")
3052                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3053   "TARGET_SIMD"
3054   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3055   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3056 )
3057
3058 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3059   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3060         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3061                         (ANY_EXTEND:<VWIDE>
3062                           (vec_select:<VHALF>
3063                            (match_operand:VQW 2 "register_operand" "w")
3064                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3065   "TARGET_SIMD"
3066   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3067   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3068 )
3069
3070 (define_expand "aarch64_saddw2<mode>"
3071   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3072    (match_operand:<VWIDE> 1 "register_operand" "w")
3073    (match_operand:VQW 2 "register_operand" "w")]
3074   "TARGET_SIMD"
3075 {
3076   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3077   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3078                                                 operands[2], p));
3079   DONE;
3080 })
3081
3082 (define_expand "aarch64_uaddw2<mode>"
3083   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3084    (match_operand:<VWIDE> 1 "register_operand" "w")
3085    (match_operand:VQW 2 "register_operand" "w")]
3086   "TARGET_SIMD"
3087 {
3088   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3089   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3090                                                 operands[2], p));
3091   DONE;
3092 })
3093
3094
3095 (define_expand "aarch64_ssubw2<mode>"
3096   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3097    (match_operand:<VWIDE> 1 "register_operand" "w")
3098    (match_operand:VQW 2 "register_operand" "w")]
3099   "TARGET_SIMD"
3100 {
3101   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3102   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3103                                                 operands[2], p));
3104   DONE;
3105 })
3106
3107 (define_expand "aarch64_usubw2<mode>"
3108   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3109    (match_operand:<VWIDE> 1 "register_operand" "w")
3110    (match_operand:VQW 2 "register_operand" "w")]
3111   "TARGET_SIMD"
3112 {
3113   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3114   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3115                                                 operands[2], p));
3116   DONE;
3117 })
3118
3119 ;; <su><r>h<addsub>.
3120
3121 (define_insn "aarch64_<sur>h<addsub><mode>"
3122   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3123         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3124                       (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3125                      HADDSUB))]
3126   "TARGET_SIMD"
3127   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3128   [(set_attr "type" "neon_<addsub>_halve<q>")]
3129 )
3130
3131 ;; <r><addsub>hn<q>.
3132
3133 (define_insn "aarch64_<sur><addsub>hn<mode>"
3134   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3135         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3136                             (match_operand:VQN 2 "register_operand" "w")]
3137                            ADDSUBHN))]
3138   "TARGET_SIMD"
3139   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3140   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3141 )
3142
3143 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3144   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3145         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3146                              (match_operand:VQN 2 "register_operand" "w")
3147                              (match_operand:VQN 3 "register_operand" "w")]
3148                             ADDSUBHN2))]
3149   "TARGET_SIMD"
3150   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3151   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3152 )
3153
3154 ;; pmul.
3155
3156 (define_insn "aarch64_pmul<mode>"
3157   [(set (match_operand:VB 0 "register_operand" "=w")
3158         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3159                     (match_operand:VB 2 "register_operand" "w")]
3160                    UNSPEC_PMUL))]
3161  "TARGET_SIMD"
3162  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3163   [(set_attr "type" "neon_mul_<Vetype><q>")]
3164 )
3165
3166 ;; fmulx.
3167
3168 (define_insn "aarch64_fmulx<mode>"
3169   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3170         (unspec:VHSDF_HSDF
3171           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3172            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3173            UNSPEC_FMULX))]
3174  "TARGET_SIMD"
3175  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3176  [(set_attr "type" "neon_fp_mul_<stype>")]
3177 )
3178
3179 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3180
3181 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3182   [(set (match_operand:VDQSF 0 "register_operand" "=w")
3183         (unspec:VDQSF
3184          [(match_operand:VDQSF 1 "register_operand" "w")
3185           (vec_duplicate:VDQSF
3186            (vec_select:<VEL>
3187             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3188             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3189          UNSPEC_FMULX))]
3190   "TARGET_SIMD"
3191   {
3192     operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3193                                           INTVAL (operands[3])));
3194     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3195   }
3196   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3197 )
3198
3199 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3200
3201 (define_insn "*aarch64_mulx_elt<mode>"
3202   [(set (match_operand:VDQF 0 "register_operand" "=w")
3203         (unspec:VDQF
3204          [(match_operand:VDQF 1 "register_operand" "w")
3205           (vec_duplicate:VDQF
3206            (vec_select:<VEL>
3207             (match_operand:VDQF 2 "register_operand" "w")
3208             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3209          UNSPEC_FMULX))]
3210   "TARGET_SIMD"
3211   {
3212     operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3213     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3214   }
3215   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3216 )
3217
3218 ;; vmulxq_lane
3219
3220 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3221   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3222         (unspec:VHSDF
3223          [(match_operand:VHSDF 1 "register_operand" "w")
3224           (vec_duplicate:VHSDF
3225             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3226          UNSPEC_FMULX))]
3227   "TARGET_SIMD"
3228   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3229   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3230 )
3231
3232 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3233 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
3234 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3235
3236 (define_insn "*aarch64_vgetfmulx<mode>"
3237   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3238         (unspec:<VEL>
3239          [(match_operand:<VEL> 1 "register_operand" "w")
3240           (vec_select:<VEL>
3241            (match_operand:VDQF 2 "register_operand" "w")
3242             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3243          UNSPEC_FMULX))]
3244   "TARGET_SIMD"
3245   {
3246     operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3247     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3248   }
3249   [(set_attr "type" "fmul<Vetype>")]
3250 )
3251 ;; <su>q<addsub>
3252
3253 (define_insn "aarch64_<su_optab><optab><mode>"
3254   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3255         (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3256                           (match_operand:VSDQ_I 2 "register_operand" "w")))]
3257   "TARGET_SIMD"
3258   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3259   [(set_attr "type" "neon_<optab><q>")]
3260 )
3261
3262 ;; suqadd and usqadd
3263
3264 (define_insn "aarch64_<sur>qadd<mode>"
3265   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3266         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3267                         (match_operand:VSDQ_I 2 "register_operand" "w")]
3268                        USSUQADD))]
3269   "TARGET_SIMD"
3270   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3271   [(set_attr "type" "neon_qadd<q>")]
3272 )
3273
3274 ;; sqmovun
3275
3276 (define_insn "aarch64_sqmovun<mode>"
3277   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3278         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3279                             UNSPEC_SQXTUN))]
3280    "TARGET_SIMD"
3281    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3282    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3283 )
3284
3285 ;; sqmovn and uqmovn
3286
3287 (define_insn "aarch64_<sur>qmovn<mode>"
3288   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3289         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3290                             SUQMOVN))]
3291   "TARGET_SIMD"
3292   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3293    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3294 )
3295
3296 ;; <su>q<absneg>
3297
3298 (define_insn "aarch64_s<optab><mode>"
3299   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3300         (UNQOPS:VSDQ_I
3301           (match_operand:VSDQ_I 1 "register_operand" "w")))]
3302   "TARGET_SIMD"
3303   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3304   [(set_attr "type" "neon_<optab><q>")]
3305 )
3306
3307 ;; sq<r>dmulh.
3308
3309 (define_insn "aarch64_sq<r>dmulh<mode>"
3310   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3311         (unspec:VSDQ_HSI
3312           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3313            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3314          VQDMULH))]
3315   "TARGET_SIMD"
3316   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3317   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3318 )
3319
3320 ;; sq<r>dmulh_lane
3321
3322 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3323   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3324         (unspec:VDQHS
3325           [(match_operand:VDQHS 1 "register_operand" "w")
3326            (vec_select:<VEL>
3327              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3328              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3329          VQDMULH))]
3330   "TARGET_SIMD"
3331   "*
3332    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3333    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3334   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3335 )
3336
3337 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3338   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3339         (unspec:VDQHS
3340           [(match_operand:VDQHS 1 "register_operand" "w")
3341            (vec_select:<VEL>
3342              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3343              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3344          VQDMULH))]
3345   "TARGET_SIMD"
3346   "*
3347    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3348    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3349   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3350 )
3351
3352 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3353   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3354         (unspec:SD_HSI
3355           [(match_operand:SD_HSI 1 "register_operand" "w")
3356            (vec_select:<VEL>
3357              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3358              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3359          VQDMULH))]
3360   "TARGET_SIMD"
3361   "*
3362    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3363    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3364   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3365 )
3366
3367 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3368   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3369         (unspec:SD_HSI
3370           [(match_operand:SD_HSI 1 "register_operand" "w")
3371            (vec_select:<VEL>
3372              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3373              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3374          VQDMULH))]
3375   "TARGET_SIMD"
3376   "*
3377    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3378    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3379   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3380 )
3381
3382 ;; sqrdml[as]h.
3383
3384 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3385   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3386         (unspec:VSDQ_HSI
3387           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3388            (match_operand:VSDQ_HSI 2 "register_operand" "w")
3389            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3390           SQRDMLH_AS))]
3391    "TARGET_SIMD_RDMA"
3392    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3393    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3394 )
3395
3396 ;; sqrdml[as]h_lane.
3397
3398 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3399   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3400         (unspec:VDQHS
3401           [(match_operand:VDQHS 1 "register_operand" "0")
3402            (match_operand:VDQHS 2 "register_operand" "w")
3403            (vec_select:<VEL>
3404              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3405              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3406           SQRDMLH_AS))]
3407    "TARGET_SIMD_RDMA"
3408    {
3409      operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3410      return
3411       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3412    }
3413    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3414 )
3415
3416 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3417   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3418         (unspec:SD_HSI
3419           [(match_operand:SD_HSI 1 "register_operand" "0")
3420            (match_operand:SD_HSI 2 "register_operand" "w")
3421            (vec_select:<VEL>
3422              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3423              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3424           SQRDMLH_AS))]
3425    "TARGET_SIMD_RDMA"
3426    {
3427      operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3428      return
3429       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3430    }
3431    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3432 )
3433
3434 ;; sqrdml[as]h_laneq.
3435
3436 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3437   [(set (match_operand:VDQHS 0 "register_operand" "=w")
3438         (unspec:VDQHS
3439           [(match_operand:VDQHS 1 "register_operand" "0")
3440            (match_operand:VDQHS 2 "register_operand" "w")
3441            (vec_select:<VEL>
3442              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3443              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3444           SQRDMLH_AS))]
3445    "TARGET_SIMD_RDMA"
3446    {
3447      operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3448      return
3449       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3450    }
3451    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3452 )
3453
3454 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3455   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3456         (unspec:SD_HSI
3457           [(match_operand:SD_HSI 1 "register_operand" "0")
3458            (match_operand:SD_HSI 2 "register_operand" "w")
3459            (vec_select:<VEL>
3460              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3461              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3462           SQRDMLH_AS))]
3463    "TARGET_SIMD_RDMA"
3464    {
3465      operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3466      return
3467       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3468    }
3469    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3470 )
3471
3472 ;; vqdml[sa]l
3473
3474 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3475   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3476         (SBINQOPS:<VWIDE>
3477           (match_operand:<VWIDE> 1 "register_operand" "0")
3478           (ss_ashift:<VWIDE>
3479               (mult:<VWIDE>
3480                 (sign_extend:<VWIDE>
3481                       (match_operand:VSD_HSI 2 "register_operand" "w"))
3482                 (sign_extend:<VWIDE>
3483                       (match_operand:VSD_HSI 3 "register_operand" "w")))
3484               (const_int 1))))]
3485   "TARGET_SIMD"
3486   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3487   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3488 )
3489
3490 ;; vqdml[sa]l_lane
3491
3492 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3493   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3494         (SBINQOPS:<VWIDE>
3495           (match_operand:<VWIDE> 1 "register_operand" "0")
3496           (ss_ashift:<VWIDE>
3497             (mult:<VWIDE>
3498               (sign_extend:<VWIDE>
3499                 (match_operand:VD_HSI 2 "register_operand" "w"))
3500               (sign_extend:<VWIDE>
3501                 (vec_duplicate:VD_HSI
3502                   (vec_select:<VEL>
3503                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3504                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3505               ))
3506             (const_int 1))))]
3507   "TARGET_SIMD"
3508   {
3509     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3510     return
3511       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3512   }
3513   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3514 )
3515
3516 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3517   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3518         (SBINQOPS:<VWIDE>
3519           (match_operand:<VWIDE> 1 "register_operand" "0")
3520           (ss_ashift:<VWIDE>
3521             (mult:<VWIDE>
3522               (sign_extend:<VWIDE>
3523                 (match_operand:VD_HSI 2 "register_operand" "w"))
3524               (sign_extend:<VWIDE>
3525                 (vec_duplicate:VD_HSI
3526                   (vec_select:<VEL>
3527                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3528                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3529               ))
3530             (const_int 1))))]
3531   "TARGET_SIMD"
3532   {
3533     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3534     return
3535       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3536   }
3537   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3538 )
3539
3540 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3541   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3542         (SBINQOPS:<VWIDE>
3543           (match_operand:<VWIDE> 1 "register_operand" "0")
3544           (ss_ashift:<VWIDE>
3545             (mult:<VWIDE>
3546               (sign_extend:<VWIDE>
3547                 (match_operand:SD_HSI 2 "register_operand" "w"))
3548               (sign_extend:<VWIDE>
3549                 (vec_select:<VEL>
3550                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3551                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3552               )
3553             (const_int 1))))]
3554   "TARGET_SIMD"
3555   {
3556     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3557     return
3558       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3559   }
3560   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3561 )
3562
3563 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3564   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3565         (SBINQOPS:<VWIDE>
3566           (match_operand:<VWIDE> 1 "register_operand" "0")
3567           (ss_ashift:<VWIDE>
3568             (mult:<VWIDE>
3569               (sign_extend:<VWIDE>
3570                 (match_operand:SD_HSI 2 "register_operand" "w"))
3571               (sign_extend:<VWIDE>
3572                 (vec_select:<VEL>
3573                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3574                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3575               )
3576             (const_int 1))))]
3577   "TARGET_SIMD"
3578   {
3579     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3580     return
3581       "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3582   }
3583   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3584 )
3585
3586 ;; vqdml[sa]l_n
3587
3588 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3589   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3590         (SBINQOPS:<VWIDE>
3591           (match_operand:<VWIDE> 1 "register_operand" "0")
3592           (ss_ashift:<VWIDE>
3593               (mult:<VWIDE>
3594                 (sign_extend:<VWIDE>
3595                       (match_operand:VD_HSI 2 "register_operand" "w"))
3596                 (sign_extend:<VWIDE>
3597                   (vec_duplicate:VD_HSI
3598                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3599               (const_int 1))))]
3600   "TARGET_SIMD"
3601   "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3602   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3603 )
3604
3605 ;; sqdml[as]l2
3606
3607 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3608   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3609         (SBINQOPS:<VWIDE>
3610          (match_operand:<VWIDE> 1 "register_operand" "0")
3611          (ss_ashift:<VWIDE>
3612              (mult:<VWIDE>
3613                (sign_extend:<VWIDE>
3614                  (vec_select:<VHALF>
3615                      (match_operand:VQ_HSI 2 "register_operand" "w")
3616                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3617                (sign_extend:<VWIDE>
3618                  (vec_select:<VHALF>
3619                      (match_operand:VQ_HSI 3 "register_operand" "w")
3620                      (match_dup 4))))
3621              (const_int 1))))]
3622   "TARGET_SIMD"
3623   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3624   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3625 )
3626
3627 (define_expand "aarch64_sqdmlal2<mode>"
3628   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3629    (match_operand:<VWIDE> 1 "register_operand" "w")
3630    (match_operand:VQ_HSI 2 "register_operand" "w")
3631    (match_operand:VQ_HSI 3 "register_operand" "w")]
3632   "TARGET_SIMD"
3633 {
3634   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3635   emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3636                                                   operands[2], operands[3], p));
3637   DONE;
3638 })
3639
3640 (define_expand "aarch64_sqdmlsl2<mode>"
3641   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3642    (match_operand:<VWIDE> 1 "register_operand" "w")
3643    (match_operand:VQ_HSI 2 "register_operand" "w")
3644    (match_operand:VQ_HSI 3 "register_operand" "w")]
3645   "TARGET_SIMD"
3646 {
3647   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3648   emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3649                                                   operands[2], operands[3], p));
3650   DONE;
3651 })
3652
3653 ;; vqdml[sa]l2_lane
3654
3655 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3656   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3657         (SBINQOPS:<VWIDE>
3658           (match_operand:<VWIDE> 1 "register_operand" "0")
3659           (ss_ashift:<VWIDE>
3660               (mult:<VWIDE>
3661                 (sign_extend:<VWIDE>
3662                   (vec_select:<VHALF>
3663                     (match_operand:VQ_HSI 2 "register_operand" "w")
3664                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3665                 (sign_extend:<VWIDE>
3666                   (vec_duplicate:<VHALF>
3667                     (vec_select:<VEL>
3668                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3669                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3670                     ))))
3671               (const_int 1))))]
3672   "TARGET_SIMD"
3673   {
3674     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3675     return
3676      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3677   }
3678   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3679 )
3680
3681 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3682   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3683         (SBINQOPS:<VWIDE>
3684           (match_operand:<VWIDE> 1 "register_operand" "0")
3685           (ss_ashift:<VWIDE>
3686               (mult:<VWIDE>
3687                 (sign_extend:<VWIDE>
3688                   (vec_select:<VHALF>
3689                     (match_operand:VQ_HSI 2 "register_operand" "w")
3690                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3691                 (sign_extend:<VWIDE>
3692                   (vec_duplicate:<VHALF>
3693                     (vec_select:<VEL>
3694                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3695                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3696                     ))))
3697               (const_int 1))))]
3698   "TARGET_SIMD"
3699   {
3700     operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3701     return
3702      "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3703   }
3704   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3705 )
3706
3707 (define_expand "aarch64_sqdmlal2_lane<mode>"
3708   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3709    (match_operand:<VWIDE> 1 "register_operand" "w")
3710    (match_operand:VQ_HSI 2 "register_operand" "w")
3711    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3712    (match_operand:SI 4 "immediate_operand" "i")]
3713   "TARGET_SIMD"
3714 {
3715   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3716   emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3717                                                        operands[2], operands[3],
3718                                                        operands[4], p));
3719   DONE;
3720 })
3721
3722 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3723   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3724    (match_operand:<VWIDE> 1 "register_operand" "w")
3725    (match_operand:VQ_HSI 2 "register_operand" "w")
3726    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3727    (match_operand:SI 4 "immediate_operand" "i")]
3728   "TARGET_SIMD"
3729 {
3730   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3731   emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3732                                                        operands[2], operands[3],
3733                                                        operands[4], p));
3734   DONE;
3735 })
3736
3737 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3738   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3739    (match_operand:<VWIDE> 1 "register_operand" "w")
3740    (match_operand:VQ_HSI 2 "register_operand" "w")
3741    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3742    (match_operand:SI 4 "immediate_operand" "i")]
3743   "TARGET_SIMD"
3744 {
3745   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3746   emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3747                                                        operands[2], operands[3],
3748                                                        operands[4], p));
3749   DONE;
3750 })
3751
3752 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3753   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3754    (match_operand:<VWIDE> 1 "register_operand" "w")
3755    (match_operand:VQ_HSI 2 "register_operand" "w")
3756    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3757    (match_operand:SI 4 "immediate_operand" "i")]
3758   "TARGET_SIMD"
3759 {
3760   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3761   emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3762                                                        operands[2], operands[3],
3763                                                        operands[4], p));
3764   DONE;
3765 })
3766
3767 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3768   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3769         (SBINQOPS:<VWIDE>
3770           (match_operand:<VWIDE> 1 "register_operand" "0")
3771           (ss_ashift:<VWIDE>
3772             (mult:<VWIDE>
3773               (sign_extend:<VWIDE>
3774                 (vec_select:<VHALF>
3775                   (match_operand:VQ_HSI 2 "register_operand" "w")
3776                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3777               (sign_extend:<VWIDE>
3778                 (vec_duplicate:<VHALF>
3779                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3780             (const_int 1))))]
3781   "TARGET_SIMD"
3782   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3783   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
3786 (define_expand "aarch64_sqdmlal2_n<mode>"
3787   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3788    (match_operand:<VWIDE> 1 "register_operand" "w")
3789    (match_operand:VQ_HSI 2 "register_operand" "w")
3790    (match_operand:<VEL> 3 "register_operand" "w")]
3791   "TARGET_SIMD"
3792 {
3793   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3794   emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3795                                                     operands[2], operands[3],
3796                                                     p));
3797   DONE;
3798 })
3799
3800 (define_expand "aarch64_sqdmlsl2_n<mode>"
3801   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3802    (match_operand:<VWIDE> 1 "register_operand" "w")
3803    (match_operand:VQ_HSI 2 "register_operand" "w")
3804    (match_operand:<VEL> 3 "register_operand" "w")]
3805   "TARGET_SIMD"
3806 {
3807   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3808   emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3809                                                     operands[2], operands[3],
3810                                                     p));
3811   DONE;
3812 })
3813
3814 ;; vqdmull
3815
3816 (define_insn "aarch64_sqdmull<mode>"
3817   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3818         (ss_ashift:<VWIDE>
3819              (mult:<VWIDE>
3820                (sign_extend:<VWIDE>
3821                      (match_operand:VSD_HSI 1 "register_operand" "w"))
3822                (sign_extend:<VWIDE>
3823                      (match_operand:VSD_HSI 2 "register_operand" "w")))
3824              (const_int 1)))]
3825   "TARGET_SIMD"
3826   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3827   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3828 )
3829
3830 ;; vqdmull_lane
3831
3832 (define_insn "aarch64_sqdmull_lane<mode>"
3833   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834         (ss_ashift:<VWIDE>
3835              (mult:<VWIDE>
3836                (sign_extend:<VWIDE>
3837                  (match_operand:VD_HSI 1 "register_operand" "w"))
3838                (sign_extend:<VWIDE>
3839                  (vec_duplicate:VD_HSI
3840                    (vec_select:<VEL>
3841                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3842                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3843                ))
3844              (const_int 1)))]
3845   "TARGET_SIMD"
3846   {
3847     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3848     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3849   }
3850   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3851 )
3852
3853 (define_insn "aarch64_sqdmull_laneq<mode>"
3854   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3855         (ss_ashift:<VWIDE>
3856              (mult:<VWIDE>
3857                (sign_extend:<VWIDE>
3858                  (match_operand:VD_HSI 1 "register_operand" "w"))
3859                (sign_extend:<VWIDE>
3860                  (vec_duplicate:VD_HSI
3861                    (vec_select:<VEL>
3862                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3863                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3864                ))
3865              (const_int 1)))]
3866   "TARGET_SIMD"
3867   {
3868     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3869     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3870   }
3871   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3872 )
3873
3874 (define_insn "aarch64_sqdmull_lane<mode>"
3875   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876         (ss_ashift:<VWIDE>
3877              (mult:<VWIDE>
3878                (sign_extend:<VWIDE>
3879                  (match_operand:SD_HSI 1 "register_operand" "w"))
3880                (sign_extend:<VWIDE>
3881                  (vec_select:<VEL>
3882                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3883                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3884                ))
3885              (const_int 1)))]
3886   "TARGET_SIMD"
3887   {
3888     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3889     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3890   }
3891   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3892 )
3893
3894 (define_insn "aarch64_sqdmull_laneq<mode>"
3895   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3896         (ss_ashift:<VWIDE>
3897              (mult:<VWIDE>
3898                (sign_extend:<VWIDE>
3899                  (match_operand:SD_HSI 1 "register_operand" "w"))
3900                (sign_extend:<VWIDE>
3901                  (vec_select:<VEL>
3902                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3903                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3904                ))
3905              (const_int 1)))]
3906   "TARGET_SIMD"
3907   {
3908     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3909     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3910   }
3911   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3912 )
3913
3914 ;; vqdmull_n
3915
3916 (define_insn "aarch64_sqdmull_n<mode>"
3917   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3918         (ss_ashift:<VWIDE>
3919              (mult:<VWIDE>
3920                (sign_extend:<VWIDE>
3921                  (match_operand:VD_HSI 1 "register_operand" "w"))
3922                (sign_extend:<VWIDE>
3923                  (vec_duplicate:VD_HSI
3924                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3925                )
3926              (const_int 1)))]
3927   "TARGET_SIMD"
3928   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3929   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3930 )
3931
3932 ;; vqdmull2
3933
3934
3935
3936 (define_insn "aarch64_sqdmull2<mode>_internal"
3937   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3938         (ss_ashift:<VWIDE>
3939              (mult:<VWIDE>
3940                (sign_extend:<VWIDE>
3941                  (vec_select:<VHALF>
3942                    (match_operand:VQ_HSI 1 "register_operand" "w")
3943                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3944                (sign_extend:<VWIDE>
3945                  (vec_select:<VHALF>
3946                    (match_operand:VQ_HSI 2 "register_operand" "w")
3947                    (match_dup 3)))
3948                )
3949              (const_int 1)))]
3950   "TARGET_SIMD"
3951   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3952   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3953 )
3954
3955 (define_expand "aarch64_sqdmull2<mode>"
3956   [(match_operand:<VWIDE> 0 "register_operand" "=w")
3957    (match_operand:VQ_HSI 1 "register_operand" "w")
3958    (match_operand:VQ_HSI 2 "register_operand" "w")]
3959   "TARGET_SIMD"
3960 {
3961   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3962   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3963                                                   operands[2], p));
3964   DONE;
3965 })
3966
3967 ;; vqdmull2_lane
3968
3969 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3970   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3971         (ss_ashift:<VWIDE>
3972              (mult:<VWIDE>
3973                (sign_extend:<VWIDE>
3974                  (vec_select:<VHALF>
3975                    (match_operand:VQ_HSI 1 "register_operand" "w")
3976                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3977                (sign_extend:<VWIDE>
3978                  (vec_duplicate:<VHALF>
3979                    (vec_select:<VEL>
3980                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3981                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3982                ))
3983              (const_int 1)))]
3984   "TARGET_SIMD"
3985   {
3986     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3987     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3988   }
3989   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3990 )
3991
3992 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3993   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3994         (ss_ashift:<VWIDE>
3995              (mult:<VWIDE>
3996                (sign_extend:<VWIDE>
3997                  (vec_select:<VHALF>
3998                    (match_operand:VQ_HSI 1 "register_operand" "w")
3999                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4000                (sign_extend:<VWIDE>
4001                  (vec_duplicate:<VHALF>
4002                    (vec_select:<VEL>
4003                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4004                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4005                ))
4006              (const_int 1)))]
4007   "TARGET_SIMD"
4008   {
4009     operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
4010     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4011   }
4012   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4013 )
4014
4015 (define_expand "aarch64_sqdmull2_lane<mode>"
4016   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4017    (match_operand:VQ_HSI 1 "register_operand" "w")
4018    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4019    (match_operand:SI 3 "immediate_operand" "i")]
4020   "TARGET_SIMD"
4021 {
4022   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4023   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4024                                                        operands[2], operands[3],
4025                                                        p));
4026   DONE;
4027 })
4028
4029 (define_expand "aarch64_sqdmull2_laneq<mode>"
4030   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4031    (match_operand:VQ_HSI 1 "register_operand" "w")
4032    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4033    (match_operand:SI 3 "immediate_operand" "i")]
4034   "TARGET_SIMD"
4035 {
4036   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4037   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4038                                                        operands[2], operands[3],
4039                                                        p));
4040   DONE;
4041 })
4042
4043 ;; vqdmull2_n
4044
4045 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4046   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4047         (ss_ashift:<VWIDE>
4048              (mult:<VWIDE>
4049                (sign_extend:<VWIDE>
4050                  (vec_select:<VHALF>
4051                    (match_operand:VQ_HSI 1 "register_operand" "w")
4052                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4053                (sign_extend:<VWIDE>
4054                  (vec_duplicate:<VHALF>
4055                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4056                )
4057              (const_int 1)))]
4058   "TARGET_SIMD"
4059   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4060   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4061 )
4062
4063 (define_expand "aarch64_sqdmull2_n<mode>"
4064   [(match_operand:<VWIDE> 0 "register_operand" "=w")
4065    (match_operand:VQ_HSI 1 "register_operand" "w")
4066    (match_operand:<VEL> 2 "register_operand" "w")]
4067   "TARGET_SIMD"
4068 {
4069   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4070   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4071                                                     operands[2], p));
4072   DONE;
4073 })
4074
4075 ;; vshl
4076
4077 (define_insn "aarch64_<sur>shl<mode>"
4078   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4079         (unspec:VSDQ_I_DI
4080           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4081            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4082          VSHL))]
4083   "TARGET_SIMD"
4084   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4085   [(set_attr "type" "neon_shift_reg<q>")]
4086 )
4087
4088
4089 ;; vqshl
4090
4091 (define_insn "aarch64_<sur>q<r>shl<mode>"
4092   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4093         (unspec:VSDQ_I
4094           [(match_operand:VSDQ_I 1 "register_operand" "w")
4095            (match_operand:VSDQ_I 2 "register_operand" "w")]
4096          VQSHL))]
4097   "TARGET_SIMD"
4098   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4099   [(set_attr "type" "neon_sat_shift_reg<q>")]
4100 )
4101
4102 ;; vshll_n
4103
4104 (define_insn "aarch64_<sur>shll_n<mode>"
4105   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4106         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4107                          (match_operand:SI 2
4108                            "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4109                          VSHLL))]
4110   "TARGET_SIMD"
4111   {
4112     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4113       return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4114     else
4115       return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4116   }
4117   [(set_attr "type" "neon_shift_imm_long")]
4118 )
4119
4120 ;; vshll_high_n
4121
4122 (define_insn "aarch64_<sur>shll2_n<mode>"
4123   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4124         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4125                          (match_operand:SI 2 "immediate_operand" "i")]
4126                          VSHLL))]
4127   "TARGET_SIMD"
4128   {
4129     if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4130       return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4131     else
4132       return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4133   }
4134   [(set_attr "type" "neon_shift_imm_long")]
4135 )
4136
4137 ;; vrshr_n
4138
4139 (define_insn "aarch64_<sur>shr_n<mode>"
4140   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4141         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4142                            (match_operand:SI 2
4143                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4144                           VRSHR_N))]
4145   "TARGET_SIMD"
4146   "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4147   [(set_attr "type" "neon_sat_shift_imm<q>")]
4148 )
4149
4150 ;; v(r)sra_n
4151
4152 (define_insn "aarch64_<sur>sra_n<mode>"
4153   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4154         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4155                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4156                        (match_operand:SI 3
4157                          "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4158                       VSRA))]
4159   "TARGET_SIMD"
4160   "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4161   [(set_attr "type" "neon_shift_acc<q>")]
4162 )
4163
4164 ;; vs<lr>i_n
4165
4166 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4167   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4168         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4169                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4170                        (match_operand:SI 3
4171                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4172                       VSLRI))]
4173   "TARGET_SIMD"
4174   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4175   [(set_attr "type" "neon_shift_imm<q>")]
4176 )
4177
4178 ;; vqshl(u)
4179
4180 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4181   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4182         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4183                        (match_operand:SI 2
4184                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
4185                       VQSHL_N))]
4186   "TARGET_SIMD"
4187   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4188   [(set_attr "type" "neon_sat_shift_imm<q>")]
4189 )
4190
4191
4192 ;; vq(r)shr(u)n_n
4193
4194 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4195   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4196         (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4197                             (match_operand:SI 2
4198                               "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4199                            VQSHRN_N))]
4200   "TARGET_SIMD"
4201   "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4202   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4203 )
4204
4205
4206 ;; cm(eq|ge|gt|lt|le)
4207 ;; Note, we have constraints for Dz and Z as different expanders
4208 ;; have different ideas of what should be passed to this pattern.
4209
4210 (define_insn "aarch64_cm<optab><mode>"
4211   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4212         (neg:<V_INT_EQUIV>
4213           (COMPARISONS:<V_INT_EQUIV>
4214             (match_operand:VDQ_I 1 "register_operand" "w,w")
4215             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4216           )))]
4217   "TARGET_SIMD"
4218   "@
4219   cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4220   cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4221   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4222 )
4223
4224 (define_insn_and_split "aarch64_cm<optab>di"
4225   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4226         (neg:DI
4227           (COMPARISONS:DI
4228             (match_operand:DI 1 "register_operand" "w,w,r")
4229             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4230           )))
4231      (clobber (reg:CC CC_REGNUM))]
4232   "TARGET_SIMD"
4233   "#"
4234   "reload_completed"
4235   [(set (match_operand:DI 0 "register_operand")
4236         (neg:DI
4237           (COMPARISONS:DI
4238             (match_operand:DI 1 "register_operand")
4239             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4240           )))]
4241   {
4242     /* If we are in the general purpose register file,
4243        we split to a sequence of comparison and store.  */
4244     if (GP_REGNUM_P (REGNO (operands[0]))
4245         && GP_REGNUM_P (REGNO (operands[1])))
4246       {
4247         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4248         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4249         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4250         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4251         DONE;
4252       }
4253     /* Otherwise, we expand to a similar pattern which does not
4254        clobber CC_REGNUM.  */
4255   }
4256   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4257 )
4258
4259 (define_insn "*aarch64_cm<optab>di"
4260   [(set (match_operand:DI 0 "register_operand" "=w,w")
4261         (neg:DI
4262           (COMPARISONS:DI
4263             (match_operand:DI 1 "register_operand" "w,w")
4264             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4265           )))]
4266   "TARGET_SIMD && reload_completed"
4267   "@
4268   cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4269   cm<optab>\t%d0, %d1, #0"
4270   [(set_attr "type" "neon_compare, neon_compare_zero")]
4271 )
4272
4273 ;; cm(hs|hi)
4274
4275 (define_insn "aarch64_cm<optab><mode>"
4276   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4277         (neg:<V_INT_EQUIV>
4278           (UCOMPARISONS:<V_INT_EQUIV>
4279             (match_operand:VDQ_I 1 "register_operand" "w")
4280             (match_operand:VDQ_I 2 "register_operand" "w")
4281           )))]
4282   "TARGET_SIMD"
4283   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4284   [(set_attr "type" "neon_compare<q>")]
4285 )
4286
4287 (define_insn_and_split "aarch64_cm<optab>di"
4288   [(set (match_operand:DI 0 "register_operand" "=w,r")
4289         (neg:DI
4290           (UCOMPARISONS:DI
4291             (match_operand:DI 1 "register_operand" "w,r")
4292             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4293           )))
4294     (clobber (reg:CC CC_REGNUM))]
4295   "TARGET_SIMD"
4296   "#"
4297   "reload_completed"
4298   [(set (match_operand:DI 0 "register_operand")
4299         (neg:DI
4300           (UCOMPARISONS:DI
4301             (match_operand:DI 1 "register_operand")
4302             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4303           )))]
4304   {
4305     /* If we are in the general purpose register file,
4306        we split to a sequence of comparison and store.  */
4307     if (GP_REGNUM_P (REGNO (operands[0]))
4308         && GP_REGNUM_P (REGNO (operands[1])))
4309       {
4310         machine_mode mode = CCmode;
4311         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4312         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4313         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4314         DONE;
4315       }
4316     /* Otherwise, we expand to a similar pattern which does not
4317        clobber CC_REGNUM.  */
4318   }
4319   [(set_attr "type" "neon_compare,multiple")]
4320 )
4321
4322 (define_insn "*aarch64_cm<optab>di"
4323   [(set (match_operand:DI 0 "register_operand" "=w")
4324         (neg:DI
4325           (UCOMPARISONS:DI
4326             (match_operand:DI 1 "register_operand" "w")
4327             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4328           )))]
4329   "TARGET_SIMD && reload_completed"
4330   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4331   [(set_attr "type" "neon_compare")]
4332 )
4333
4334 ;; cmtst
4335
4336 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4337 ;; we don't have any insns using ne, and aarch64_vcond outputs
4338 ;; not (neg (eq (and x y) 0))
4339 ;; which is rewritten by simplify_rtx as
4340 ;; plus (eq (and x y) 0) -1.
4341
4342 (define_insn "aarch64_cmtst<mode>"
4343   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4344         (plus:<V_INT_EQUIV>
4345           (eq:<V_INT_EQUIV>
4346             (and:VDQ_I
4347               (match_operand:VDQ_I 1 "register_operand" "w")
4348               (match_operand:VDQ_I 2 "register_operand" "w"))
4349             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4350           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4351   ]
4352   "TARGET_SIMD"
4353   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4354   [(set_attr "type" "neon_tst<q>")]
4355 )
4356
4357 (define_insn_and_split "aarch64_cmtstdi"
4358   [(set (match_operand:DI 0 "register_operand" "=w,r")
4359         (neg:DI
4360           (ne:DI
4361             (and:DI
4362               (match_operand:DI 1 "register_operand" "w,r")
4363               (match_operand:DI 2 "register_operand" "w,r"))
4364             (const_int 0))))
4365     (clobber (reg:CC CC_REGNUM))]
4366   "TARGET_SIMD"
4367   "#"
4368   "reload_completed"
4369   [(set (match_operand:DI 0 "register_operand")
4370         (neg:DI
4371           (ne:DI
4372             (and:DI
4373               (match_operand:DI 1 "register_operand")
4374               (match_operand:DI 2 "register_operand"))
4375             (const_int 0))))]
4376   {
4377     /* If we are in the general purpose register file,
4378        we split to a sequence of comparison and store.  */
4379     if (GP_REGNUM_P (REGNO (operands[0]))
4380         && GP_REGNUM_P (REGNO (operands[1])))
4381       {
4382         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4383         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4384         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4385         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4386         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4387         DONE;
4388       }
4389     /* Otherwise, we expand to a similar pattern which does not
4390        clobber CC_REGNUM.  */
4391   }
4392   [(set_attr "type" "neon_tst,multiple")]
4393 )
4394
4395 (define_insn "*aarch64_cmtstdi"
4396   [(set (match_operand:DI 0 "register_operand" "=w")
4397         (neg:DI
4398           (ne:DI
4399             (and:DI
4400               (match_operand:DI 1 "register_operand" "w")
4401               (match_operand:DI 2 "register_operand" "w"))
4402             (const_int 0))))]
4403   "TARGET_SIMD"
4404   "cmtst\t%d0, %d1, %d2"
4405   [(set_attr "type" "neon_tst")]
4406 )
4407
4408 ;; fcm(eq|ge|gt|le|lt)
4409
4410 (define_insn "aarch64_cm<optab><mode>"
4411   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4412         (neg:<V_INT_EQUIV>
4413           (COMPARISONS:<V_INT_EQUIV>
4414             (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4415             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4416           )))]
4417   "TARGET_SIMD"
4418   "@
4419   fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4420   fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4421   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4422 )
4423
4424 ;; fac(ge|gt)
4425 ;; Note we can also handle what would be fac(le|lt) by
4426 ;; generating fac(ge|gt).
4427
4428 (define_insn "aarch64_fac<optab><mode>"
4429   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4430         (neg:<V_INT_EQUIV>
4431           (FAC_COMPARISONS:<V_INT_EQUIV>
4432             (abs:VHSDF_HSDF
4433               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4434             (abs:VHSDF_HSDF
4435               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4436   )))]
4437   "TARGET_SIMD"
4438   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4439   [(set_attr "type" "neon_fp_compare_<stype><q>")]
4440 )
4441
4442 ;; addp
4443
4444 (define_insn "aarch64_addp<mode>"
4445   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4446         (unspec:VD_BHSI
4447           [(match_operand:VD_BHSI 1 "register_operand" "w")
4448            (match_operand:VD_BHSI 2 "register_operand" "w")]
4449           UNSPEC_ADDP))]
4450   "TARGET_SIMD"
4451   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4452   [(set_attr "type" "neon_reduc_add<q>")]
4453 )
4454
4455 (define_insn "aarch64_addpdi"
4456   [(set (match_operand:DI 0 "register_operand" "=w")
4457         (unspec:DI
4458           [(match_operand:V2DI 1 "register_operand" "w")]
4459           UNSPEC_ADDP))]
4460   "TARGET_SIMD"
4461   "addp\t%d0, %1.2d"
4462   [(set_attr "type" "neon_reduc_add")]
4463 )
4464
4465 ;; sqrt
4466
4467 (define_expand "sqrt<mode>2"
4468   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4469         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4470   "TARGET_SIMD"
4471 {
4472   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4473     DONE;
4474 })
4475
4476 (define_insn "*sqrt<mode>2"
4477   [(set (match_operand:VHSDF 0 "register_operand" "=w")
4478         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4479   "TARGET_SIMD"
4480   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4481   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4482 )
4483
4484 ;; Patterns for vector struct loads and stores.
4485
4486 (define_insn "aarch64_simd_ld2<mode>"
4487   [(set (match_operand:OI 0 "register_operand" "=w")
4488         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4489                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490                    UNSPEC_LD2))]
4491   "TARGET_SIMD"
4492   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4493   [(set_attr "type" "neon_load2_2reg<q>")]
4494 )
4495
4496 (define_insn "aarch64_simd_ld2r<mode>"
4497   [(set (match_operand:OI 0 "register_operand" "=w")
4498        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4499                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4500                   UNSPEC_LD2_DUP))]
4501   "TARGET_SIMD"
4502   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4503   [(set_attr "type" "neon_load2_all_lanes<q>")]
4504 )
4505
4506 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4507   [(set (match_operand:OI 0 "register_operand" "=w")
4508         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4509                     (match_operand:OI 2 "register_operand" "0")
4510                     (match_operand:SI 3 "immediate_operand" "i")
4511                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4512                    UNSPEC_LD2_LANE))]
4513   "TARGET_SIMD"
4514   {
4515     operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4516     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4517   }
4518   [(set_attr "type" "neon_load2_one_lane")]
4519 )
4520
4521 (define_expand "vec_load_lanesoi<mode>"
4522   [(set (match_operand:OI 0 "register_operand" "=w")
4523         (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4524                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4525                    UNSPEC_LD2))]
4526   "TARGET_SIMD"
4527 {
4528   if (BYTES_BIG_ENDIAN)
4529     {
4530       rtx tmp = gen_reg_rtx (OImode);
4531       rtx mask = aarch64_reverse_mask (<MODE>mode);
4532       emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4533       emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4534     }
4535   else
4536     emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4537   DONE;
4538 })
4539
4540 (define_insn "aarch64_simd_st2<mode>"
4541   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4542         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4543                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4544                    UNSPEC_ST2))]
4545   "TARGET_SIMD"
4546   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4547   [(set_attr "type" "neon_store2_2reg<q>")]
4548 )
4549
4550 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4551 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4552   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4553         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4554                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4555                     (match_operand:SI 2 "immediate_operand" "i")]
4556                    UNSPEC_ST2_LANE))]
4557   "TARGET_SIMD"
4558   {
4559     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4560     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4561   }
4562   [(set_attr "type" "neon_store2_one_lane<q>")]
4563 )
4564
4565 (define_expand "vec_store_lanesoi<mode>"
4566   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4567         (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4568                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4569                    UNSPEC_ST2))]
4570   "TARGET_SIMD"
4571 {
4572   if (BYTES_BIG_ENDIAN)
4573     {
4574       rtx tmp = gen_reg_rtx (OImode);
4575       rtx mask = aarch64_reverse_mask (<MODE>mode);
4576       emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4577       emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4578     }
4579   else
4580     emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4581   DONE;
4582 })
4583
4584 (define_insn "aarch64_simd_ld3<mode>"
4585   [(set (match_operand:CI 0 "register_operand" "=w")
4586         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4587                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4588                    UNSPEC_LD3))]
4589   "TARGET_SIMD"
4590   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4591   [(set_attr "type" "neon_load3_3reg<q>")]
4592 )
4593
4594 (define_insn "aarch64_simd_ld3r<mode>"
4595   [(set (match_operand:CI 0 "register_operand" "=w")
4596        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4597                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4598                   UNSPEC_LD3_DUP))]
4599   "TARGET_SIMD"
4600   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4601   [(set_attr "type" "neon_load3_all_lanes<q>")]
4602 )
4603
4604 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4605   [(set (match_operand:CI 0 "register_operand" "=w")
4606         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4607                     (match_operand:CI 2 "register_operand" "0")
4608                     (match_operand:SI 3 "immediate_operand" "i")
4609                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4610                    UNSPEC_LD3_LANE))]
4611   "TARGET_SIMD"
4612 {
4613     operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4614     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4615 }
4616   [(set_attr "type" "neon_load3_one_lane")]
4617 )
4618
4619 (define_expand "vec_load_lanesci<mode>"
4620   [(set (match_operand:CI 0 "register_operand" "=w")
4621         (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4622                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4623                    UNSPEC_LD3))]
4624   "TARGET_SIMD"
4625 {
4626   if (BYTES_BIG_ENDIAN)
4627     {
4628       rtx tmp = gen_reg_rtx (CImode);
4629       rtx mask = aarch64_reverse_mask (<MODE>mode);
4630       emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4631       emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4632     }
4633   else
4634     emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4635   DONE;
4636 })
4637
4638 (define_insn "aarch64_simd_st3<mode>"
4639   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4640         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4641                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4642                    UNSPEC_ST3))]
4643   "TARGET_SIMD"
4644   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4645   [(set_attr "type" "neon_store3_3reg<q>")]
4646 )
4647
4648 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4649 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4650   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4651         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4652                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4653                      (match_operand:SI 2 "immediate_operand" "i")]
4654                     UNSPEC_ST3_LANE))]
4655   "TARGET_SIMD"
4656   {
4657     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4658     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4659   }
4660   [(set_attr "type" "neon_store3_one_lane<q>")]
4661 )
4662
4663 (define_expand "vec_store_lanesci<mode>"
4664   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4665         (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4666                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4667                    UNSPEC_ST3))]
4668   "TARGET_SIMD"
4669 {
4670   if (BYTES_BIG_ENDIAN)
4671     {
4672       rtx tmp = gen_reg_rtx (CImode);
4673       rtx mask = aarch64_reverse_mask (<MODE>mode);
4674       emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4675       emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4676     }
4677   else
4678     emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4679   DONE;
4680 })
4681
4682 (define_insn "aarch64_simd_ld4<mode>"
4683   [(set (match_operand:XI 0 "register_operand" "=w")
4684         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4685                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4686                    UNSPEC_LD4))]
4687   "TARGET_SIMD"
4688   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4689   [(set_attr "type" "neon_load4_4reg<q>")]
4690 )
4691
4692 (define_insn "aarch64_simd_ld4r<mode>"
4693   [(set (match_operand:XI 0 "register_operand" "=w")
4694        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4695                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4696                   UNSPEC_LD4_DUP))]
4697   "TARGET_SIMD"
4698   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4699   [(set_attr "type" "neon_load4_all_lanes<q>")]
4700 )
4701
4702 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4703   [(set (match_operand:XI 0 "register_operand" "=w")
4704         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4705                     (match_operand:XI 2 "register_operand" "0")
4706                     (match_operand:SI 3 "immediate_operand" "i")
4707                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4708                    UNSPEC_LD4_LANE))]
4709   "TARGET_SIMD"
4710 {
4711     operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4712     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4713 }
4714   [(set_attr "type" "neon_load4_one_lane")]
4715 )
4716
4717 (define_expand "vec_load_lanesxi<mode>"
4718   [(set (match_operand:XI 0 "register_operand" "=w")
4719         (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4720                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4721                    UNSPEC_LD4))]
4722   "TARGET_SIMD"
4723 {
4724   if (BYTES_BIG_ENDIAN)
4725     {
4726       rtx tmp = gen_reg_rtx (XImode);
4727       rtx mask = aarch64_reverse_mask (<MODE>mode);
4728       emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4729       emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4730     }
4731   else
4732     emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4733   DONE;
4734 })
4735
4736 (define_insn "aarch64_simd_st4<mode>"
4737   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4738         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4739                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4740                    UNSPEC_ST4))]
4741   "TARGET_SIMD"
4742   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4743   [(set_attr "type" "neon_store4_4reg<q>")]
4744 )
4745
4746 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4747 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4748   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4749         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4750                      (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4751                      (match_operand:SI 2 "immediate_operand" "i")]
4752                     UNSPEC_ST4_LANE))]
4753   "TARGET_SIMD"
4754   {
4755     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4756     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4757   }
4758   [(set_attr "type" "neon_store4_one_lane<q>")]
4759 )
4760
4761 (define_expand "vec_store_lanesxi<mode>"
4762   [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4763         (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4764                     (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765                    UNSPEC_ST4))]
4766   "TARGET_SIMD"
4767 {
4768   if (BYTES_BIG_ENDIAN)
4769     {
4770       rtx tmp = gen_reg_rtx (XImode);
4771       rtx mask = aarch64_reverse_mask (<MODE>mode);
4772       emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4773       emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4774     }
4775   else
4776     emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4777   DONE;
4778 })
4779
4780 (define_insn_and_split "aarch64_rev_reglist<mode>"
4781 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4782         (unspec:VSTRUCT
4783                    [(match_operand:VSTRUCT 1 "register_operand" "w")
4784                     (match_operand:V16QI 2 "register_operand" "w")]
4785                    UNSPEC_REV_REGLIST))]
4786   "TARGET_SIMD"
4787   "#"
4788   "&& reload_completed"
4789   [(const_int 0)]
4790 {
4791   int i;
4792   int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4793   for (i = 0; i < nregs; i++)
4794     {
4795       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4796       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4797       emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4798     }
4799   DONE;
4800 }
4801   [(set_attr "type" "neon_tbl1_q")
4802    (set_attr "length" "<insn_count>")]
4803 )
4804
4805 ;; Reload patterns for AdvSIMD register list operands.
4806
4807 (define_expand "mov<mode>"
4808   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4809         (match_operand:VSTRUCT 1 "general_operand" ""))]
4810   "TARGET_SIMD"
4811 {
4812   if (can_create_pseudo_p ())
4813     {
4814       if (GET_CODE (operands[0]) != REG)
4815         operands[1] = force_reg (<MODE>mode, operands[1]);
4816     }
4817 })
4818
4819 (define_insn "*aarch64_mov<mode>"
4820   [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4821         (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4822   "TARGET_SIMD && !BYTES_BIG_ENDIAN
4823    && (register_operand (operands[0], <MODE>mode)
4824        || register_operand (operands[1], <MODE>mode))"
4825   "@
4826    #
4827    st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4828    ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4829   [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4830                      neon_load<nregs>_<nregs>reg_q")
4831    (set_attr "length" "<insn_count>,4,4")]
4832 )
4833
4834 (define_insn "aarch64_be_ld1<mode>"
4835   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
4836         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4837                              "aarch64_simd_struct_operand" "Utv")]
4838         UNSPEC_LD1))]
4839   "TARGET_SIMD"
4840   "ld1\\t{%0<Vmtype>}, %1"
4841   [(set_attr "type" "neon_load1_1reg<q>")]
4842 )
4843
4844 (define_insn "aarch64_be_st1<mode>"
4845   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4846         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4847         UNSPEC_ST1))]
4848   "TARGET_SIMD"
4849   "st1\\t{%1<Vmtype>}, %0"
4850   [(set_attr "type" "neon_store1_1reg<q>")]
4851 )
4852
4853 (define_insn "*aarch64_be_movoi"
4854   [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4855         (match_operand:OI 1 "general_operand"      " w,w,m"))]
4856   "TARGET_SIMD && BYTES_BIG_ENDIAN
4857    && (register_operand (operands[0], OImode)
4858        || register_operand (operands[1], OImode))"
4859   "@
4860    #
4861    stp\\t%q1, %R1, %0
4862    ldp\\t%q0, %R0, %1"
4863   [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4864    (set_attr "length" "8,4,4")]
4865 )
4866
4867 (define_insn "*aarch64_be_movci"
4868   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4869         (match_operand:CI 1 "general_operand"      " w,w,o"))]
4870   "TARGET_SIMD && BYTES_BIG_ENDIAN
4871    && (register_operand (operands[0], CImode)
4872        || register_operand (operands[1], CImode))"
4873   "#"
4874   [(set_attr "type" "multiple")
4875    (set_attr "length" "12,4,4")]
4876 )
4877
4878 (define_insn "*aarch64_be_movxi"
4879   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4880         (match_operand:XI 1 "general_operand"      " w,w,o"))]
4881   "TARGET_SIMD && BYTES_BIG_ENDIAN
4882    && (register_operand (operands[0], XImode)
4883        || register_operand (operands[1], XImode))"
4884   "#"
4885   [(set_attr "type" "multiple")
4886    (set_attr "length" "16,4,4")]
4887 )
4888
4889 (define_split
4890   [(set (match_operand:OI 0 "register_operand")
4891         (match_operand:OI 1 "register_operand"))]
4892   "TARGET_SIMD && reload_completed"
4893   [(const_int 0)]
4894 {
4895   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4896   DONE;
4897 })
4898
4899 (define_split
4900   [(set (match_operand:CI 0 "nonimmediate_operand")
4901         (match_operand:CI 1 "general_operand"))]
4902   "TARGET_SIMD && reload_completed"
4903   [(const_int 0)]
4904 {
4905   if (register_operand (operands[0], CImode)
4906       && register_operand (operands[1], CImode))
4907     {
4908       aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4909       DONE;
4910     }
4911   else if (BYTES_BIG_ENDIAN)
4912     {
4913       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4914                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
4915       emit_move_insn (gen_lowpart (V16QImode,
4916                                    simplify_gen_subreg (TImode, operands[0],
4917                                                         CImode, 32)),
4918                       gen_lowpart (V16QImode,
4919                                    simplify_gen_subreg (TImode, operands[1],
4920                                                         CImode, 32)));
4921       DONE;
4922     }
4923   else
4924     FAIL;
4925 })
4926
4927 (define_split
4928   [(set (match_operand:XI 0 "nonimmediate_operand")
4929         (match_operand:XI 1 "general_operand"))]
4930   "TARGET_SIMD && reload_completed"
4931   [(const_int 0)]
4932 {
4933   if (register_operand (operands[0], XImode)
4934       && register_operand (operands[1], XImode))
4935     {
4936       aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4937       DONE;
4938     }
4939   else if (BYTES_BIG_ENDIAN)
4940     {
4941       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4942                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
4943       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4944                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
4945       DONE;
4946     }
4947   else
4948     FAIL;
4949 })
4950
4951 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4952   [(match_operand:VSTRUCT 0 "register_operand" "=w")
4953    (match_operand:DI 1 "register_operand" "w")
4954    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4955   "TARGET_SIMD"
4956 {
4957   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4958   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4959                      * <VSTRUCT:nregs>);
4960
4961   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4962                                                                 mem));
4963   DONE;
4964 })
4965
4966 (define_insn "aarch64_ld2<mode>_dreg"
4967   [(set (match_operand:OI 0 "register_operand" "=w")
4968         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4969                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4970                    UNSPEC_LD2_DREG))]
4971   "TARGET_SIMD"
4972   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4973   [(set_attr "type" "neon_load2_2reg<q>")]
4974 )
4975
4976 (define_insn "aarch64_ld2<mode>_dreg"
4977   [(set (match_operand:OI 0 "register_operand" "=w")
4978         (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4979                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980                    UNSPEC_LD2_DREG))]
4981   "TARGET_SIMD"
4982   "ld1\\t{%S0.1d - %T0.1d}, %1"
4983   [(set_attr "type" "neon_load1_2reg<q>")]
4984 )
4985
4986 (define_insn "aarch64_ld3<mode>_dreg"
4987   [(set (match_operand:CI 0 "register_operand" "=w")
4988         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4989                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990                    UNSPEC_LD3_DREG))]
4991   "TARGET_SIMD"
4992   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4993   [(set_attr "type" "neon_load3_3reg<q>")]
4994 )
4995
4996 (define_insn "aarch64_ld3<mode>_dreg"
4997   [(set (match_operand:CI 0 "register_operand" "=w")
4998         (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4999                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5000                    UNSPEC_LD3_DREG))]
5001   "TARGET_SIMD"
5002   "ld1\\t{%S0.1d - %U0.1d}, %1"
5003   [(set_attr "type" "neon_load1_3reg<q>")]
5004 )
5005
5006 (define_insn "aarch64_ld4<mode>_dreg"
5007   [(set (match_operand:XI 0 "register_operand" "=w")
5008         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5009                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5010                    UNSPEC_LD4_DREG))]
5011   "TARGET_SIMD"
5012   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5013   [(set_attr "type" "neon_load4_4reg<q>")]
5014 )
5015
5016 (define_insn "aarch64_ld4<mode>_dreg"
5017   [(set (match_operand:XI 0 "register_operand" "=w")
5018         (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5019                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5020                    UNSPEC_LD4_DREG))]
5021   "TARGET_SIMD"
5022   "ld1\\t{%S0.1d - %V0.1d}, %1"
5023   [(set_attr "type" "neon_load1_4reg<q>")]
5024 )
5025
5026 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5027  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5028   (match_operand:DI 1 "register_operand" "r")
5029   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030   "TARGET_SIMD"
5031 {
5032   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5033   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5034
5035   emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5036   DONE;
5037 })
5038
5039 (define_expand "aarch64_ld1<VALL_F16:mode>"
5040  [(match_operand:VALL_F16 0 "register_operand")
5041   (match_operand:DI 1 "register_operand")]
5042   "TARGET_SIMD"
5043 {
5044   machine_mode mode = <VALL_F16:MODE>mode;
5045   rtx mem = gen_rtx_MEM (mode, operands[1]);
5046
5047   if (BYTES_BIG_ENDIAN)
5048     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5049   else
5050     emit_move_insn (operands[0], mem);
5051   DONE;
5052 })
5053
5054 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5055  [(match_operand:VSTRUCT 0 "register_operand" "=w")
5056   (match_operand:DI 1 "register_operand" "r")
5057   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5058   "TARGET_SIMD"
5059 {
5060   machine_mode mode = <VSTRUCT:MODE>mode;
5061   rtx mem = gen_rtx_MEM (mode, operands[1]);
5062
5063   emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5064   DONE;
5065 })
5066
5067 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5068   [(match_operand:VSTRUCT 0 "register_operand" "=w")
5069         (match_operand:DI 1 "register_operand" "w")
5070         (match_operand:VSTRUCT 2 "register_operand" "0")
5071         (match_operand:SI 3 "immediate_operand" "i")
5072         (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5073   "TARGET_SIMD"
5074 {
5075   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5076   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5077                      * <VSTRUCT:nregs>);
5078
5079   aarch64_simd_lane_bounds (operands[3], 0,
5080                             GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5081                             NULL);
5082   emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5083         operands[0], mem, operands[2], operands[3]));
5084   DONE;
5085 })
5086
5087 ;; Expanders for builtins to extract vector registers from large
5088 ;; opaque integer modes.
5089
5090 ;; D-register list.
5091
5092 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5093  [(match_operand:VDC 0 "register_operand" "=w")
5094   (match_operand:VSTRUCT 1 "register_operand" "w")
5095   (match_operand:SI 2 "immediate_operand" "i")]
5096   "TARGET_SIMD"
5097 {
5098   int part = INTVAL (operands[2]);
5099   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5100   int offset = part * 16;
5101
5102   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5103   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5104   DONE;
5105 })
5106
5107 ;; Q-register list.
5108
5109 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5110  [(match_operand:VQ 0 "register_operand" "=w")
5111   (match_operand:VSTRUCT 1 "register_operand" "w")
5112   (match_operand:SI 2 "immediate_operand" "i")]
5113   "TARGET_SIMD"
5114 {
5115   int part = INTVAL (operands[2]);
5116   int offset = part * 16;
5117
5118   emit_move_insn (operands[0],
5119                   gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5120   DONE;
5121 })
5122
5123 ;; Permuted-store expanders for neon intrinsics.
5124
5125 ;; Permute instructions
5126
5127 ;; vec_perm support
5128
5129 (define_expand "vec_perm_const<mode>"
5130   [(match_operand:VALL_F16 0 "register_operand")
5131    (match_operand:VALL_F16 1 "register_operand")
5132    (match_operand:VALL_F16 2 "register_operand")
5133    (match_operand:<V_INT_EQUIV> 3)]
5134   "TARGET_SIMD"
5135 {
5136   if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5137                                      operands[2], operands[3]))
5138     DONE;
5139   else
5140     FAIL;
5141 })
5142
5143 (define_expand "vec_perm<mode>"
5144   [(match_operand:VB 0 "register_operand")
5145    (match_operand:VB 1 "register_operand")
5146    (match_operand:VB 2 "register_operand")
5147    (match_operand:VB 3 "register_operand")]
5148   "TARGET_SIMD"
5149 {
5150   aarch64_expand_vec_perm (operands[0], operands[1],
5151                            operands[2], operands[3]);
5152   DONE;
5153 })
5154
5155 (define_insn "aarch64_tbl1<mode>"
5156   [(set (match_operand:VB 0 "register_operand" "=w")
5157         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5158                     (match_operand:VB 2 "register_operand" "w")]
5159                    UNSPEC_TBL))]
5160   "TARGET_SIMD"
5161   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5162   [(set_attr "type" "neon_tbl1<q>")]
5163 )
5164
5165 ;; Two source registers.
5166
5167 (define_insn "aarch64_tbl2v16qi"
5168   [(set (match_operand:V16QI 0 "register_operand" "=w")
5169         (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5170                        (match_operand:V16QI 2 "register_operand" "w")]
5171                       UNSPEC_TBL))]
5172   "TARGET_SIMD"
5173   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5174   [(set_attr "type" "neon_tbl2_q")]
5175 )
5176
5177 (define_insn "aarch64_tbl3<mode>"
5178   [(set (match_operand:VB 0 "register_operand" "=w")
5179         (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5180                       (match_operand:VB 2 "register_operand" "w")]
5181                       UNSPEC_TBL))]
5182   "TARGET_SIMD"
5183   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5184   [(set_attr "type" "neon_tbl3")]
5185 )
5186
5187 (define_insn "aarch64_tbx4<mode>"
5188   [(set (match_operand:VB 0 "register_operand" "=w")
5189         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5190                       (match_operand:OI 2 "register_operand" "w")
5191                       (match_operand:VB 3 "register_operand" "w")]
5192                       UNSPEC_TBX))]
5193   "TARGET_SIMD"
5194   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5195   [(set_attr "type" "neon_tbl4")]
5196 )
5197
5198 ;; Three source registers.
5199
5200 (define_insn "aarch64_qtbl3<mode>"
5201   [(set (match_operand:VB 0 "register_operand" "=w")
5202         (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5203                       (match_operand:VB 2 "register_operand" "w")]
5204                       UNSPEC_TBL))]
5205   "TARGET_SIMD"
5206   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5207   [(set_attr "type" "neon_tbl3")]
5208 )
5209
5210 (define_insn "aarch64_qtbx3<mode>"
5211   [(set (match_operand:VB 0 "register_operand" "=w")
5212         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5213                       (match_operand:CI 2 "register_operand" "w")
5214                       (match_operand:VB 3 "register_operand" "w")]
5215                       UNSPEC_TBX))]
5216   "TARGET_SIMD"
5217   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5218   [(set_attr "type" "neon_tbl3")]
5219 )
5220
5221 ;; Four source registers.
5222
5223 (define_insn "aarch64_qtbl4<mode>"
5224   [(set (match_operand:VB 0 "register_operand" "=w")
5225         (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5226                       (match_operand:VB 2 "register_operand" "w")]
5227                       UNSPEC_TBL))]
5228   "TARGET_SIMD"
5229   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5230   [(set_attr "type" "neon_tbl4")]
5231 )
5232
5233 (define_insn "aarch64_qtbx4<mode>"
5234   [(set (match_operand:VB 0 "register_operand" "=w")
5235         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5236                       (match_operand:XI 2 "register_operand" "w")
5237                       (match_operand:VB 3 "register_operand" "w")]
5238                       UNSPEC_TBX))]
5239   "TARGET_SIMD"
5240   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5241   [(set_attr "type" "neon_tbl4")]
5242 )
5243
5244 (define_insn_and_split "aarch64_combinev16qi"
5245   [(set (match_operand:OI 0 "register_operand" "=w")
5246         (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5247                     (match_operand:V16QI 2 "register_operand" "w")]
5248                    UNSPEC_CONCAT))]
5249   "TARGET_SIMD"
5250   "#"
5251   "&& reload_completed"
5252   [(const_int 0)]
5253 {
5254   aarch64_split_combinev16qi (operands);
5255   DONE;
5256 }
5257 [(set_attr "type" "multiple")]
5258 )
5259
5260 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5261   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5262         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5263                           (match_operand:VALL_F16 2 "register_operand" "w")]
5264          PERMUTE))]
5265   "TARGET_SIMD"
5266   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5267   [(set_attr "type" "neon_permute<q>")]
5268 )
5269
5270 ;; Note immediate (third) operand is lane index not byte index.
5271 (define_insn "aarch64_ext<mode>"
5272   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5273         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5274                           (match_operand:VALL_F16 2 "register_operand" "w")
5275                           (match_operand:SI 3 "immediate_operand" "i")]
5276          UNSPEC_EXT))]
5277   "TARGET_SIMD"
5278 {
5279   operands[3] = GEN_INT (INTVAL (operands[3])
5280       * GET_MODE_UNIT_SIZE (<MODE>mode));
5281   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5282 }
5283   [(set_attr "type" "neon_ext<q>")]
5284 )
5285
5286 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5287   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5288         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5289                     REVERSE))]
5290   "TARGET_SIMD"
5291   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5292   [(set_attr "type" "neon_rev<q>")]
5293 )
5294
5295 (define_insn "aarch64_st2<mode>_dreg"
5296   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5297         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5298                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299                    UNSPEC_ST2))]
5300   "TARGET_SIMD"
5301   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5302   [(set_attr "type" "neon_store2_2reg")]
5303 )
5304
5305 (define_insn "aarch64_st2<mode>_dreg"
5306   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5307         (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5308                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5309                    UNSPEC_ST2))]
5310   "TARGET_SIMD"
5311   "st1\\t{%S1.1d - %T1.1d}, %0"
5312   [(set_attr "type" "neon_store1_2reg")]
5313 )
5314
5315 (define_insn "aarch64_st3<mode>_dreg"
5316   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5317         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5318                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5319                    UNSPEC_ST3))]
5320   "TARGET_SIMD"
5321   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5322   [(set_attr "type" "neon_store3_3reg")]
5323 )
5324
5325 (define_insn "aarch64_st3<mode>_dreg"
5326   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5327         (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5328                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329                    UNSPEC_ST3))]
5330   "TARGET_SIMD"
5331   "st1\\t{%S1.1d - %U1.1d}, %0"
5332   [(set_attr "type" "neon_store1_3reg")]
5333 )
5334
5335 (define_insn "aarch64_st4<mode>_dreg"
5336   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5337         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5338                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5339                    UNSPEC_ST4))]
5340   "TARGET_SIMD"
5341   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5342   [(set_attr "type" "neon_store4_4reg")]
5343 )
5344
5345 (define_insn "aarch64_st4<mode>_dreg"
5346   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5347         (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5348                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5349                    UNSPEC_ST4))]
5350   "TARGET_SIMD"
5351   "st1\\t{%S1.1d - %V1.1d}, %0"
5352   [(set_attr "type" "neon_store1_4reg")]
5353 )
5354
5355 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5356  [(match_operand:DI 0 "register_operand" "r")
5357   (match_operand:VSTRUCT 1 "register_operand" "w")
5358   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5359   "TARGET_SIMD"
5360 {
5361   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5362   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5363
5364   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5365   DONE;
5366 })
5367
5368 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5369  [(match_operand:DI 0 "register_operand" "r")
5370   (match_operand:VSTRUCT 1 "register_operand" "w")
5371   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5372   "TARGET_SIMD"
5373 {
5374   machine_mode mode = <VSTRUCT:MODE>mode;
5375   rtx mem = gen_rtx_MEM (mode, operands[0]);
5376
5377   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5378   DONE;
5379 })
5380
5381 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5382  [(match_operand:DI 0 "register_operand" "r")
5383   (match_operand:VSTRUCT 1 "register_operand" "w")
5384   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5385   (match_operand:SI 2 "immediate_operand")]
5386   "TARGET_SIMD"
5387 {
5388   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5389   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5390                      * <VSTRUCT:nregs>);
5391
5392   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5393                 mem, operands[1], operands[2]));
5394   DONE;
5395 })
5396
5397 (define_expand "aarch64_st1<VALL_F16:mode>"
5398  [(match_operand:DI 0 "register_operand")
5399   (match_operand:VALL_F16 1 "register_operand")]
5400   "TARGET_SIMD"
5401 {
5402   machine_mode mode = <VALL_F16:MODE>mode;
5403   rtx mem = gen_rtx_MEM (mode, operands[0]);
5404
5405   if (BYTES_BIG_ENDIAN)
5406     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5407   else
5408     emit_move_insn (mem, operands[1]);
5409   DONE;
5410 })
5411
5412 ;; Expander for builtins to insert vector registers into large
5413 ;; opaque integer modes.
5414
5415 ;; Q-register list.  We don't need a D-reg inserter as we zero
5416 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5417
5418 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5419  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5420   (match_operand:VSTRUCT 1 "register_operand" "0")
5421   (match_operand:VQ 2 "register_operand" "w")
5422   (match_operand:SI 3 "immediate_operand" "i")]
5423   "TARGET_SIMD"
5424 {
5425   int part = INTVAL (operands[3]);
5426   int offset = part * 16;
5427
5428   emit_move_insn (operands[0], operands[1]);
5429   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5430                   operands[2]);
5431   DONE;
5432 })
5433
5434 ;; Standard pattern name vec_init<mode><Vel>.
5435
5436 (define_expand "vec_init<mode><Vel>"
5437   [(match_operand:VALL_F16 0 "register_operand" "")
5438    (match_operand 1 "" "")]
5439   "TARGET_SIMD"
5440 {
5441   aarch64_expand_vector_init (operands[0], operands[1]);
5442   DONE;
5443 })
5444
5445 (define_insn "*aarch64_simd_ld1r<mode>"
5446   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5447         (vec_duplicate:VALL_F16
5448           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5449   "TARGET_SIMD"
5450   "ld1r\\t{%0.<Vtype>}, %1"
5451   [(set_attr "type" "neon_load1_all_lanes")]
5452 )
5453
5454 (define_insn "aarch64_frecpe<mode>"
5455   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5456         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5457          UNSPEC_FRECPE))]
5458   "TARGET_SIMD"
5459   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5460   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5461 )
5462
5463 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5464   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5465         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5466          FRECP))]
5467   "TARGET_SIMD"
5468   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5469   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5470 )
5471
5472 (define_insn "aarch64_frecps<mode>"
5473   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5474         (unspec:VHSDF_HSDF
5475           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5476           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5477           UNSPEC_FRECPS))]
5478   "TARGET_SIMD"
5479   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5480   [(set_attr "type" "neon_fp_recps_<stype><q>")]
5481 )
5482
5483 (define_insn "aarch64_urecpe<mode>"
5484   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5485         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5486                 UNSPEC_URECPE))]
5487  "TARGET_SIMD"
5488  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5489   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5490
5491 ;; Standard pattern name vec_extract<mode><Vel>.
5492
5493 (define_expand "vec_extract<mode><Vel>"
5494   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5495    (match_operand:VALL_F16 1 "register_operand" "")
5496    (match_operand:SI 2 "immediate_operand" "")]
5497   "TARGET_SIMD"
5498 {
5499     emit_insn
5500       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5501     DONE;
5502 })
5503
5504 ;; aes
5505
5506 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5507   [(set (match_operand:V16QI 0 "register_operand" "=w")
5508         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5509                        (match_operand:V16QI 2 "register_operand" "w")]
5510          CRYPTO_AES))]
5511   "TARGET_SIMD && TARGET_CRYPTO"
5512   "aes<aes_op>\\t%0.16b, %2.16b"
5513   [(set_attr "type" "crypto_aese")]
5514 )
5515
5516 ;; When AES/AESMC fusion is enabled we want the register allocation to
5517 ;; look like:
5518 ;;    AESE Vn, _
5519 ;;    AESMC Vn, Vn
5520 ;; So prefer to tie operand 1 to operand 0 when fusing.
5521
5522 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5523   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5524         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5525          CRYPTO_AESMC))]
5526   "TARGET_SIMD && TARGET_CRYPTO"
5527   "aes<aesmc_op>\\t%0.16b, %1.16b"
5528   [(set_attr "type" "crypto_aesmc")
5529    (set_attr_alternative "enabled"
5530      [(if_then_else (match_test
5531                        "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5532                      (const_string "yes" )
5533                      (const_string "no"))
5534       (const_string "yes")])]
5535 )
5536
5537 ;; sha1
5538
5539 (define_insn "aarch64_crypto_sha1hsi"
5540   [(set (match_operand:SI 0 "register_operand" "=w")
5541         (unspec:SI [(match_operand:SI 1
5542                        "register_operand" "w")]
5543          UNSPEC_SHA1H))]
5544   "TARGET_SIMD && TARGET_CRYPTO"
5545   "sha1h\\t%s0, %s1"
5546   [(set_attr "type" "crypto_sha1_fast")]
5547 )
5548
5549 (define_insn "aarch64_crypto_sha1hv4si"
5550   [(set (match_operand:SI 0 "register_operand" "=w")
5551         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5552                      (parallel [(const_int 0)]))]
5553          UNSPEC_SHA1H))]
5554   "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5555   "sha1h\\t%s0, %s1"
5556   [(set_attr "type" "crypto_sha1_fast")]
5557 )
5558
5559 (define_insn "aarch64_be_crypto_sha1hv4si"
5560   [(set (match_operand:SI 0 "register_operand" "=w")
5561         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5562                      (parallel [(const_int 3)]))]
5563          UNSPEC_SHA1H))]
5564   "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5565   "sha1h\\t%s0, %s1"
5566   [(set_attr "type" "crypto_sha1_fast")]
5567 )
5568
5569 (define_insn "aarch64_crypto_sha1su1v4si"
5570   [(set (match_operand:V4SI 0 "register_operand" "=w")
5571         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5572                       (match_operand:V4SI 2 "register_operand" "w")]
5573          UNSPEC_SHA1SU1))]
5574   "TARGET_SIMD && TARGET_CRYPTO"
5575   "sha1su1\\t%0.4s, %2.4s"
5576   [(set_attr "type" "crypto_sha1_fast")]
5577 )
5578
5579 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5580   [(set (match_operand:V4SI 0 "register_operand" "=w")
5581         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5582                       (match_operand:SI 2 "register_operand" "w")
5583                       (match_operand:V4SI 3 "register_operand" "w")]
5584          CRYPTO_SHA1))]
5585   "TARGET_SIMD && TARGET_CRYPTO"
5586   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5587   [(set_attr "type" "crypto_sha1_slow")]
5588 )
5589
5590 (define_insn "aarch64_crypto_sha1su0v4si"
5591   [(set (match_operand:V4SI 0 "register_operand" "=w")
5592         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5593                       (match_operand:V4SI 2 "register_operand" "w")
5594                       (match_operand:V4SI 3 "register_operand" "w")]
5595          UNSPEC_SHA1SU0))]
5596   "TARGET_SIMD && TARGET_CRYPTO"
5597   "sha1su0\\t%0.4s, %2.4s, %3.4s"
5598   [(set_attr "type" "crypto_sha1_xor")]
5599 )
5600
5601 ;; sha256
5602
5603 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5604   [(set (match_operand:V4SI 0 "register_operand" "=w")
5605         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5606                       (match_operand:V4SI 2 "register_operand" "w")
5607                       (match_operand:V4SI 3 "register_operand" "w")]
5608          CRYPTO_SHA256))]
5609   "TARGET_SIMD && TARGET_CRYPTO"
5610   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5611   [(set_attr "type" "crypto_sha256_slow")]
5612 )
5613
5614 (define_insn "aarch64_crypto_sha256su0v4si"
5615   [(set (match_operand:V4SI 0 "register_operand" "=w")
5616         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5617                       (match_operand:V4SI 2 "register_operand" "w")]
5618          UNSPEC_SHA256SU0))]
5619   "TARGET_SIMD &&TARGET_CRYPTO"
5620   "sha256su0\\t%0.4s, %2.4s"
5621   [(set_attr "type" "crypto_sha256_fast")]
5622 )
5623
5624 (define_insn "aarch64_crypto_sha256su1v4si"
5625   [(set (match_operand:V4SI 0 "register_operand" "=w")
5626         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5627                       (match_operand:V4SI 2 "register_operand" "w")
5628                       (match_operand:V4SI 3 "register_operand" "w")]
5629          UNSPEC_SHA256SU1))]
5630   "TARGET_SIMD &&TARGET_CRYPTO"
5631   "sha256su1\\t%0.4s, %2.4s, %3.4s"
5632   [(set_attr "type" "crypto_sha256_slow")]
5633 )
5634
5635 ;; pmull
5636
5637 (define_insn "aarch64_crypto_pmulldi"
5638   [(set (match_operand:TI 0 "register_operand" "=w")
5639         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
5640                      (match_operand:DI 2 "register_operand" "w")]
5641                     UNSPEC_PMULL))]
5642  "TARGET_SIMD && TARGET_CRYPTO"
5643  "pmull\\t%0.1q, %1.1d, %2.1d"
5644   [(set_attr "type" "crypto_pmull")]
5645 )
5646
5647 (define_insn "aarch64_crypto_pmullv2di"
5648  [(set (match_operand:TI 0 "register_operand" "=w")
5649        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5650                    (match_operand:V2DI 2 "register_operand" "w")]
5651                   UNSPEC_PMULL2))]
5652   "TARGET_SIMD && TARGET_CRYPTO"
5653   "pmull2\\t%0.1q, %1.2d, %2.2d"
5654   [(set_attr "type" "crypto_pmull")]
5655 )