1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard vector move expander for all vector modes (incl. FP16 vectors).
;; If the destination is memory, force the source into a register first,
;; since memory-to-memory (or constant-to-memory) moves are not legal.
;; NOTE(review): some original lines (condition/closing) are elided here.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 	(match_operand:VALL_F16 1 "general_operand" ""))]
26     if (GET_CODE (operands[0]) == MEM)
27       operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  Must not FAIL: when neither operand
;; is a register (e.g. memory := constant from the auto-vectorizer),
;; force operand 1 into a register so the resulting move is representable.
31 (define_expand "movmisalign<mode>"
32   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33         (match_operand:VALL 1 "general_operand" ""))]
36   /* This pattern is not permitted to fail during expansion: if both arguments
37      are non-registers (e.g. memory := constant, which can be created by the
38      auto-vectorizer), force operand 1 into a register.  */
39   if (!register_operand (operands[0], <MODE>mode)
40       && !register_operand (operands[1], <MODE>mode))
41     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: DUP from a general-purpose register, or DUP from
;; element 0 of a SIMD register.
44 (define_insn "aarch64_simd_dup<mode>"
45   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47         (match_operand:<VEL> 1 "register_operand" "r, w")))]
50    dup\\t%0.<Vtype>, %<vw>1
51    dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52   [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant: only the SIMD-register source form is
;; provided (DUP from element 0).
55 (define_insn "aarch64_simd_dup<mode>"
56   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 	(vec_duplicate:VDQF_F16
58 	  (match_operand:<VEL> 1 "register_operand" "w")))]
60   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61   [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane of a vector into every lane of the result.
;; The lane index (operand 2) is remapped for big-endian via
;; ENDIAN_LANE_N before printing.
64 (define_insn "aarch64_dup_lane<mode>"
65   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 	(vec_duplicate:VALL_F16
68 	    (match_operand:VALL_F16 1 "register_operand" "w")
69 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76   [(set_attr "type" "neon_dup<q>")]
;; Same as above, but the source vector has the "swapped width" mode
;; (different number of lanes); the lane index is remapped in the
;; source vector's mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
81 	(vec_duplicate:VALL_F16_NO_V2Q
83 	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 					  INTVAL (operands[2])));
90     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92   [(set_attr "type" "neon_dup<q>")]
;; 64-bit (D-register) vector move.  Alternatives: load, store,
;; SIMD reg-reg (ORR), SIMD->GP (UMOV), GP->SIMD (INS), GP-GP (MOV),
;; and immediate (Dn).  At least one operand must be a register.
95 (define_insn "*aarch64_simd_mov<mode>"
96   [(set (match_operand:VD 0 "nonimmediate_operand"
97 		"=w, m,  w, ?r, ?w, ?r, w")
98 	(match_operand:VD 1 "general_operand"
99 		"m,  w,  w,  w,  r,  r, Dn"))]
101    && (register_operand (operands[0], <MODE>mode)
102        || register_operand (operands[1], <MODE>mode))"
104    switch (which_alternative)
106      case 0: return "ldr\\t%d0, %1";
107      case 1: return "str\\t%d1, %0";
108      case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109      case 3: return "umov\t%0, %1.d[0]";
110      case 4: return "ins\t%0.d[0], %1";
111      case 5: return "mov\t%0, %1";
113        return aarch64_output_simd_mov_immediate (operands[1],
115      default: gcc_unreachable ();
118   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 		     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120                      mov_reg, neon_move<q>")]
;; 128-bit (Q-register) vector move.  GP-register alternatives need two
;; instructions (handled by the splitters below), hence "multiple" type
;; and length 8 for those alternatives.
123 (define_insn "*aarch64_simd_mov<mode>"
124   [(set (match_operand:VQ 0 "nonimmediate_operand"
125 		"=w, m,  w, ?r, ?w, ?r, w")
126 	(match_operand:VQ 1 "general_operand"
127 		"m,  w,  w,  w,  r,  r, Dn"))]
129    && (register_operand (operands[0], <MODE>mode)
130        || register_operand (operands[1], <MODE>mode))"
132   switch (which_alternative)
135 	return "ldr\\t%q0, %1";
137 	return "str\\t%q1, %0";
139 	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 		     neon_logic<q>, multiple, multiple, multiple,\
153    (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of consecutive D registers with LDP.  The condition
;; requires the second address to be exactly the first plus the mode
;; size, i.e. the two loads are adjacent in memory.
156 (define_insn "load_pair<mode>"
157   [(set (match_operand:VD 0 "register_operand" "=w")
158 	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159    (set (match_operand:VD 2 "register_operand" "=w")
160 	(match_operand:VD 3 "memory_operand" "m"))]
162    && rtx_equal_p (XEXP (operands[3], 0),
163 		   plus_constant (Pmode,
164 				  XEXP (operands[1], 0),
165 				  GET_MODE_SIZE (<MODE>mode)))"
167   [(set_attr "type" "neon_ldp")]
;; Store a pair of consecutive D registers with STP; mirror of
;; load_pair, with the same adjacency condition on the addresses.
170 (define_insn "store_pair<mode>"
171   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 	(match_operand:VD 1 "register_operand" "w"))
173    (set (match_operand:VD 2 "memory_operand" "=m")
174 	(match_operand:VD 3 "register_operand" "w"))]
176    && rtx_equal_p (XEXP (operands[2], 0),
177 		   plus_constant (Pmode,
178 				  XEXP (operands[0], 0),
179 				  GET_MODE_SIZE (<MODE>mode)))"
181   [(set_attr "type" "neon_stp")]
;; NOTE(review): the (define_split ...) header lines appear to be elided
;; from this view; these are the split bodies for Q-register moves.
;; First split: GP <- GP 128-bit move, after reload, becomes two DImode
;; register-to-register moves.
185   [(set (match_operand:VQ 0 "register_operand" "")
186       (match_operand:VQ 1 "register_operand" ""))]
187   "TARGET_SIMD && reload_completed
188    && GP_REGNUM_P (REGNO (operands[0]))
189    && GP_REGNUM_P (REGNO (operands[1]))"
192   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Second split: cross-file (FP<->GP) 128-bit move, after reload, is
;; split by aarch64_split_simd_move.
197   [(set (match_operand:VQ 0 "register_operand" "")
198         (match_operand:VQ 1 "register_operand" ""))]
199   "TARGET_SIMD && reload_completed
200    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204   aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move into two 64-bit half moves.  When the source is
;; a GP register pair, move each half into the low/high quad of the
;; destination; otherwise extract the low/high halves of the SIMD
;; source into the destination halves via vec_select parallels.
208 (define_expand "aarch64_split_simd_mov<mode>"
209   [(set (match_operand:VQ 0)
210         (match_operand:VQ 1))]
213     rtx dst = operands[0];
214     rtx src = operands[1];
216     if (GP_REGNUM_P (REGNO (src)))
218         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219         rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 	  (gen_move_lo_quad_<mode> (dst, src_low_part));
224 	  (gen_move_hi_quad_<mode> (dst, src_high_part));
229         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a Q register into a GP register (post-reload
;; only; lane selection via a vect_par_cnst_lo_half parallel).
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 	  (match_operand:VQ 1 "register_operand" "w")
247 	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248   "TARGET_SIMD && reload_completed"
250   [(set_attr "type" "neon_to_gp<q>")
251    (set_attr "length" "4")
;; Move the high half of a Q register into a GP register; the parallel
;; selects the upper lanes.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 	  (match_operand:VQ 1 "register_operand" "w")
258 	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259   "TARGET_SIMD && reload_completed"
261   [(set_attr "type" "neon_to_gp<q>")
262    (set_attr "length" "4")
;; ORN: bitwise OR with complement of operand 1.  Note the operand
;; order in the output: ORN computes %2 | ~%1.
265 (define_insn "orn<mode>3"
266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 		(match_operand:VDQ_I 2 "register_operand" "w")))]
270   "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271   [(set_attr "type" "neon_logic<q>")]
;; BIC: bitwise AND with complement of operand 1 (%2 & ~%1).
274 (define_insn "bic<mode>3"
275  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 		(match_operand:VDQ_I 2 "register_operand" "w")))]
279   "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280   [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
283 (define_insn "add<mode>3"
284   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 		  (match_operand:VDQ_I 2 "register_operand" "w")))]
288   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289   [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
292 (define_insn "sub<mode>3"
293   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
297   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298   [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication (byte/half/single lanes only — there
;; is no V2DI MUL instruction).
301 (define_insn "mul<mode>3"
302   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
310 (define_insn "bswap<mode>2"
311   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (RBIT on byte vectors).
318 (define_insn "aarch64_rbit<mode>"
319   [(set (match_operand:VB 0 "register_operand" "=w")
320 	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as: byte-swap, bit-reverse each
;; byte (viewing the vector as bytes via a subreg), then CLZ — the
;; combination reverses the whole element's bit order.
327 (define_expand "ctz<mode>2"
328   [(set (match_operand:VS 0 "register_operand")
329         (ctz:VS (match_operand:VS 1 "register_operand")))]
332      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; copysign(x, y): build a sign-bit mask (all bits above the mantissa,
;; i.e. just the sign bit: -1 << (unit_bits - 1)) and use BSL to take
;; the sign from operand 2 and the magnitude from operand 1.
341 (define_expand "copysign<mode>3"
342   [(match_operand:VHSDF 0 "register_operand")
343    (match_operand:VHSDF 1 "register_operand")
344    (match_operand:VHSDF 2 "register_operand")]
345   "TARGET_FLOAT && TARGET_SIMD"
347   rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
348   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
350   emit_move_insn (v_bitmask,
351 		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
352 						     HOST_WIDE_INT_M1U << bits));
353   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
354 					 operands[2], operands[1]));
;; Multiply a vector by a single selected lane of another vector
;; (MUL/FMUL by element).  Lane index is endian-remapped.
359 (define_insn "*aarch64_mul3_elt<mode>"
360  [(set (match_operand:VMUL 0 "register_operand" "=w")
364 	  (match_operand:VMUL 1 "register_operand" "<h_con>")
365 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
366       (match_operand:VMUL 3 "register_operand" "w")))]
369     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
370     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
372   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane source has the swapped-width mode; remap the
;; lane index in that mode.
375 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
376   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
377      (mult:VMUL_CHANGE_NLANES
378        (vec_duplicate:VMUL_CHANGE_NLANES
380 	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
381 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
382       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
385     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
386 					  INTVAL (operands[2])));
387     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
389   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a duplicated scalar register: always uses element 0 of
;; the scalar's SIMD register.
392 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
393  [(set (match_operand:VMUL 0 "register_operand" "=w")
396 	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
397       (match_operand:VMUL 2 "register_operand" "w")))]
399   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
400   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
403 (define_insn "aarch64_rsqrte<mode>"
404   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
405 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
408   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
409   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS) — one Newton-Raphson iteration
;; step used by the approximation sequence.
411 (define_insn "aarch64_rsqrts<mode>"
412   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
413 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
414 			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
417   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
418   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the approximate-sqrt helper (recip=true).
420 (define_expand "rsqrt<mode>2"
421   [(set (match_operand:VALLF 0 "register_operand" "=w")
422 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
426   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF result = V2DF lane * DF scalar, emitted as FMUL by element in
;; V2DF mode; lane index endian-remapped in V2DFmode.
430 (define_insn "*aarch64_mul3_elt_to_64v2df"
431   [(set (match_operand:DF 0 "register_operand" "=w")
434 	 (match_operand:V2DF 1 "register_operand" "w")
435 	 (parallel [(match_operand:SI 2 "immediate_operand")]))
436        (match_operand:DF 3 "register_operand" "w")))]
439     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
440     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
442   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
445 (define_insn "neg<mode>2"
446   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
447 	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
449   "neg\t%0.<Vtype>, %1.<Vtype>"
450   [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (standard pattern, may combine freely).
453 (define_insn "abs<mode>2"
454   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
455         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
457   "abs\t%0.<Vtype>, %1.<Vtype>"
458   [(set_attr "type" "neon_abs<q>")]
461 ;; The intrinsic version of integer ABS must not be allowed to
462 ;; combine with any operation with an integrated ABS step, such
;; as SABD/SABA — hence it is expressed as an opaque unspec rather
;; than an (abs ...) rtx.
464 (define_insn "aarch64_abs<mode>"
465   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
467 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
470   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
471   [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| as SABD.
474 (define_insn "abd<mode>_3"
475   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
476 	(abs:VDQ_BHSI (minus:VDQ_BHSI
477 		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
478 		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
480   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
481   [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate: acc + |op1 - op2| as SABA.
;; Operand 3 is tied to the output ("0" constraint) as the accumulator.
484 (define_insn "aba<mode>_3"
485   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
486 	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
487 			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
488 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
489 		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
491   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
492   [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar modes.
495 (define_insn "fabd<mode>3"
496   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
499 	  (match_operand:VHSDF_HSDF 1 "register_operand" "w")
500 	  (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
502   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
503   [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; Bitwise AND.
506 (define_insn "and<mode>3"
507   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
511   "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512   [(set_attr "type" "neon_logic<q>")]
;; Bitwise OR.
515 (define_insn "ior<mode>3"
516   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
518 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
520   "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
521   [(set_attr "type" "neon_logic<q>")]
;; Bitwise XOR.
524 (define_insn "xor<mode>3"
525   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
526         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
527 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
529   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
530   [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT.
533 (define_insn "one_cmpl<mode>2"
534   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
535         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
537   "not\t%0.<Vbtype>, %1.<Vbtype>"
538   [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of an integer vector (vec_merge with a
;; one-hot mask in operand 2).  The mask bit is converted to a lane
;; number via exact_log2, endian-remapped, then re-encoded; %p2 prints
;; the lane.  Alternatives: INS from GP, INS from SIMD lane 0, LD1 from
;; memory.
541 (define_insn "aarch64_simd_vec_set<mode>"
542   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
544 	    (vec_duplicate:VDQ_BHSI
545 		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
546 	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
547 	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
550    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
551    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
552    switch (which_alternative)
555 	return "ins\\t%0.<Vetype>[%p2], %w1";
557 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
559 	return "ld1\\t{%0.<Vetype>}[%p2], %1";
564   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Copy one lane of vector 3 (lane 4) into one lane of vector 1
;; (selected by the one-hot mask in operand 2): INS element-to-element.
;; Both lane numbers are endian-remapped.
567 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
568   [(set (match_operand:VALL 0 "register_operand" "=w")
572 	    (match_operand:VALL 3 "register_operand" "w")
574 	      [(match_operand:SI 4 "immediate_operand" "i")])))
575 	  (match_operand:VALL 1 "register_operand" "0")
576 	  (match_operand:SI 2 "immediate_operand" "i")))]
579     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
580     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
581     operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
583     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
585   [(set_attr "type" "neon_ins<q>")]
;; As above, but the source vector has the swapped-width mode, so its
;; lane index is remapped in <VSWAP_WIDTH>mode instead.
588 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
589   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
590 	(vec_merge:VALL_F16_NO_V2Q
591 	    (vec_duplicate:VALL_F16_NO_V2Q
593 	      (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
595 		[(match_operand:SI 4 "immediate_operand" "i")])))
596 	  (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
597 	  (match_operand:SI 2 "immediate_operand" "i")))]
600     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
601     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
602     operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
603 			   INTVAL (operands[4])));
605     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
607   [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by an immediate vector (USHR).
610 (define_insn "aarch64_simd_lshr<mode>"
611  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
615  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
616   [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector (SSHR).
619 (define_insn "aarch64_simd_ashr<mode>"
620  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
624  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
625   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (SHL).
628 (define_insn "aarch64_simd_imm_shl<mode>"
629  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
631 		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
633   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
634   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a per-lane register amount (SSHL; for ashift the
;; amounts are non-negative so the signed form is used).
637 (define_insn "aarch64_simd_reg_sshl<mode>"
638  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
639        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
640 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
642   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
643   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL with a register amount: negative amounts shift right with zero
;; fill; kept as an unspec so it cannot be simplified as a plain shift.
646 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
647  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
648        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
649 		    (match_operand:VDQ_I 2 "register_operand" "w")]
650 		   UNSPEC_ASHIFT_UNSIGNED))]
652   "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
653   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL with a register amount: negative amounts shift right with sign
;; fill; likewise an unspec.
656 (define_insn "aarch64_simd_reg_shl<mode>_signed"
657  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
658        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
659 		    (match_operand:VDQ_I 2 "register_operand" "w")]
660 		  UNSPEC_ASHIFT_SIGNED))]
662   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
663   [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left expander with a scalar SI amount.  In-range constants use
;; the immediate SHL pattern; out-of-range constants fall through to the
;; register path; memory amounts are forced into a register; register
;; amounts are duplicated across lanes and use SSHL.
666 (define_expand "ashl<mode>3"
667   [(match_operand:VDQ_I 0 "register_operand" "")
668    (match_operand:VDQ_I 1 "register_operand" "")
669    (match_operand:SI 2 "general_operand" "")]
672   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
675   if (CONST_INT_P (operands[2]))
677       shift_amount = INTVAL (operands[2]);
678       if (shift_amount >= 0 && shift_amount < bit_width)
680 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
682 	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
689 	  operands[2] = force_reg (SImode, operands[2]);
692   else if (MEM_P (operands[2]))
694       operands[2] = force_reg (SImode, operands[2]);
697   if (REG_P (operands[2]))
699       rtx tmp = gen_reg_rtx (<MODE>mode);
700       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
701 					     convert_to_mode (<VEL>mode,
704       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical shift-right expander.  In-range constant amounts (note the
;; right-shift immediate range is 1..bit_width) use USHR; otherwise the
;; amount is negated and broadcast, then USHL (which shifts right for
;; negative amounts) performs the variable shift.
713 (define_expand "lshr<mode>3"
714   [(match_operand:VDQ_I 0 "register_operand" "")
715    (match_operand:VDQ_I 1 "register_operand" "")
716    (match_operand:SI 2 "general_operand" "")]
719   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
722   if (CONST_INT_P (operands[2]))
724       shift_amount = INTVAL (operands[2]);
725       if (shift_amount > 0 && shift_amount <= bit_width)
727 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
729 	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
735           operands[2] = force_reg (SImode, operands[2]);
737   else if (MEM_P (operands[2]))
739       operands[2] = force_reg (SImode, operands[2]);
742   if (REG_P (operands[2]))
744       rtx tmp = gen_reg_rtx (SImode);
745       rtx tmp1 = gen_reg_rtx (<MODE>mode);
746       emit_insn (gen_negsi2 (tmp, operands[2]));
747       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
748 					     convert_to_mode (<VEL>mode,
750       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic shift-right expander; identical structure to lshr<mode>3
;; but emits SSHR for constants and the signed SSHL unspec (negated,
;; broadcast amount) for the variable case.
760 (define_expand "ashr<mode>3"
761   [(match_operand:VDQ_I 0 "register_operand" "")
762    (match_operand:VDQ_I 1 "register_operand" "")
763    (match_operand:SI 2 "general_operand" "")]
766   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
769   if (CONST_INT_P (operands[2]))
771       shift_amount = INTVAL (operands[2]);
772       if (shift_amount > 0 && shift_amount <= bit_width)
774 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
776 	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
782           operands[2] = force_reg (SImode, operands[2]);
784   else if (MEM_P (operands[2]))
786       operands[2] = force_reg (SImode, operands[2]);
789   if (REG_P (operands[2]))
791       rtx tmp = gen_reg_rtx (SImode);
792       rtx tmp1 = gen_reg_rtx (<MODE>mode);
793       emit_insn (gen_negsi2 (tmp, operands[2]));
794       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
795 					     convert_to_mode (<VEL>mode,
797       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: amounts are already per-lane, so emit
;; SSHL directly.
807 (define_expand "vashl<mode>3"
808  [(match_operand:VDQ_I 0 "register_operand" "")
809   (match_operand:VDQ_I 1 "register_operand" "")
810   (match_operand:VDQ_I 2 "register_operand" "")]
813   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
818 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
819 ;; Negating individual lanes most certainly offsets the
820 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane amounts
;; and use the signed SSHL unspec (negative amount = right shift).
821 (define_expand "vashr<mode>3"
822  [(match_operand:VDQ_BHSI 0 "register_operand" "")
823   (match_operand:VDQ_BHSI 1 "register_operand" "")
824   (match_operand:VDQ_BHSI 2 "register_operand" "")]
827   rtx neg = gen_reg_rtx (<MODE>mode);
828   emit (gen_neg<mode>2 (neg, operands[2]));
829   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode ASR intrinsic expander: an asr by 64 is not representable by
;; the standard pattern, but it yields the same result as asr by 63
;; (all sign bits), so clamp it.
835 (define_expand "aarch64_ashr_simddi"
836   [(match_operand:DI 0 "register_operand" "=w")
837    (match_operand:DI 1 "register_operand" "w")
838    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
841     /* An arithmetic shift right by 64 fills the result with copies of the sign
842        bit, just like asr by 63 - however the standard pattern does not handle
844     if (INTVAL (operands[2]) == 64)
845       operands[2] = GEN_INT (63);
846     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate the amounts and use the
;; unsigned USHL unspec.
851 (define_expand "vlshr<mode>3"
852  [(match_operand:VDQ_BHSI 0 "register_operand" "")
853   (match_operand:VDQ_BHSI 1 "register_operand" "")
854   (match_operand:VDQ_BHSI 2 "register_operand" "")]
857   rtx neg = gen_reg_rtx (<MODE>mode);
858   emit (gen_neg<mode>2 (neg, operands[2]));
859   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode LSR intrinsic expander: lsr by 64 yields zero, which the
;; standard pattern cannot express; emit a zero move for that case.
864 (define_expand "aarch64_lshr_simddi"
865   [(match_operand:DI 0 "register_operand" "=w")
866    (match_operand:DI 1 "register_operand" "w")
867    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
870     if (INTVAL (operands[2]) == 64)
871       emit_move_insn (operands[0], const0_rtx);
873       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; Standard vec_set expander: convert the lane number into a one-hot
;; mask for the vec_merge-based insn above.
878 (define_expand "vec_set<mode>"
879   [(match_operand:VDQ_BHSI 0 "register_operand")
880    (match_operand:<VEL> 1 "register_operand")
881    (match_operand:SI 2 "immediate_operand")]
884     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
885     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
886 					    GEN_INT (elem), operands[0]));
891 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper).  Direction
;; depends on endianness: SHL on big-endian, USHR on little-endian.
892 (define_insn "vec_shr_<mode>"
893   [(set (match_operand:VD 0 "register_operand" "=w")
894         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
895 		    (match_operand:SI 2 "immediate_operand" "i")]
899   if (BYTES_BIG_ENDIAN)
900     return "shl %d0, %d1, %2";
902     return "ushr %d0, %d1, %2";
904   [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: INS from a GP register or from lane 0 of a SIMD
;; register; one-hot mask decoded/endian-remapped as in the generic
;; vec_set insn.
907 (define_insn "aarch64_simd_vec_setv2di"
908   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
911 	    (match_operand:DI 1 "register_operand" "r,w"))
912 	  (match_operand:V2DI 3 "register_operand" "0,0")
913 	  (match_operand:SI 2 "immediate_operand" "i,i")))]
916     int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
917     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
918     switch (which_alternative)
921 	return "ins\\t%0.d[%p2], %1";
923 	return "ins\\t%0.d[%p2], %1.d[0]";
928   [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI: lane number -> one-hot mask.
931 (define_expand "vec_setv2di"
932   [(match_operand:V2DI 0 "register_operand")
933    (match_operand:DI 1 "register_operand")
934    (match_operand:SI 2 "immediate_operand")]
937     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
938     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
939 					  GEN_INT (elem), operands[0]));
;; Floating-point lane insert: INS from lane 0 of a SIMD register only
;; (no GP alternative for FP element types here).
944 (define_insn "aarch64_simd_vec_set<mode>"
945   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
947 	    (vec_duplicate:VDQF_F16
948 	      (match_operand:<VEL> 1 "register_operand" "w"))
949 	  (match_operand:VDQF_F16 3 "register_operand" "0")
950 	  (match_operand:SI 2 "immediate_operand" "i")))]
953     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
955     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
956     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
958   [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors: lane number -> one-hot mask.
961 (define_expand "vec_set<mode>"
962   [(match_operand:VDQF_F16 0 "register_operand" "+w")
963    (match_operand:<VEL> 1 "register_operand" "w")
964    (match_operand:SI 2 "immediate_operand" "")]
967     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
968     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
969 					  GEN_INT (elem), operands[0]));
;; Multiply-accumulate: acc (operand 1, tied to output) + op2 * op3.
975 (define_insn "aarch64_mla<mode>"
976  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
977        (plus:VDQ_BHSI (mult:VDQ_BHSI
978 			(match_operand:VDQ_BHSI 2 "register_operand" "w")
979 			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
980 		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
982  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
983   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by element: multiply by one (endian-remapped) lane of operand 1.
986 (define_insn "*aarch64_mla_elt<mode>"
987  [(set (match_operand:VDQHS 0 "register_operand" "=w")
992 	      (match_operand:VDQHS 1 "register_operand" "<h_con>")
993 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
994 	   (match_operand:VDQHS 3 "register_operand" "w"))
995 	  (match_operand:VDQHS 4 "register_operand" "0")))]
998     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
999     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1001   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by element where the lane source has the swapped-width mode.
1004 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1005   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1008 	    (vec_duplicate:VDQHS
1010 		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1011 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1012 	    (match_operand:VDQHS 3 "register_operand" "w"))
1013 	  (match_operand:VDQHS 4 "register_operand" "0")))]
1016     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1017 					  INTVAL (operands[2])));
1018     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1020   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: acc (operand 1, tied to output) - op2 * op3.
1023 (define_insn "aarch64_mls<mode>"
1024  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1026 		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027 			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1029  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1030   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by element: multiply by one (endian-remapped) lane of operand 1.
1033 (define_insn "*aarch64_mls_elt<mode>"
1034  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 	  (match_operand:VDQHS 4 "register_operand" "0")
1038 	    (vec_duplicate:VDQHS
1040 		(match_operand:VDQHS 1 "register_operand" "<h_con>")
1041 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1042 	    (match_operand:VDQHS 3 "register_operand" "w"))))]
1045     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1046     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by element where the lane source has the swapped-width mode.
1051 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1052   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1054 	  (match_operand:VDQHS 4 "register_operand" "0")
1056 	    (vec_duplicate:VDQHS
1058 		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1060 	    (match_operand:VDQHS 3 "register_operand" "w"))))]
1063     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1064 					  INTVAL (operands[2])));
1065     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1067   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1070 ;; Max/Min operations.
;; Signed/unsigned SMAX/SMIN/UMAX/UMIN for B/H/S element vectors.
1071 (define_insn "<su><maxmin><mode>3"
1072  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1073        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1074 		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1076  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1077   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: no direct instruction exists, so synthesize via a
;; compare (cmp_fmt) and a vcond selecting between the two operands.
;; NOTE(review): the code choosing cmp_operator is elided in this view.
1080 (define_expand "<su><maxmin>v2di3"
1081  [(set (match_operand:V2DI 0 "register_operand" "")
1082        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1083 		    (match_operand:V2DI 2 "register_operand" "")))]
1086   enum rtx_code cmp_operator;
1107   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1108   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1109 			      operands[2], cmp_fmt, operands[1], operands[2]));
1113 ;; Pairwise Integer Max/Min operations.
;; Pairwise max/min of adjacent integer elements ([SU]MAXP/[SU]MINP).
1114 (define_insn "aarch64_<maxmin_uns>p<mode>"
1115  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1116        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1117 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1120  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1121   [(set_attr "type" "neon_minmax<q>")]
1124 ;; Pairwise FP Max/Min operations.
;; Pairwise max/min of adjacent FP elements.
1125 (define_insn "aarch64_<maxmin_uns>p<mode>"
1126  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1127        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1128 		      (match_operand:VHSDF 2 "register_operand" "w")]
1131  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1132   [(set_attr "type" "neon_minmax<q>")]
1135 ;; vec_concat gives a new vector with the low elements from operand 1, and
1136 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1137 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1138 ;; What that means, is that the RTL descriptions of the below patterns
1139 ;; need to change depending on endianness.
1141 ;; Move to the low architectural bits of the register.
1142 ;; On little-endian this is { operand, zeroes }
1143 ;; On big-endian this is { zeroes, operand }
;; Little-endian variant for modes with more than two elements: the operand
;; forms the first (low) half of the vec_concat, zeros the second.
1145 (define_insn "move_lo_quad_internal_<mode>"
1146 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1148 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1149 (vec_duplicate:<VHALF> (const_int 0))))]
1150 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1155 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1156 (set_attr "simd" "yes,*,yes")
1157 (set_attr "fp" "*,yes,*")
1158 (set_attr "length" "4")]
;; As above, for the two-element (V2DI/V2DF) modes, where the zero half is
;; a single element rather than a vec_duplicate.
1161 (define_insn "move_lo_quad_internal_<mode>"
1162 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1164 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1166 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1171 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1172 (set_attr "simd" "yes,*,yes")
1173 (set_attr "fp" "*,yes,*")
1174 (set_attr "length" "4")]
;; Big-endian counterpart: zeros occupy the first half of the vec_concat,
;; so the operand still lands in the low architectural bits.
1177 (define_insn "move_lo_quad_internal_be_<mode>"
1178 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1180 (vec_duplicate:<VHALF> (const_int 0))
1181 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1182 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1187 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1188 (set_attr "simd" "yes,*,yes")
1189 (set_attr "fp" "*,yes,*")
1190 (set_attr "length" "4")]
;; Big-endian, two-element modes.
1193 (define_insn "move_lo_quad_internal_be_<mode>"
1194 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1197 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1198 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1203 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1204 (set_attr "simd" "yes,*,yes")
1205 (set_attr "fp" "*,yes,*")
1206 (set_attr "length" "4")]
;; Entry point: pick the LE or BE internal pattern at expand time so the
;; RTL vec_concat order matches the target's lane numbering.
1209 (define_expand "move_lo_quad_<mode>"
1210 [(match_operand:VQ 0 "register_operand")
1211 (match_operand:VQ 1 "register_operand")]
1214 if (BYTES_BIG_ENDIAN)
1215 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1217 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1222 ;; Move operand1 to the high architectural bits of the register, keeping
1223 ;; the low architectural bits of operand2.
1224 ;; For little-endian this is { operand2, operand1 }
1225 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: keep the lo half (selected via a lo-half PARALLEL,
;; operand 2) and insert operand 1 into lane d[1].
1227 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1228 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1232 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1233 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1234 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1236 ins\\t%0.d[1], %1.d[0]
1238 [(set_attr "type" "neon_ins")]
;; Big-endian: same insertion, with the vec_concat halves swapped in RTL.
1241 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1242 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1244 (match_operand:<VHALF> 1 "register_operand" "w,r")
1247 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1248 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1250 ins\\t%0.d[1], %1.d[0]
1252 [(set_attr "type" "neon_ins")]
;; Entry point: build the lo-half PARALLEL (hence 'false') required by both
;; insns above, then dispatch on endianness.
1255 (define_expand "move_hi_quad_<mode>"
1256 [(match_operand:VQ 0 "register_operand" "")
1257 (match_operand:<VHALF> 1 "register_operand" "")]
1260 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1261 if (BYTES_BIG_ENDIAN)
1262 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1265 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1270 ;; Narrowing operations.
;; Truncate each wide lane to half width: a single XTN.
1273 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1274 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1275 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1277 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1278 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; D-register sources: concatenate the two inputs into one Q register
;; (endian-aware lo/hi placement), then narrow it with the XTN insn above.
1281 (define_expand "vec_pack_trunc_<mode>"
1282 [(match_operand:<VNARROWD> 0 "register_operand" "")
1283 (match_operand:VDN 1 "register_operand" "")
1284 (match_operand:VDN 2 "register_operand" "")]
1287 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1288 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1289 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1291 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1292 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1293 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Q-register sources: XTN into the low half, XTN2 into the high half;
;; operand order swaps on big-endian.  Earlyclobber output ("=&w") because
;; the first XTN would otherwise clobber a still-needed input.
1299 (define_insn "vec_pack_trunc_<mode>"
1300 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1301 (vec_concat:<VNARROWQ2>
1302 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1303 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1306 if (BYTES_BIG_ENDIAN)
1307 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1309 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1311 [(set_attr "type" "multiple")
1312 (set_attr "length" "8")]
1315 ;; Widening operations.
;; Sign/zero-extend the low half of a Q register via (U)SHLL #0.
1317 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1319 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1320 (match_operand:VQW 1 "register_operand" "w")
1321 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1324 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1325 [(set_attr "type" "neon_shift_imm_long")]
;; Likewise for the high half via (U)SHLL2 #0.
1328 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1330 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1331 (match_operand:VQW 1 "register_operand" "w")
1332 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1335 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1336 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expander: build the hi-half lane PARALLEL ('true') and
;; emit the _hi_ insn above.
1339 (define_expand "vec_unpack<su>_hi_<mode>"
1340 [(match_operand:<VWIDE> 0 "register_operand" "")
1341 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1345 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
;; Standard-name expander for the low half ('false' selects lo lanes).
1351 (define_expand "vec_unpack<su>_lo_<mode>"
1352 [(match_operand:<VWIDE> 0 "register_operand" "")
1353 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1357 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1363 ;; Widening arithmetic.
;; Combiner pattern: acc += extend(lo half of op2) * extend(lo half of op4)
;; as a single (S/U)MLAL.  Operand 1 is the tied accumulator ("0").
1365 (define_insn "*aarch64_<su>mlal_lo<mode>"
1366 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1369 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370 (match_operand:VQW 2 "register_operand" "w")
1371 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1372 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1373 (match_operand:VQW 4 "register_operand" "w")
1375 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1377 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1378 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: (S/U)MLAL2.
1381 (define_insn "*aarch64_<su>mlal_hi<mode>"
1382 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1385 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1386 (match_operand:VQW 2 "register_operand" "w")
1387 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1388 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1389 (match_operand:VQW 4 "register_operand" "w")
1391 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1393 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1394 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Multiply-subtract forms: acc -= extend(...) * extend(...), (S/U)MLSL.
1397 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1398 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1400 (match_operand:<VWIDE> 1 "register_operand" "0")
1402 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403 (match_operand:VQW 2 "register_operand" "w")
1404 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1405 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1406 (match_operand:VQW 4 "register_operand" "w")
1409 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1410 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half multiply-subtract: (S/U)MLSL2.
1413 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1414 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1416 (match_operand:<VWIDE> 1 "register_operand" "0")
1418 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1419 (match_operand:VQW 2 "register_operand" "w")
1420 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1421 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1422 (match_operand:VQW 4 "register_operand" "w")
1425 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1426 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit) source forms: no half selection needed; operand 3
;; is the tied accumulator.
1429 (define_insn "*aarch64_<su>mlal<mode>"
1430 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1434 (match_operand:VD_BHSI 1 "register_operand" "w"))
1436 (match_operand:VD_BHSI 2 "register_operand" "w")))
1437 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1439 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1440 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register multiply-subtract; operand 1 is the tied accumulator.
1443 (define_insn "*aarch64_<su>mlsl<mode>"
1444 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1446 (match_operand:<VWIDE> 1 "register_operand" "0")
1449 (match_operand:VD_BHSI 2 "register_operand" "w"))
1451 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1453 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1454 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: (S/U)MULL.
1457 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1459 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1460 (match_operand:VQW 1 "register_operand" "w")
1461 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1462 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1463 (match_operand:VQW 2 "register_operand" "w")
1466 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1467 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: lo-half PARALLEL ('false') -> _lo_ insn above.
1470 (define_expand "vec_widen_<su>mult_lo_<mode>"
1471 [(match_operand:<VWIDE> 0 "register_operand" "")
1472 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1473 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1476 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1477 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves: (S/U)MULL2.
1484 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1485 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1486 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1487 (match_operand:VQW 1 "register_operand" "w")
1488 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1489 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1490 (match_operand:VQW 2 "register_operand" "w")
1493 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1494 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: hi-half PARALLEL ('true') -> _hi_ insn above.
1497 (define_expand "vec_widen_<su>mult_hi_<mode>"
1498 [(match_operand:<VWIDE> 0 "register_operand" "")
1499 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1500 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1503 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1504 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1512 ;; FP vector operations.
1513 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1514 ;; double-precision (64-bit) floating-point data types and arithmetic as
1515 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1516 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1518 ;; Floating-point operations can raise an exception. Vectorizing such
1519 ;; operations is safe because of the reasons explained below.
1521 ;; ARMv8 permits an extension to enable trapped floating-point
1522 ;; exception handling, however this is an optional feature. In the
1523 ;; event of a floating-point exception being raised by vectorised
1525 ;; 1. If trapped floating-point exceptions are available, then a trap
1526 ;; will be taken when any lane raises an enabled exception. A trap
1527 ;; handler may determine which lane raised the exception.
1528 ;; 2. Alternatively a sticky exception flag is set in the
1529 ;; floating-point status register (FPSR). Software may explicitly
1530 ;; test the exception flags, in which case the tests will either
1531 ;; prevent vectorisation, allowing precise identification of the
1532 ;; failing operation, or if tested outside of vectorisable regions
1533 ;; then the specific operation and lane are not of interest.
1535 ;; FP arithmetic operations.
;; Vector FP addition: FADD on f16/f32/f64 lanes.
1537 (define_insn "add<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540 (match_operand:VHSDF 2 "register_operand" "w")))]
1542 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction: FSUB.
1546 (define_insn "sub<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549 (match_operand:VHSDF 2 "register_operand" "w")))]
1551 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1552 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication: FMUL.
1555 (define_insn "mul<mode>3"
1556 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1557 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1558 (match_operand:VHSDF 2 "register_operand" "w")))]
1560 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1561 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: first try the Newton-Raphson approximation sequence
;; (aarch64_emit_approx_div); if it declines, fall through to the FDIV insn.
1564 (define_expand "div<mode>3"
1565 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1566 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1567 (match_operand:VHSDF 2 "register_operand" "w")))]
1570 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1573 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Plain vector FDIV, used when no approximation is emitted.
1576 (define_insn "*div<mode>3"
1577 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1578 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1579 (match_operand:VHSDF 2 "register_operand" "w")))]
1581 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1582 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation: FNEG.
1585 (define_insn "neg<mode>2"
1586 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1587 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1589 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1590 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value: FABS.
1593 (define_insn "abs<mode>2"
1594 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1595 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1597 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1598 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add (standard name): FMLA with the addend tied ("0").
1601 (define_insn "fma<mode>4"
1602 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1603 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1604 (match_operand:VHSDF 2 "register_operand" "w")
1605 (match_operand:VHSDF 3 "register_operand" "0")))]
1607 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1608 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMA with one factor broadcast from lane [%2] of operand 1; the lane
;; index is remapped for endianness via ENDIAN_LANE_N.
1611 (define_insn "*aarch64_fma4_elt<mode>"
1612 [(set (match_operand:VDQF 0 "register_operand" "=w")
1616 (match_operand:VDQF 1 "register_operand" "<h_con>")
1617 (parallel [(match_operand:SI 2 "immediate_operand")])))
1618 (match_operand:VDQF 3 "register_operand" "w")
1619 (match_operand:VDQF 4 "register_operand" "0")))]
1622 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Like the above, with the lane taken from a vector of the other width
;; (64-bit vs 128-bit), hence the <VSWAP_WIDTH> index mode.
1628 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1629 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1631 (vec_duplicate:VDQSF
1633 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1634 (parallel [(match_operand:SI 2 "immediate_operand")])))
1635 (match_operand:VDQSF 3 "register_operand" "w")
1636 (match_operand:VDQSF 4 "register_operand" "0")))]
1639 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1640 INTVAL (operands[2])));
1641 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1643 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Factor duplicated from a scalar register: FMLA with lane [0].
1646 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1647 [(set (match_operand:VMUL 0 "register_operand" "=w")
1650 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1651 (match_operand:VMUL 2 "register_operand" "w")
1652 (match_operand:VMUL 3 "register_operand" "0")))]
1654 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1655 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed with the by-element V2DF form of FMLA.
1658 (define_insn "*aarch64_fma4_elt_to_64v2df"
1659 [(set (match_operand:DF 0 "register_operand" "=w")
1662 (match_operand:V2DF 1 "register_operand" "w")
1663 (parallel [(match_operand:SI 2 "immediate_operand")]))
1664 (match_operand:DF 3 "register_operand" "w")
1665 (match_operand:DF 4 "register_operand" "0")))]
1668 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1669 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1671 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (fnma standard name): FMLS, addend tied ("0").
;; Mirrors the fma<mode>4 pattern family above with a negated factor.
1674 (define_insn "fnma<mode>4"
1675 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1677 (match_operand:VHSDF 1 "register_operand" "w")
1679 (match_operand:VHSDF 2 "register_operand" "w"))
1680 (match_operand:VHSDF 3 "register_operand" "0")))]
1682 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1683 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS by element; lane index remapped for endianness.
1686 (define_insn "*aarch64_fnma4_elt<mode>"
1687 [(set (match_operand:VDQF 0 "register_operand" "=w")
1690 (match_operand:VDQF 3 "register_operand" "w"))
1693 (match_operand:VDQF 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQF 4 "register_operand" "0")))]
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1699 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1701 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by element with the lane taken from the other-width vector mode.
1704 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1705 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1708 (match_operand:VDQSF 3 "register_operand" "w"))
1709 (vec_duplicate:VDQSF
1711 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712 (parallel [(match_operand:SI 2 "immediate_operand")])))
1713 (match_operand:VDQSF 4 "register_operand" "0")))]
1716 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1717 INTVAL (operands[2])));
1718 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1720 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Factor duplicated from a scalar register: FMLS with lane [0].
1723 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1724 [(set (match_operand:VMUL 0 "register_operand" "=w")
1727 (match_operand:VMUL 2 "register_operand" "w"))
1729 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1730 (match_operand:VMUL 3 "register_operand" "0")))]
1732 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1733 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result via the by-element V2DF form of FMLS.
1736 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1737 [(set (match_operand:DF 0 "register_operand" "=w")
1740 (match_operand:V2DF 1 "register_operand" "w")
1741 (parallel [(match_operand:SI 2 "immediate_operand")]))
1743 (match_operand:DF 3 "register_operand" "w"))
1744 (match_operand:DF 4 "register_operand" "0")))]
1747 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1748 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1750 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1753 ;; Vector versions of the floating-point frint patterns.
1754 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1755 (define_insn "<frint_pattern><mode>2"
1756 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1757 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1760 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_fp_round_<stype><q>")]
1764 ;; Vector versions of the fcvt standard patterns.
1765 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer: FCVT<rm><s/u>.
1766 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1767 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1768 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1769 [(match_operand:VHSDF 1 "register_operand" "w")]
1772 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1773 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1776 ;; HF Scalar variants of related SIMD instructions.
;; Scalar half-float round-and-convert to 16-bit integer (FP16 extension).
1777 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1778 [(set (match_operand:HI 0 "register_operand" "=w")
1779 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1781 "TARGET_SIMD_F16INST"
1782 "fcvt<frint_suffix><su>\t%h0, %h1"
1783 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar half-float truncating convert to 16-bit integer: FCVTZ<s/u>.
1786 (define_insn "<optab>_trunchfhi2"
1787 [(set (match_operand:HI 0 "register_operand" "=w")
1788 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1789 "TARGET_SIMD_F16INST"
1790 "fcvtz<su>\t%h0, %h1"
1791 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar 16-bit integer to half-float: SCVTF/UCVTF.
1794 (define_insn "<optab>hihf2"
1795 [(set (match_operand:HF 0 "register_operand" "=w")
1796 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1797 "TARGET_SIMD_F16INST"
1798 "<su_optab>cvtf\t%h0, %h1"
1799 [(set_attr "type" "neon_int_to_fp_s")]
;; Combiner pattern: convert-to-int of (x * 2^n) becomes a single
;; FCVTZ<s/u> with an immediate fbits count, when n is in range for the
;; element size.
1802 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1803 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1804 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1806 (match_operand:VDQF 1 "register_operand" "w")
1807 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1810 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1811 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1813 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1815 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1816 output_asm_insn (buf, operands);
1819 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns (and the _trunc variants)
;; onto the unspec-based conversion insns.
1822 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1823 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1824 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1825 [(match_operand:VHSDF 1 "register_operand")]
1830 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1831 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1832 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1833 [(match_operand:VHSDF 1 "register_operand")]
;; ftrunc standard name: round toward zero, result stays floating-point.
1838 (define_expand "ftrunc<VHSDF:mode>2"
1839 [(set (match_operand:VHSDF 0 "register_operand")
1840 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer-to-FP conversion: SCVTF/UCVTF on each lane.
1845 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1846 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1848 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1850 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1851 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1854 ;; Conversions between vectors of floats and doubles.
1855 ;; Contains a mix of patterns to match standard pattern names
1856 ;; and those for intrinsics.
1858 ;; Float widening operations.
;; Widen the low half of a f16/f32 vector to f32/f64: FCVTL.
1860 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1861 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1862 (float_extend:<VWIDE> (vec_select:<VHALF>
1863 (match_operand:VQ_HSF 1 "register_operand" "w")
1864 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1867 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1868 [(set_attr "type" "neon_fp_cvt_widen_s")]
1871 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits immediate (FCVTZS/FCVTZU #n).
1873 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1874 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1875 (unspec:<VHSDF:FCVT_TARGET>
1876 [(match_operand:VHSDF 1 "register_operand" "w")
1877 (match_operand:SI 2 "immediate_operand" "i")]
1880 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1881 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits immediate (SCVTF/UCVTF #n).
1884 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1885 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1886 (unspec:<VDQ_HSDI:FCVT_TARGET>
1887 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1888 (match_operand:SI 2 "immediate_operand" "i")]
1891 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1892 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1895 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1896 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1897 ;; the meaning of HI and LO changes depending on the target endianness.
1898 ;; While elsewhere we map the higher numbered elements of a vector to
1899 ;; the lower architectural lanes of the vector, for these patterns we want
1900 ;; to always treat "hi" as referring to the higher architectural lanes.
1901 ;; Consequently, while the patterns below look inconsistent with our
1902 ;; other big-endian patterns their behavior is as required.
;; Standard-name expander: lo-half lane PARALLEL ('false') feeding the
;; FCVTL insn above.  See the endianness note preceding these patterns.
1904 (define_expand "vec_unpacks_lo_<mode>"
1905 [(match_operand:<VWIDE> 0 "register_operand" "")
1906 (match_operand:VQ_HSF 1 "register_operand" "")]
1909 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1910 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high half of a f16/f32 vector: FCVTL2.
1916 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1918 (float_extend:<VWIDE> (vec_select:<VHALF>
1919 (match_operand:VQ_HSF 1 "register_operand" "w")
1920 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1923 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1924 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander for widening the high half of a float vector:
;; build a hi-half lane PARALLEL ('true') and emit the _hi_ insn (FCVTL2).
;; Fixed: this previously called gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; whose operand-2 predicate is vect_par_cnst_lo_half and can never match
;; a hi-half PARALLEL, so emit_insn would fail.  The integer analogue
;; vec_unpack<su>_hi_<mode> correctly uses the _hi_ generator.
1927 (define_expand "vec_unpacks_hi_<mode>"
1928 [(match_operand:<VWIDE> 0 "register_operand" "")
1929 (match_operand:VQ_HSF 1 "register_operand" "")]
1932 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1933 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole 64-bit float vector (V2SF/V4HF) to the wide mode: FCVTL.
1938 (define_insn "aarch64_float_extend_lo_<Vwide>"
1939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1940 (float_extend:<VWIDE>
1941 (match_operand:VDF 1 "register_operand" "w")))]
1943 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1944 [(set_attr "type" "neon_fp_cvt_widen_s")]
1947 ;; Float narrowing operations.
;; Narrow a wide vector into a D register: FCVTN.
1949 (define_insn "aarch64_float_truncate_lo_<mode>"
1950 [(set (match_operand:VDF 0 "register_operand" "=w")
1952 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1954 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1955 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping operand 1 (tied, "0") in the
;; low half: FCVTN2.  Little-endian vec_concat order.
1958 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1959 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1961 (match_operand:VDF 1 "register_operand" "0")
1963 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1964 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1965 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1966 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: vec_concat halves swapped, same FCVTN2.
1969 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1970 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1973 (match_operand:<VWIDE> 2 "register_operand" "w"))
1974 (match_operand:VDF 1 "register_operand" "0")))]
1975 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1976 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1977 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Intrinsics entry point: select the LE/BE insn at expand time.
1980 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1981 [(match_operand:<VDBL> 0 "register_operand" "=w")
1982 (match_operand:VDF 1 "register_operand" "0")
1983 (match_operand:<VWIDE> 2 "register_operand" "w")]
1986 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1987 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1988 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1989 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half (temp is
;; the narrow V2SF result), then FCVTN2 into the high half.  lo/hi operand
;; choice flips on big-endian.
1994 (define_expand "vec_pack_trunc_v2df"
1995 [(set (match_operand:V4SF 0 "register_operand")
1997 (float_truncate:V2SF
1998 (match_operand:V2DF 1 "register_operand"))
1999 (float_truncate:V2SF
2000 (match_operand:V2DF 2 "register_operand"))
2004 rtx tmp = gen_reg_rtx (V2SFmode);
2005 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2006 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2008 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2009 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2010 tmp, operands[hi]));
;; Pack two DF scalars into one V2SF: place both doubles into a V2DF
;; temporary (endian-aware lo/hi), then narrow it with FCVTN.
;; Fixed: the temporary must be V2DFmode, not V2SFmode -- it is the VQ
;; operand of gen_move_lo_quad_v2df/gen_move_hi_quad_v2df and the V2DF
;; input of gen_aarch64_float_truncate_lo_v2sf; a V2SF register there is a
;; mode mismatch.  (Contrast vec_pack_trunc_v2df above, whose temporary
;; really is the narrow V2SF result.)
2015 (define_expand "vec_pack_trunc_df"
2016 [(set (match_operand:V2SF 0 "register_operand")
2019 (match_operand:DF 1 "register_operand"))
2021 (match_operand:DF 2 "register_operand"))
2025 rtx tmp = gen_reg_rtx (V2DFmode);
2026 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2027 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2029 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2030 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2031 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2037 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2039 ;; a = (b < c) ? b : c;
2040 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2041 ;; either explicitly or indirectly via -ffast-math.
2043 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2044 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2045 ;; operand will be returned when both operands are zero (i.e. they may not
2046 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2047 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names for FP vectors: implemented with FMAXNM/FMINNM,
;; which is valid given the relaxed NaN/signed-zero requirements above.
2050 (define_insn "<su><maxmin><mode>3"
2051 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2052 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2053 (match_operand:VHSDF 2 "register_operand" "w")))]
2055 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2056 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2059 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2060 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2061 ;; which implement the IEEE fmax ()/fmin () functions.
2062 (define_insn "<maxmin_uns><mode>3"
2063 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2064 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2065 (match_operand:VHSDF 2 "register_operand" "w")]
2068 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2069 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2072 ;; 'across lanes' add.
;; Integer add-reduction to a scalar: reduce into a vector scratch, then
;; extract architectural lane 0 (endian-remapped via ENDIAN_LANE_N).
2074 (define_expand "reduc_plus_scal_<mode>"
2075 [(match_operand:<VEL> 0 "register_operand" "=w")
2076 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2080 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2081 rtx scratch = gen_reg_rtx (<MODE>mode);
2082 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2083 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; FADDP: pairwise FP add, also the building block for v4sf reduction.
2088 (define_insn "aarch64_faddp<mode>"
2089 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2090 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2091 (match_operand:VHSDF 2 "register_operand" "w")]
2094 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2095 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV/ADDP across-lanes integer add into element 0 of the destination.
2098 (define_insn "aarch64_reduc_plus_internal<mode>"
2099 [(set (match_operand:VDQV 0 "register_operand" "=w")
2100 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2103 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2104 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP does the 2-lane reduction.
2107 (define_insn "aarch64_reduc_plus_internalv2si"
2108 [(set (match_operand:V2SI 0 "register_operand" "=w")
2109 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2112 "addp\\t%0.2s, %1.2s, %1.2s"
2113 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with the scalar-destination FADDP form.
2116 (define_insn "reduc_plus_scal_<mode>"
2117 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2118 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2121 "faddp\\t%<Vetype>0, %1.<Vtype>"
2122 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two pairwise FADDPs, then extract lane 0.
2125 (define_expand "reduc_plus_scal_v4sf"
2126 [(set (match_operand:SF 0 "register_operand")
2127 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2131 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2132 rtx scratch = gen_reg_rtx (V4SFmode);
2133 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2134 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2135 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per lane: CLS.
2139 (define_insn "clrsb<mode>2"
2140 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2141 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2143 "cls\\t%0.<Vtype>, %1.<Vtype>"
2144 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per lane: CLZ.
2147 (define_insn "clz<mode>2"
2148 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2149 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2151 "clz\\t%0.<Vtype>, %1.<Vtype>"
2152 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte lane: CNT (byte vectors only).
2155 (define_insn "popcount<mode>2"
2156 [(set (match_operand:VB 0 "register_operand" "=w")
2157 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2159 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2160 [(set_attr "type" "neon_cnt<q>")]
2163 ;; 'across lanes' max and min ops.
2165 ;; Template for outputting a scalar, so we can create __builtins which can be
2166 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min reduction to a scalar: reduce into a vector scratch, then
;; extract architectural lane 0 (endian-remapped via ENDIAN_LANE_N).
2167 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2168 [(match_operand:<VEL> 0 "register_operand")
2169 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2173 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2174 rtx scratch = gen_reg_rtx (<MODE>mode);
2175 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2177 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2182 ;; Likewise for integer cases, signed and unsigned.
2183 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2184 [(match_operand:<VEL> 0 "register_operand")
2185 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2189 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2190 rtx scratch = gen_reg_rtx (<MODE>mode);
2191 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2193 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lanes integer max/min: SMAXV/SMINV/UMAXV/UMINV.
2198 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2199 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2200 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2203 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2204 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces the two lanes.
2207 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2208 [(set (match_operand:V2SI 0 "register_operand" "=w")
2209 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2212 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2213 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (<vp> selects the V/P instruction form).
2216 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2217 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2218 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2221 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2222 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2225 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2227 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2230 ;; Thus our BSL is of the form:
2231 ;; op0 = bsl (mask, op2, op3)
2232 ;; We can use any of:
2235 ;; bsl mask, op1, op2
2236 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2237 ;; bit op0, op2, mask
2238 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2239 ;; bif op0, op1, mask
2241 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2242 ;; Some forms of straight-line code may generate the equivalent form
2243 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical XOR/AND/XOR form of bit-select; the three alternatives tie the
;; destination to operand 1, 3 or 2 and choose BSL/BIT/BIF accordingly.
2245 (define_insn "aarch64_simd_bsl<mode>_internal"
2246 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2250 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2251 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2252 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2253 (match_dup:<V_cmp_result> 3)
2257 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2258 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2259 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2260 [(set_attr "type" "neon_bsl<q>")]
2263 ;; We need this form in addition to the above pattern to match the case
2264 ;; when combine tries merging three insns such that the second operand of
2265 ;; the outer XOR matches the second operand of the inner XOR rather than
2266 ;; the first. The two are equivalent but since recog doesn't try all
2267 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same semantics as the _internal pattern with the XOR operands commuted,
;; hence operands 2 and 3 swap roles in the output templates.
2269 (define_insn "*aarch64_simd_bsl<mode>_alt"
2270 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2274 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2275 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2276 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2277 (match_dup:VSDQ_I_DI 2)))]
2280 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2281 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2282 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2283 [(set_attr "type" "neon_bsl<q>")]
;; Expander: for FP modes the select is done in the equivalent integer mode
;; (<V_cmp_result>) via lowparts, writing through a scratch and moving the
;; result back only when a mode change was needed.
2286 (define_expand "aarch64_simd_bsl<mode>"
2287 [(match_operand:VALLDIF 0 "register_operand")
2288 (match_operand:<V_cmp_result> 1 "register_operand")
2289 (match_operand:VALLDIF 2 "register_operand")
2290 (match_operand:VALLDIF 3 "register_operand")]
2293 /* We can't alias operands together if they have different modes. */
2294 rtx tmp = operands[0];
2295 if (FLOAT_MODE_P (<MODE>mode))
2297 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2298 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2299 tmp = gen_reg_rtx (<V_cmp_result>mode);
2301 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2302 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2306 if (tmp != operands[0])
2307 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Mask-driven select: op0 = op3 ? op1 : op2.  The all-ones/all-zeros constant
;; cases collapse to a move or a bitwise-not of the mask; otherwise force the
;; value operands into registers and emit a BSL.
2312 (define_expand "vcond_mask_<mode><v_cmp_result>"
2313 [(match_operand:VALLDI 0 "register_operand")
2314 (match_operand:VALLDI 1 "nonmemory_operand")
2315 (match_operand:VALLDI 2 "nonmemory_operand")
2316 (match_operand:<V_cmp_result> 3 "register_operand")]
2319 /* If we have (a = (P) ? -1 : 0);
2320 Then we can simply move the generated mask (result must be int). */
2321 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2322 && operands[2] == CONST0_RTX (<MODE>mode))
2323 emit_move_insn (operands[0], operands[3]);
2324 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2325 else if (operands[1] == CONST0_RTX (<MODE>mode)
2326 && operands[2] == CONSTM1_RTX (<MODE>mode))
2327 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
2330 if (!REG_P (operands[1]))
2331 operands[1] = force_reg (<MODE>mode, operands[1]);
2332 if (!REG_P (operands[2]))
2333 operands[2] = force_reg (<MODE>mode, operands[2]);
2334 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2335 operands[1], operands[2]));
2341 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: dispatch on the rtx code to the matching CM*
;; instruction, swapping operands where only one direction exists
;; (e.g. LTU via CMGTU with reversed operands) and synthesizing NE as
;; NOT (EQ).  The visible dispatch arms are elided switch cases.
2343 (define_expand "vec_cmp<mode><mode>"
2344 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2345 (match_operator 1 "comparison_operator"
2346 [(match_operand:VSDQ_I_DI 2 "register_operand")
2347 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2350 rtx mask = operands[0];
2351 enum rtx_code code = GET_CODE (operands[1]);
2361 if (operands[3] == CONST0_RTX (<MODE>mode))
2366 if (!REG_P (operands[3]))
2367 operands[3] = force_reg (<MODE>mode, operands[3]);
2375 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2379 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2383 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2387 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2391 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2395 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2399 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2403 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2407 /* Handle NE as !EQ. */
2408 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2409 emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
2413 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Ordered comparisons map to
;; FCMGE/FCMGT/FCMEQ (swapping operands for LT/LE); unordered-tolerant codes
;; are built from the inverse comparison plus a NOT, and UNEQ/ORDERED/
;; UNORDERED combine two compares with OR (and NOT where needed).
2423 (define_expand "vec_cmp<mode><v_cmp_result>"
2424 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2425 (match_operator 1 "comparison_operator"
2426 [(match_operand:VDQF 2 "register_operand")
2427 (match_operand:VDQF 3 "nonmemory_operand")]))]
2430 int use_zero_form = 0;
2431 enum rtx_code code = GET_CODE (operands[1]);
2432 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
2434 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2443 if (operands[3] == CONST0_RTX (<MODE>mode))
2450 if (!REG_P (operands[3]))
2451 operands[3] = force_reg (<MODE>mode, operands[3]);
2461 comparison = gen_aarch64_cmlt<mode>;
2466 std::swap (operands[2], operands[3]);
2470 comparison = gen_aarch64_cmgt<mode>;
2475 comparison = gen_aarch64_cmle<mode>;
2480 std::swap (operands[2], operands[3]);
2484 comparison = gen_aarch64_cmge<mode>;
2488 comparison = gen_aarch64_cmeq<mode>;
2505 /* FCM returns false for lanes which are unordered, so if we use
2506 the inverse of the comparison we actually want to emit, then
2507 invert the result, we will end up with the correct result.
2508 Note that a NE NaN and NaN NE b are true for all a, b.
2510 Our transformations are:
2511 a UNGE b -> !(b GT a)
2512 a UNGT b -> !(b GE a)
2513 a UNLE b -> !(a GT b)
2514 a UNLT b -> !(a GE b)
2515 a NE b -> !(a EQ b) */
2516 gcc_assert (comparison != NULL);
2517 emit_insn (comparison (operands[0], operands[2], operands[3]));
2518 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2526 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2527 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2533 gcc_assert (comparison != NULL);
2534 emit_insn (comparison (operands[0], operands[2], operands[3]));
2538 /* We first check (a > b || b > a) which is !UNEQ, inverting
2539 this result will then give us (a == b || a UNORDERED b). */
2540 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2541 operands[2], operands[3]));
2542 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2543 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2544 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2548 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2549 UNORDERED as !ORDERED. */
2550 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2551 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2552 operands[3], operands[2]));
2553 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2554 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2558 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2559 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2560 operands[3], operands[2]));
2561 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
;; Unsigned integer compare simply defers to the signed-named expander,
;; which already dispatches on the (unsigned) rtx code itself.
2571 (define_expand "vec_cmpu<mode><mode>"
2572 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2573 (match_operator 1 "comparison_operator"
2574 [(match_operand:VSDQ_I_DI 2 "register_operand")
2575 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2578 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2579 operands[2], operands[3]));
;; vcond: compute a comparison mask then select via vcond_mask.  NE is
;; rewritten as EQ with the two value operands swapped, saving the extra
;; inversion the NE path of vec_cmp would emit.
2583 (define_expand "vcond<mode><mode>"
2584 [(set (match_operand:VALLDI 0 "register_operand")
2585 (if_then_else:VALLDI
2586 (match_operator 3 "comparison_operator"
2587 [(match_operand:VALLDI 4 "register_operand")
2588 (match_operand:VALLDI 5 "nonmemory_operand")])
2589 (match_operand:VALLDI 1 "nonmemory_operand")
2590 (match_operand:VALLDI 2 "nonmemory_operand")))]
2593 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2594 enum rtx_code code = GET_CODE (operands[3]);
2596 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2597 it as well as switch operands 1/2 in order to avoid the additional
2601 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2602 operands[4], operands[5]);
2603 std::swap (operands[1], operands[2]);
2605 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2606 operands[4], operands[5]));
2607 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2608 operands[2], mask));
;; Mixed-mode vcond: FP comparison selecting between integer vectors of the
;; corresponding width (<V_cmp_mixed>); same NE-as-swapped-EQ trick.
2613 (define_expand "vcond<v_cmp_mixed><mode>"
2614 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2615 (if_then_else:<V_cmp_mixed>
2616 (match_operator 3 "comparison_operator"
2617 [(match_operand:VDQF_COND 4 "register_operand")
2618 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2619 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2620 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2623 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2624 enum rtx_code code = GET_CODE (operands[3]);
2626 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2627 it as well as switch operands 1/2 in order to avoid the additional
2631 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2632 operands[4], operands[5]);
2633 std::swap (operands[1], operands[2]);
2635 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2636 operands[4], operands[5]));
2637 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
2638 operands[0], operands[1],
2639 operands[2], mask));
;; Unsigned-condition vcond over integer vectors; the mask is computed in the
;; same integer mode.
2644 (define_expand "vcondu<mode><mode>"
2645 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2646 (if_then_else:VSDQ_I_DI
2647 (match_operator 3 "comparison_operator"
2648 [(match_operand:VSDQ_I_DI 4 "register_operand")
2649 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2650 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2651 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2654 rtx mask = gen_reg_rtx (<MODE>mode);
2655 enum rtx_code code = GET_CODE (operands[3]);
2657 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2658 it as well as switch operands 1/2 in order to avoid the additional
2662 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2663 operands[4], operands[5]);
2664 std::swap (operands[1], operands[2]);
2666 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2667 operands[4], operands[5]));
2668 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2669 operands[2], mask));
;; Unsigned comparison of integer vectors selecting between FP vectors
;; (the mixed mode supplies the comparison operands).
2673 (define_expand "vcondu<mode><v_cmp_mixed>"
2674 [(set (match_operand:VDQF 0 "register_operand")
2676 (match_operator 3 "comparison_operator"
2677 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2678 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2679 (match_operand:VDQF 1 "nonmemory_operand")
2680 (match_operand:VDQF 2 "nonmemory_operand")))]
2683 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2684 enum rtx_code code = GET_CODE (operands[3]);
2686 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2687 it as well as switch operands 1/2 in order to avoid the additional
2691 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2692 operands[4], operands[5]);
2693 std::swap (operands[1], operands[2]);
2695 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2697 operands[4], operands[5]));
2698 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2699 operands[2], mask));
2703 ;; Patterns for AArch64 SIMD Intrinsics.
2705 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract a lane and sign-extend into a W/X register; the lane index
;; is flipped for big-endian via ENDIAN_LANE_N at output time.
2706 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2707 [(set (match_operand:GPI 0 "register_operand" "=r")
2710 (match_operand:VDQQH 1 "register_operand" "w")
2711 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2714 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2715 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2717 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: lane extraction zero-extended into a 32-bit GP register.
2720 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2721 [(set (match_operand:SI 0 "register_operand" "=r")
2724 (match_operand:VDQQH 1 "register_operand" "w")
2725 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2728 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2729 return "umov\\t%w0, %1.<Vetype>[%2]";
2731 [(set_attr "type" "neon_to_gp<q>")]
2734 ;; Lane extraction of a value, neither sign nor zero extension
2735 ;; is guaranteed so upper bits should be considered undefined.
2736 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP to a SIMD scalar, or ST1 of
;; a single lane straight to memory.
2737 (define_insn "aarch64_get_lane<mode>"
2738 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2740 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2741 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2744 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2745 switch (which_alternative)
2748 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2750 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2752 return "st1\\t{%1.<Vetype>}[%2], %0";
2757 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2760 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Little-endian concatenation of a 64-bit value with zero; the zero half
;; costs nothing because writing a D-register clears the upper half.
2763 (define_insn "*aarch64_combinez<mode>"
2764 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2766 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2767 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2768 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2773 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2774 (set_attr "simd" "yes,*,yes")
2775 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of the above: the zero half appears first in the
;; vec_concat because memory lane order is reversed.
2778 (define_insn "*aarch64_combinez_be<mode>"
2779 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2781 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2782 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2783 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2788 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2789 (set_attr "simd" "yes,*,yes")
2790 (set_attr "fp" "*,yes,*")]
;; Combine two D-registers into a Q-register, routing through the internal
;; pattern; operand order adjustment for big-endian is elided here.
2793 (define_expand "aarch64_combine<mode>"
2794 [(match_operand:<VDBL> 0 "register_operand")
2795 (match_operand:VDC 1 "register_operand")
2796 (match_operand:VDC 2 "register_operand")]
2800 if (BYTES_BIG_ENDIAN)
2810 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until reload, then split into the two half moves; the
;; halves are swapped when splitting for big-endian.
2815 (define_insn_and_split "aarch64_combine_internal<mode>"
2816 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2817 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2818 (match_operand:VDC 2 "register_operand" "w")))]
2821 "&& reload_completed"
2824 if (BYTES_BIG_ENDIAN)
2825 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2827 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2830 [(set_attr "type" "multiple")]
;; Builtin-facing combine: explicit low-half then high-half moves.
2833 (define_expand "aarch64_simd_combine<mode>"
2834 [(match_operand:<VDBL> 0 "register_operand")
2835 (match_operand:VDC 1 "register_operand")
2836 (match_operand:VDC 2 "register_operand")]
2839 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2840 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2843 [(set_attr "type" "multiple")]
2846 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q-registers:
;; [SU]ADDL2 / [SU]SUBL2.
2848 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2849 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2850 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2851 (match_operand:VQW 1 "register_operand" "w")
2852 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2853 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2854 (match_operand:VQW 2 "register_operand" "w")
2857 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2858 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub on the LOW halves: [SU]ADDL / [SU]SUBL.
2861 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2863 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2864 (match_operand:VQW 1 "register_operand" "w")
2865 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2866 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2867 (match_operand:VQW 2 "register_operand" "w")
2870 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2871 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Builtin expanders for the "2" (high-half) variants: build the hi-half
;; lane-selection parallel and defer to the _hi_internal insns above.
2875 (define_expand "aarch64_saddl2<mode>"
2876 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2877 (match_operand:VQW 1 "register_operand" "w")
2878 (match_operand:VQW 2 "register_operand" "w")]
2881 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2882 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2887 (define_expand "aarch64_uaddl2<mode>"
2888 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2889 (match_operand:VQW 1 "register_operand" "w")
2890 (match_operand:VQW 2 "register_operand" "w")]
2893 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2894 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2899 (define_expand "aarch64_ssubl2<mode>"
2900 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2901 (match_operand:VQW 1 "register_operand" "w")
2902 (match_operand:VQW 2 "register_operand" "w")]
2905 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2906 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2911 (define_expand "aarch64_usubl2<mode>"
2912 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2913 (match_operand:VQW 1 "register_operand" "w")
2914 (match_operand:VQW 2 "register_operand" "w")]
2917 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2918 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of two full D-register vectors: [SU]ADDL / [SU]SUBL.
2923 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2925 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2926 (match_operand:VD_BHSI 1 "register_operand" "w"))
2928 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2930 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2931 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2934 ;; <su><addsub>w<q>.
;; widen_ssum for Q-register inputs: accumulate the sign-extended low half
;; with SADDW, then fold in the high half with SADDW2.
2936 (define_expand "widen_ssum<mode>3"
2937 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2938 (plus:<VDBLW> (sign_extend:<VDBLW>
2939 (match_operand:VQW 1 "register_operand" ""))
2940 (match_operand:<VDBLW> 2 "register_operand" "")))]
2943 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2944 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2946 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2948 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; D-register input needs only a single SADDW.
2953 (define_expand "widen_ssum<mode>3"
2954 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2955 (plus:<VWIDE> (sign_extend:<VWIDE>
2956 (match_operand:VD_BHSI 1 "register_operand" ""))
2957 (match_operand:<VWIDE> 2 "register_operand" "")))]
2960 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two widen sums above (UADDW / UADDW2).
2964 (define_expand "widen_usum<mode>3"
2965 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2966 (plus:<VDBLW> (zero_extend:<VDBLW>
2967 (match_operand:VQW 1 "register_operand" ""))
2968 (match_operand:<VDBLW> 2 "register_operand" "")))]
2971 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2972 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2974 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2976 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2981 (define_expand "widen_usum<mode>3"
2982 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2983 (plus:<VWIDE> (zero_extend:<VWIDE>
2984 (match_operand:VD_BHSI 1 "register_operand" ""))
2985 (match_operand:<VWIDE> 2 "register_operand" "")))]
2988 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide-plus-narrow add/sub of a full D-register: [SU]ADDW / [SU]SUBW.
2992 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2993 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2994 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2996 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2998 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2999 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Low-half variant of the wide add/sub (operand 3 selects the low lanes).
3002 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3003 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3004 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3007 (match_operand:VQW 2 "register_operand" "w")
3008 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3010 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3011 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant: [SU]ADDW2 / [SU]SUBW2.
3014 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3015 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3016 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3019 (match_operand:VQW 2 "register_operand" "w")
3020 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3022 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3023 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builtin expanders that construct the hi-half parallel and emit the
;; *w2_internal insns above.
3026 (define_expand "aarch64_saddw2<mode>"
3027 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3028 (match_operand:<VWIDE> 1 "register_operand" "w")
3029 (match_operand:VQW 2 "register_operand" "w")]
3032 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3033 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3038 (define_expand "aarch64_uaddw2<mode>"
3039 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3040 (match_operand:<VWIDE> 1 "register_operand" "w")
3041 (match_operand:VQW 2 "register_operand" "w")]
3044 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3045 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3051 (define_expand "aarch64_ssubw2<mode>"
3052 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3053 (match_operand:<VWIDE> 1 "register_operand" "w")
3054 (match_operand:VQW 2 "register_operand" "w")]
3057 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3058 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3063 (define_expand "aarch64_usubw2<mode>"
3064 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3065 (match_operand:<VWIDE> 1 "register_operand" "w")
3066 (match_operand:VQW 2 "register_operand" "w")]
3069 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3070 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3075 ;; <su><r>h<addsub>.
;; Halving add/sub, signed/unsigned, optionally rounding:
;; [SU]HADD, [SU]RHADD, [SU]HSUB, [SU]RHSUB.
3077 (define_insn "aarch64_<sur>h<addsub><mode>"
3078 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3079 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3080 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3083 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3084 [(set_attr "type" "neon_<addsub>_halve<q>")]
3087 ;; <r><addsub>hn<q>.
;; Add/sub returning the (optionally rounded) high half of each result
;; lane, narrowed: ADDHN / SUBHN / RADDHN / RSUBHN.
3089 (define_insn "aarch64_<sur><addsub>hn<mode>"
3090 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3091 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3092 (match_operand:VQN 2 "register_operand" "w")]
3095 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3096 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" form writes the upper half of the destination; operand 1 ("0")
;; carries the already-written lower half, hence the tied constraint.
3099 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3100 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3101 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3102 (match_operand:VQN 2 "register_operand" "w")
3103 (match_operand:VQN 3 "register_operand" "w")]
3106 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3107 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial (carry-less) multiply on byte vectors: PMUL.
3112 (define_insn "aarch64_pmul<mode>"
3113 [(set (match_operand:VB 0 "register_operand" "=w")
3114 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3115 (match_operand:VB 2 "register_operand" "w")]
3118 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3119 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: FP multiply-extended (vector and scalar forms via VHSDF_HSDF).
3124 (define_insn "aarch64_fmulx<mode>"
3125 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3127 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3128 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3131 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3132 [(set_attr "type" "neon_fp_mul_<stype>")]
3135 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane taken from a vector of the opposite width; lane index
;; endian-corrected at output time.
3137 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3138 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3140 [(match_operand:VDQSF 1 "register_operand" "w")
3141 (vec_duplicate:VDQSF
3143 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3144 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3148 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3149 INTVAL (operands[3])));
3150 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3152 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3155 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-width vector.
3157 (define_insn "*aarch64_mulx_elt<mode>"
3158 [(set (match_operand:VDQF 0 "register_operand" "=w")
3160 [(match_operand:VDQF 1 "register_operand" "w")
3163 (match_operand:VDQF 2 "register_operand" "w")
3164 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3168 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3169 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3171 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX where the scalar multiplier is broadcast (dup of a scalar reg),
;; emitted as a by-element multiply using lane 0.
3176 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3177 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3179 [(match_operand:VHSDF 1 "register_operand" "w")
3180 (vec_duplicate:VHSDF
3181 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3184 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3185 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3188 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3189 ;; vmulxd_lane_f64 == vmulx_lane_f64
3190 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX against an extracted vector lane.
3192 (define_insn "*aarch64_vgetfmulx<mode>"
3193 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3195 [(match_operand:<VEL> 1 "register_operand" "w")
3197 (match_operand:VDQF 2 "register_operand" "w")
3198 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3202 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3203 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3205 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub, signed and unsigned: [SU]QADD / [SU]QSUB
;; (BINQOPS iterator over the ss/us plus/minus codes).
3209 (define_insn "aarch64_<su_optab><optab><mode>"
3210 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3211 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3212 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3214 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3215 [(set_attr "type" "neon_<optab><q>")]
3218 ;; suqadd and usqadd
;; Mixed-signedness saturating accumulate; operand 1 is tied to the
;; destination because SUQADD/USQADD read and write the same register.
3220 (define_insn "aarch64_<sur>qadd<mode>"
3221 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3222 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3223 (match_operand:VSDQ_I 2 "register_operand" "w")]
3226 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3227 [(set_attr "type" "neon_qadd<q>")]
;; Saturating extract-narrow of signed input to unsigned result: SQXTUN.
3232 (define_insn "aarch64_sqmovun<mode>"
3233 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3234 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3237 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3238 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3241 ;; sqmovn and uqmovn
;; Saturating extract-narrow keeping signedness: SQXTN / UQXTN.
3243 (define_insn "aarch64_<sur>qmovn<mode>"
3244 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3245 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3248 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3249 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3254 (define_insn "aarch64_s<optab><mode>"
3255 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3257 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3259 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3260 [(set_attr "type" "neon_<optab><q>")]
3265 (define_insn "aarch64_sq<r>dmulh<mode>"
3266 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3268 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3269 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3272 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3273 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; sq<r>dmulh by-lane variants.  Operand 2 is the lane-holding vector
;; (<VCOND> for _lane / 64-bit index space, <VCONQ> for _laneq /
;; 128-bit index space); operand 3 is the immediate lane number,
;; remapped through ENDIAN_LANE_N so GCC's lane numbering matches the
;; architectural one on big-endian.
;; Vector output, _lane (lane index taken from a <VCOND>-mode vector).
3278 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3279 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3281 [(match_operand:VDQHS 1 "register_operand" "w")
3283 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3284 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3288 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3289 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3290 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Vector output, _laneq (lane index into a 128-bit <VCONQ> vector).
3293 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3294 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3296 [(match_operand:VDQHS 1 "register_operand" "w")
3298 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3299 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3303 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3304 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) output, _lane.
3308 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3309 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3311 [(match_operand:SD_HSI 1 "register_operand" "w")
3313 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3314 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3318 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3319 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3320 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) output, _laneq.
3323 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3324 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3326 [(match_operand:SD_HSI 1 "register_operand" "w")
3328 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3333 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3334 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3335 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqrdmlah / sqrdmlsh (selected by SQRDMLH_AS:rdma_as).  Operand 1 is
;; the accumulator and is tied to the output via the "0" constraint,
;; so only operands 2 and 3 appear in the emitted template.
3340 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3341 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3343 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3344 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3345 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3348 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3349 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3352 ;; sqrdml[as]h_lane.
;; By-lane form, vector (VDQHS) output; lane index in operand 4 is
;; flipped for big-endian with ENDIAN_LANE_N (<VCOND> = 64-bit space).
3354 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3355 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3357 [(match_operand:VDQHS 1 "register_operand" "0")
3358 (match_operand:VDQHS 2 "register_operand" "w")
3360 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3361 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3365 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3367 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3369 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-lane form, scalar (SD_HSI) output.
3372 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3373 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3375 [(match_operand:SD_HSI 1 "register_operand" "0")
3376 (match_operand:SD_HSI 2 "register_operand" "w")
3378 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3379 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3383 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3385 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3387 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3390 ;; sqrdml[as]h_laneq.
;; _laneq forms: lane index into a full 128-bit (<VCONQ>) vector.
3392 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3393 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3395 [(match_operand:VDQHS 1 "register_operand" "0")
3396 (match_operand:VDQHS 2 "register_operand" "w")
3398 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3399 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3403 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3405 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3407 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar _laneq form.
3410 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3411 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3413 [(match_operand:SD_HSI 1 "register_operand" "0")
3414 (match_operand:SD_HSI 2 "register_operand" "w")
3416 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3417 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3421 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3423 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3425 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal / sqdmlsl (selected by SBINQOPS:as): widening saturating
;; multiply-accumulate.  Both multiplicands are sign_extend-ed to the
;; wide mode <VWIDE>; operand 1 is the wide accumulator tied to the
;; output ("0" constraint).
3430 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3431 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (match_operand:<VWIDE> 1 "register_operand" "0")
3436 (sign_extend:<VWIDE>
3437 (match_operand:VSD_HSI 2 "register_operand" "w"))
3438 (sign_extend:<VWIDE>
3439 (match_operand:VSD_HSI 3 "register_operand" "w")))
3442 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3443 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane variants: second multiplicand is a lane of operand 3,
;; vec_duplicate-d across the vector; lane number (operand 4) is
;; remapped with ENDIAN_LANE_N for big-endian.
3448 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3449 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3451 (match_operand:<VWIDE> 1 "register_operand" "0")
3454 (sign_extend:<VWIDE>
3455 (match_operand:VD_HSI 2 "register_operand" "w"))
3456 (sign_extend:<VWIDE>
3457 (vec_duplicate:VD_HSI
3459 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3460 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3465 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3467 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3469 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _laneq: lane taken from a 128-bit (<VCONQ>) vector.
3472 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3473 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3475 (match_operand:<VWIDE> 1 "register_operand" "0")
3478 (sign_extend:<VWIDE>
3479 (match_operand:VD_HSI 2 "register_operand" "w"))
3480 (sign_extend:<VWIDE>
3481 (vec_duplicate:VD_HSI
3483 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3484 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3489 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3491 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3493 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form — no vec_duplicate needed.
3496 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3497 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3499 (match_operand:<VWIDE> 1 "register_operand" "0")
3502 (sign_extend:<VWIDE>
3503 (match_operand:SD_HSI 2 "register_operand" "w"))
3504 (sign_extend:<VWIDE>
3506 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3507 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3512 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3514 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3516 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _laneq form.
3519 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3520 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3522 (match_operand:<VWIDE> 1 "register_operand" "0")
3525 (sign_extend:<VWIDE>
3526 (match_operand:SD_HSI 2 "register_operand" "w"))
3527 (sign_extend:<VWIDE>
3529 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3530 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3535 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3537 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3539 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: scalar operand 3 broadcast, always lane 0 in the template.
3544 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3545 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3547 (match_operand:<VWIDE> 1 "register_operand" "0")
3550 (sign_extend:<VWIDE>
3551 (match_operand:VD_HSI 2 "register_operand" "w"))
3552 (sign_extend:<VWIDE>
3553 (vec_duplicate:VD_HSI
3554 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3557 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3558 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l2: same as sqdml[as]l but operating on the HIGH half of
;; 128-bit inputs, selected with a vect_par_cnst_hi_half vec_select.
;; The *_internal insns take the hi-half parallel as an extra operand;
;; the plain-named define_expands build that parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and forward to
;; the internal pattern.
3563 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3566 (match_operand:<VWIDE> 1 "register_operand" "0")
3569 (sign_extend:<VWIDE>
3571 (match_operand:VQ_HSI 2 "register_operand" "w")
3572 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3573 (sign_extend:<VWIDE>
3575 (match_operand:VQ_HSI 3 "register_operand" "w")
3579 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3580 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and emits the internal insn.
3583 (define_expand "aarch64_sqdmlal2<mode>"
3584 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3585 (match_operand:<VWIDE> 1 "register_operand" "w")
3586 (match_operand:VQ_HSI 2 "register_operand" "w")
3587 (match_operand:VQ_HSI 3 "register_operand" "w")]
3590 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3591 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3592 operands[2], operands[3], p));
;; Same shape for the subtracting (sqdmlsl2) variant.
3596 (define_expand "aarch64_sqdmlsl2<mode>"
3597 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3598 (match_operand:<VWIDE> 1 "register_operand" "w")
3599 (match_operand:VQ_HSI 2 "register_operand" "w")
3600 (match_operand:VQ_HSI 3 "register_operand" "w")]
3603 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3604 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3605 operands[2], operands[3], p));
;; Hi-half by-lane internal; lane (operand 4) endian-remapped.
3611 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3612 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3614 (match_operand:<VWIDE> 1 "register_operand" "0")
3617 (sign_extend:<VWIDE>
3619 (match_operand:VQ_HSI 2 "register_operand" "w")
3620 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3621 (sign_extend:<VWIDE>
3622 (vec_duplicate:<VHALF>
3624 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3625 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3630 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3632 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3634 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Hi-half by-laneq internal (<VCONQ> index space).
3637 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3638 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3640 (match_operand:<VWIDE> 1 "register_operand" "0")
3643 (sign_extend:<VWIDE>
3645 (match_operand:VQ_HSI 2 "register_operand" "w")
3646 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3647 (sign_extend:<VWIDE>
3648 (vec_duplicate:<VHALF>
3650 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3651 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3656 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3658 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3660 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; The four lane/laneq expanders below are identical in shape:
;; build the hi-half parallel, forward all operands to *_internal.
3663 (define_expand "aarch64_sqdmlal2_lane<mode>"
3664 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3665 (match_operand:<VWIDE> 1 "register_operand" "w")
3666 (match_operand:VQ_HSI 2 "register_operand" "w")
3667 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3668 (match_operand:SI 4 "immediate_operand" "i")]
3671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3672 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3673 operands[2], operands[3],
3678 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3679 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3680 (match_operand:<VWIDE> 1 "register_operand" "w")
3681 (match_operand:VQ_HSI 2 "register_operand" "w")
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (match_operand:SI 4 "immediate_operand" "i")]
3686 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3687 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3688 operands[2], operands[3],
3693 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3694 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3695 (match_operand:<VWIDE> 1 "register_operand" "w")
3696 (match_operand:VQ_HSI 2 "register_operand" "w")
3697 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3698 (match_operand:SI 4 "immediate_operand" "i")]
3701 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3702 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3703 operands[2], operands[3],
3708 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3709 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3710 (match_operand:<VWIDE> 1 "register_operand" "w")
3711 (match_operand:VQ_HSI 2 "register_operand" "w")
3712 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3713 (match_operand:SI 4 "immediate_operand" "i")]
3716 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3717 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3718 operands[2], operands[3],
;; _n internal: broadcast scalar operand 3, emitted as lane [0].
3723 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3724 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3726 (match_operand:<VWIDE> 1 "register_operand" "0")
3729 (sign_extend:<VWIDE>
3731 (match_operand:VQ_HSI 2 "register_operand" "w")
3732 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3733 (sign_extend:<VWIDE>
3734 (vec_duplicate:<VHALF>
3735 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3738 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3739 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3742 (define_expand "aarch64_sqdmlal2_n<mode>"
3743 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3744 (match_operand:<VWIDE> 1 "register_operand" "w")
3745 (match_operand:VQ_HSI 2 "register_operand" "w")
3746 (match_operand:<VEL> 3 "register_operand" "w")]
3749 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3750 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3751 operands[2], operands[3],
3756 (define_expand "aarch64_sqdmlsl2_n<mode>"
3757 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3758 (match_operand:<VWIDE> 1 "register_operand" "w")
3759 (match_operand:VQ_HSI 2 "register_operand" "w")
3760 (match_operand:<VEL> 3 "register_operand" "w")]
3763 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3764 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3765 operands[2], operands[3],
;; sqdmull: widening multiply, both operands sign_extend-ed to <VWIDE>;
;; no accumulator (contrast with the sqdml[as]l patterns above).
3772 (define_insn "aarch64_sqdmull<mode>"
3773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3776 (sign_extend:<VWIDE>
3777 (match_operand:VSD_HSI 1 "register_operand" "w"))
3778 (sign_extend:<VWIDE>
3779 (match_operand:VSD_HSI 2 "register_operand" "w")))
3782 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3783 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; By-lane variant: lane of operand 2 broadcast via vec_duplicate;
;; lane index (operand 3) endian-remapped with ENDIAN_LANE_N.
3788 (define_insn "aarch64_sqdmull_lane<mode>"
3789 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792 (sign_extend:<VWIDE>
3793 (match_operand:VD_HSI 1 "register_operand" "w"))
3794 (sign_extend:<VWIDE>
3795 (vec_duplicate:VD_HSI
3797 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3798 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3803 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3804 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3806 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _laneq: 128-bit (<VCONQ>) lane index space.
3809 (define_insn "aarch64_sqdmull_laneq<mode>"
3810 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3813 (sign_extend:<VWIDE>
3814 (match_operand:VD_HSI 1 "register_operand" "w"))
3815 (sign_extend:<VWIDE>
3816 (vec_duplicate:VD_HSI
3818 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3824 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3825 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3827 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form — no vec_duplicate.
3830 (define_insn "aarch64_sqdmull_lane<mode>"
3831 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834 (sign_extend:<VWIDE>
3835 (match_operand:SD_HSI 1 "register_operand" "w"))
3836 (sign_extend:<VWIDE>
3838 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3839 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3844 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3845 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3847 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _laneq form.
3850 (define_insn "aarch64_sqdmull_laneq<mode>"
3851 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3854 (sign_extend:<VWIDE>
3855 (match_operand:SD_HSI 1 "register_operand" "w"))
3856 (sign_extend:<VWIDE>
3858 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3859 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3864 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3865 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3867 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: broadcast scalar operand 2, emitted as lane [0].
3872 (define_insn "aarch64_sqdmull_n<mode>"
3873 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876 (sign_extend:<VWIDE>
3877 (match_operand:VD_HSI 1 "register_operand" "w"))
3878 (sign_extend:<VWIDE>
3879 (vec_duplicate:VD_HSI
3880 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3884 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3885 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: hi-half form of sqdmull.  Internals take a
;; vect_par_cnst_hi_half selector operand; expanders build it with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and forward.
3892 (define_insn "aarch64_sqdmull2<mode>_internal"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3896 (sign_extend:<VWIDE>
3898 (match_operand:VQ_HSI 1 "register_operand" "w")
3899 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3900 (sign_extend:<VWIDE>
3902 (match_operand:VQ_HSI 2 "register_operand" "w")
3907 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3908 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3911 (define_expand "aarch64_sqdmull2<mode>"
3912 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3913 (match_operand:VQ_HSI 1 "register_operand" "w")
3914 (match_operand:VQ_HSI 2 "register_operand" "w")]
3917 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3918 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Hi-half by-lane internal; lane (operand 3) endian-remapped.
3925 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3926 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (sign_extend:<VWIDE>
3931 (match_operand:VQ_HSI 1 "register_operand" "w")
3932 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933 (sign_extend:<VWIDE>
3934 (vec_duplicate:<VHALF>
3936 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3937 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3942 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3943 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3945 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Hi-half by-laneq internal (<VCONQ> index space).
3948 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3949 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3952 (sign_extend:<VWIDE>
3954 (match_operand:VQ_HSI 1 "register_operand" "w")
3955 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3956 (sign_extend:<VWIDE>
3957 (vec_duplicate:<VHALF>
3959 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3960 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3965 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3966 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3968 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Lane/laneq expanders — build hi-half parallel, emit internal.
3971 (define_expand "aarch64_sqdmull2_lane<mode>"
3972 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3973 (match_operand:VQ_HSI 1 "register_operand" "w")
3974 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3975 (match_operand:SI 3 "immediate_operand" "i")]
3978 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3979 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3980 operands[2], operands[3],
3985 (define_expand "aarch64_sqdmull2_laneq<mode>"
3986 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3987 (match_operand:VQ_HSI 1 "register_operand" "w")
3988 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3989 (match_operand:SI 3 "immediate_operand" "i")]
3992 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3993 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3994 operands[2], operands[3],
;; _n internal: broadcast scalar operand 2, emitted as lane [0].
4001 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4002 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4005 (sign_extend:<VWIDE>
4007 (match_operand:VQ_HSI 1 "register_operand" "w")
4008 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4009 (sign_extend:<VWIDE>
4010 (vec_duplicate:<VHALF>
4011 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4015 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4016 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4019 (define_expand "aarch64_sqdmull2_n<mode>"
4020 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4021 (match_operand:VQ_HSI 1 "register_operand" "w")
4022 (match_operand:<VEL> 2 "register_operand" "w")]
4025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4026 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shifts.  <sur> selects the signedness/rounding prefix
;; (s/u/sr/ur per the iterator — confirm members in the full file).
;; Register-controlled shift: s/u[r]shl.
4033 (define_insn "aarch64_<sur>shl<mode>"
4034 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4036 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4037 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4040 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4041 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating register-controlled shift, optionally rounding (<r>).
4047 (define_insn "aarch64_<sur>q<r>shl<mode>"
4048 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4050 [(match_operand:VSDQ_I 1 "register_operand" "w")
4051 (match_operand:VSDQ_I 2 "register_operand" "w")]
4054 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4055 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left by immediate; when the shift equals the element
;; bit-size the plain SHLL form is used (the <sur>-prefixed form does
;; not encode that amount).
4060 (define_insn "aarch64_<sur>shll_n<mode>"
4061 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4064 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4068 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4069 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4071 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4073 [(set_attr "type" "neon_shift_imm_long")]
;; Hi-half widening shift-left by immediate, same SHLL2 special case.
4078 (define_insn "aarch64_<sur>shll2_n<mode>"
4079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4080 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4081 (match_operand:SI 2 "immediate_operand" "i")]
4085 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4086 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4088 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4090 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate.
4095 (define_insn "aarch64_<sur>shr_n<mode>"
4096 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4097 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4099 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4102 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4103 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate: operand 1 is the accumulator, tied to
;; the output ("0"); the shifted source is operand 2.
4108 (define_insn "aarch64_<sur>sra_n<mode>"
4109 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4110 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4111 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4113 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4116 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4117 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted
;; field come from operand 1, tied to the output.
4122 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4123 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4124 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4125 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4127 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4130 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4131 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift-left by immediate (optionally unsigned-saturating <u>).
4136 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4137 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4138 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4140 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4143 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4144 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift-right-narrow by immediate.
4150 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4151 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4152 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4154 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4157 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4158 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4162 ;; cm(eq|ge|gt|lt|le)
4163 ;; Note, we have constraints for Dz and Z as different expanders
4164 ;; have different ideas of what should be passed to this pattern.
;; Signed integer compares: alternative 1 is reg-reg (cm<n_optab> with
;; operands possibly swapped via <cmp_1>/<cmp_2>); alternative 2 is
;; compare against zero (#0) when operand 2 matches ZDz.
4166 (define_insn "aarch64_cm<optab><mode>"
4167 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4169 (COMPARISONS:<V_cmp_result>
4170 (match_operand:VDQ_I 1 "register_operand" "w,w")
4171 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4175 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4176 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4177 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI compare: clobbers CC because the GP-register alternative ("r")
;; is split into a compare + cstoredi_neg sequence after reload; the
;; FP-register alternatives re-emit the no-clobber *aarch64_cm<optab>di.
4180 (define_insn_and_split "aarch64_cm<optab>di"
4181 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4184 (match_operand:DI 1 "register_operand" "w,w,r")
4185 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4187 (clobber (reg:CC CC_REGNUM))]
4191 [(set (match_operand:DI 0 "register_operand")
4194 (match_operand:DI 1 "register_operand")
4195 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4198 /* If we are in the general purpose register file,
4199 we split to a sequence of comparison and store. */
4200 if (GP_REGNUM_P (REGNO (operands[0]))
4201 && GP_REGNUM_P (REGNO (operands[1])))
4203 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4204 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4205 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4206 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4209 /* Otherwise, we expand to a similar pattern which does not
4210 clobber CC_REGNUM. */
4212 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI compare in FP registers, no CC clobber.
4215 (define_insn "*aarch64_cm<optab>di"
4216 [(set (match_operand:DI 0 "register_operand" "=w,w")
4219 (match_operand:DI 1 "register_operand" "w,w")
4220 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4222 "TARGET_SIMD && reload_completed"
4224 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4225 cm<optab>\t%d0, %d1, #0"
4226 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned compares (UCOMPARISONS) — no compare-against-zero
;; alternative here, both operands must be registers.
4231 (define_insn "aarch64_cm<optab><mode>"
4232 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4234 (UCOMPARISONS:<V_cmp_result>
4235 (match_operand:VDQ_I 1 "register_operand" "w")
4236 (match_operand:VDQ_I 2 "register_operand" "w")
4239 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4240 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DI compare; same GP-vs-FP split strategy as the signed
;; version above, except the CC mode is plain CCmode.
4243 (define_insn_and_split "aarch64_cm<optab>di"
4244 [(set (match_operand:DI 0 "register_operand" "=w,r")
4247 (match_operand:DI 1 "register_operand" "w,r")
4248 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4250 (clobber (reg:CC CC_REGNUM))]
4254 [(set (match_operand:DI 0 "register_operand")
4257 (match_operand:DI 1 "register_operand")
4258 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4261 /* If we are in the general purpose register file,
4262 we split to a sequence of comparison and store. */
4263 if (GP_REGNUM_P (REGNO (operands[0]))
4264 && GP_REGNUM_P (REGNO (operands[1])))
4266 machine_mode mode = CCmode;
4267 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4268 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4269 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4272 /* Otherwise, we expand to a similar pattern which does not
4273 clobber CC_REGNUM. */
4275 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload unsigned DI compare in FP registers.
4278 (define_insn "*aarch64_cm<optab>di"
4279 [(set (match_operand:DI 0 "register_operand" "=w")
4282 (match_operand:DI 1 "register_operand" "w")
4283 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4285 "TARGET_SIMD && reload_completed"
4286 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4287 [(set_attr "type" "neon_compare")]
4292 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4293 ;; we don't have any insns using ne, and aarch64_vcond outputs
4294 ;; not (neg (eq (and x y) 0))
4295 ;; which is rewritten by simplify_rtx as
4296 ;; plus (eq (and x y) 0) -1.
;; Hence this pattern matches the plus/eq/-1 form rather than neg/ne.
4298 (define_insn "aarch64_cmtst<mode>"
4299 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4300 (plus:<V_cmp_result>
4303 (match_operand:VDQ_I 1 "register_operand" "w")
4304 (match_operand:VDQ_I 2 "register_operand" "w"))
4305 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4306 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4309 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4310 [(set_attr "type" "neon_tst<q>")]
;; DI cmtst: GP-register alternative splits to AND + compare + store
;; (clobbering CC); FP alternative re-emits the no-clobber *cmtstdi.
4313 (define_insn_and_split "aarch64_cmtstdi"
4314 [(set (match_operand:DI 0 "register_operand" "=w,r")
4318 (match_operand:DI 1 "register_operand" "w,r")
4319 (match_operand:DI 2 "register_operand" "w,r"))
4321 (clobber (reg:CC CC_REGNUM))]
4325 [(set (match_operand:DI 0 "register_operand")
4329 (match_operand:DI 1 "register_operand")
4330 (match_operand:DI 2 "register_operand"))
4333 /* If we are in the general purpose register file,
4334 we split to a sequence of comparison and store. */
4335 if (GP_REGNUM_P (REGNO (operands[0]))
4336 && GP_REGNUM_P (REGNO (operands[1])))
4338 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4339 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4340 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4341 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4342 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4345 /* Otherwise, we expand to a similar pattern which does not
4346 clobber CC_REGNUM. */
4348 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload DI cmtst in FP registers.
4351 (define_insn "*aarch64_cmtstdi"
4352 [(set (match_operand:DI 0 "register_operand" "=w")
4356 (match_operand:DI 1 "register_operand" "w")
4357 (match_operand:DI 2 "register_operand" "w"))
4360 "cmtst\t%d0, %d1, %d2"
4361 [(set_attr "type" "neon_tst")]
4364 ;; fcm(eq|ge|gt|le|lt)
;; FP compares: reg-reg alternative plus compare-against-zero (YDz).
;; Note the zero form emits ", 0" with no '#' — matches the
;; upstream pattern; confirm assembler accepts both spellings.
4366 (define_insn "aarch64_cm<optab><mode>"
4367 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4369 (COMPARISONS:<V_cmp_result>
4370 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4371 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4375 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4376 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4377 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4381 ;; Note we can also handle what would be fac(le|lt) by
4382 ;; generating fac(ge|gt).
;; FP absolute compares (FACGE/FACGT family).
4384 (define_insn "aarch64_fac<optab><mode>"
4385 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4387 (FAC_COMPARISONS:<V_cmp_result>
4389 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4391 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4394 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4395 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise add, 64-bit vector modes.
4400 (define_insn "aarch64_addp<mode>"
4401 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4403 [(match_operand:VD_BHSI 1 "register_operand" "w")
4404 (match_operand:VD_BHSI 2 "register_operand" "w")]
4407 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4408 [(set_attr "type" "neon_reduc_add<q>")]
;; Pairwise add of a V2DI down to a scalar DI.
4411 (define_insn "aarch64_addpdi"
4412 [(set (match_operand:DI 0 "register_operand" "=w")
4414 [(match_operand:V2DI 1 "register_operand" "w")]
4418 [(set_attr "type" "neon_reduc_add")]
;; sqrt expander: tries the Newton-Raphson approximation first
;; (aarch64_emit_approx_sqrt); falls through to the insn below when
;; the approximation is not emitted.
4423 (define_expand "sqrt<mode>2"
4424 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4425 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4428 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
;; Hardware FSQRT instruction.
4432 (define_insn "*sqrt<mode>2"
4433 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4434 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4436 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4437 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4440 ;; Patterns for vector struct loads and stores.
;; LD2 of two Q registers (OImode = 2x128-bit register tuple); the
;; inner VQ unspec only carries the element mode for the iterator.
4442 (define_insn "aarch64_simd_ld2<mode>"
4443 [(set (match_operand:OI 0 "register_operand" "=w")
4444 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4445 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4448 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4449 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element struct and replicate to all lanes; the
;; memory operand is BLK since the access size is sub-register.
4452 (define_insn "aarch64_simd_ld2r<mode>"
4453 [(set (match_operand:OI 0 "register_operand" "=w")
4454 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4455 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4458 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4459 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 into a single lane: operand 2 holds the existing tuple (tied
;; via "0"); lane number endian-remapped with ENDIAN_LANE_N.
4462 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4463 [(set (match_operand:OI 0 "register_operand" "=w")
4464 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4465 (match_operand:OI 2 "register_operand" "0")
4466 (match_operand:SI 3 "immediate_operand" "i")
4467 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4471 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4472 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4474 [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander: on big-endian, load then reverse the
;; register list through aarch64_rev_reglistoi so GCC's lane order
;; matches the architectural order.
4477 (define_expand "vec_load_lanesoi<mode>"
4478 [(set (match_operand:OI 0 "register_operand" "=w")
4479 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4480 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4484 if (BYTES_BIG_ENDIAN)
4486 rtx tmp = gen_reg_rtx (OImode);
4487 rtx mask = aarch64_reverse_mask (<MODE>mode);
4488 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4489 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4492 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; ST2 of two Q registers.
4496 (define_insn "aarch64_simd_st2<mode>"
4497 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4498 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4499 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4502 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4503 [(set_attr "type" "neon_store2_2reg<q>")]
4506 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4507 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4508 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4509 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4510 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4511 (match_operand:SI 2 "immediate_operand" "i")]
4515 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4516 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4518 [(set_attr "type" "neon_store2_one_lane<q>")]
4521 (define_expand "vec_store_lanesoi<mode>"
4522 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4523 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4528 if (BYTES_BIG_ENDIAN)
4530 rtx tmp = gen_reg_rtx (OImode);
4531 rtx mask = aarch64_reverse_mask (<MODE>mode);
4532 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4533 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4536 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3 structure load/store patterns for Q-register modes.  Same
;; scheme as the LD2/ST2 family above, with CImode holding the
;; three-vector tuple and aarch64_rev_reglistci for big-endian.
;;
;; Load three whole vectors, de-interleaving.
4540 (define_insn "aarch64_simd_ld3<mode>"
4541 [(set (match_operand:CI 0 "register_operand" "=w")
4542 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4543 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4546 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4547 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one structure and replicate to all lanes (ld3r).
4550 (define_insn "aarch64_simd_ld3r<mode>"
4551 [(set (match_operand:CI 0 "register_operand" "=w")
4552 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4553 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4556 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4557 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Load a single lane; lane number remapped via ENDIAN_LANE_N.
4560 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4561 [(set (match_operand:CI 0 "register_operand" "=w")
4562 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4563 (match_operand:CI 2 "register_operand" "0")
4564 (match_operand:SI 3 "immediate_operand" "i")
4565 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4569 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4570 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4572 [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name expander; reverses the register list on big-endian.
4575 (define_expand "vec_load_lanesci<mode>"
4576 [(set (match_operand:CI 0 "register_operand" "=w")
4577 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4578 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4582 if (BYTES_BIG_ENDIAN)
4584 rtx tmp = gen_reg_rtx (CImode);
4585 rtx mask = aarch64_reverse_mask (<MODE>mode);
4586 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4587 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4590 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
;; Store three whole vectors, interleaving.
4594 (define_insn "aarch64_simd_st3<mode>"
4595 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4596 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4597 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4600 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4601 [(set_attr "type" "neon_store3_3reg<q>")]
4604 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store a single lane of a three-vector tuple.
4605 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4606 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4607 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4608 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4609 (match_operand:SI 2 "immediate_operand" "i")]
4613 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4614 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4616 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name expander; reverses the register list before the store
;; on big-endian.
4619 (define_expand "vec_store_lanesci<mode>"
4620 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4621 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4626 if (BYTES_BIG_ENDIAN)
4628 rtx tmp = gen_reg_rtx (CImode);
4629 rtx mask = aarch64_reverse_mask (<MODE>mode);
4630 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4631 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4634 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 structure load/store patterns for Q-register modes.  Same
;; scheme as the LD2/LD3 families above, with XImode holding the
;; four-vector tuple and aarch64_rev_reglistxi for big-endian.
;;
;; Load four whole vectors, de-interleaving.
4638 (define_insn "aarch64_simd_ld4<mode>"
4639 [(set (match_operand:XI 0 "register_operand" "=w")
4640 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4641 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4644 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4645 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one structure and replicate to all lanes (ld4r).
4648 (define_insn "aarch64_simd_ld4r<mode>"
4649 [(set (match_operand:XI 0 "register_operand" "=w")
4650 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4651 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4654 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4655 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load a single lane; lane number remapped via ENDIAN_LANE_N.
4658 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4659 [(set (match_operand:XI 0 "register_operand" "=w")
4660 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4661 (match_operand:XI 2 "register_operand" "0")
4662 (match_operand:SI 3 "immediate_operand" "i")
4663 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4667 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4668 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4670 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander; reverses the register list on big-endian.
4673 (define_expand "vec_load_lanesxi<mode>"
4674 [(set (match_operand:XI 0 "register_operand" "=w")
4675 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4676 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 if (BYTES_BIG_ENDIAN)
4682 rtx tmp = gen_reg_rtx (XImode);
4683 rtx mask = aarch64_reverse_mask (<MODE>mode);
4684 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4685 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4688 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; Store four whole vectors, interleaving.
4692 (define_insn "aarch64_simd_st4<mode>"
4693 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4694 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4695 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4698 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4699 [(set_attr "type" "neon_store4_4reg<q>")]
4702 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store a single lane of a four-vector tuple.
4703 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4704 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4705 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4706 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4707 (match_operand:SI 2 "immediate_operand" "i")]
4711 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4712 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4714 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander; reverses the register list before the store
;; on big-endian.
4717 (define_expand "vec_store_lanesxi<mode>"
4718 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4719 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4720 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4724 if (BYTES_BIG_ENDIAN)
4726 rtx tmp = gen_reg_rtx (XImode);
4727 rtx mask = aarch64_reverse_mask (<MODE>mode);
4728 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4729 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4732 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Element-reverse every vector of a VSTRUCT register list, used by the
;; big-endian vec_load/store_lanes expanders above.  Operand 2 is the
;; byte-permute mask from aarch64_reverse_mask.  After reload this
;; splits into one tbl per constituent 128-bit register; the earlyclobber
;; "=&w" keeps the output list from overlapping the input while the
;; per-register tbls are emitted.
4736 (define_insn_and_split "aarch64_rev_reglist<mode>"
4737 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4739 [(match_operand:VSTRUCT 1 "register_operand" "w")
4740 (match_operand:V16QI 2 "register_operand" "w")]
4741 UNSPEC_REV_REGLIST))]
4744 "&& reload_completed"
4748 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4749 for (i = 0; i < nregs; i++)
4751 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4752 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4753 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4757 [(set_attr "type" "neon_tbl1_q")
4758 (set_attr "length" "<insn_count>")]
4761 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the opaque VSTRUCT (register-list) modes: before
;; reload, force the source into a register when the destination is not
;; one, so the insn below always sees at least one register operand.
4763 (define_expand "mov<mode>"
4764 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4765 (match_operand:VSTRUCT 1 "general_operand" ""))]
4768 if (can_create_pseudo_p ())
4770 if (GET_CODE (operands[0]) != REG)
4771 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian VSTRUCT moves: reg-reg (split elsewhere, hence type
;; "multiple"), or single st1/ld1 over the whole register list.
4775 (define_insn "*aarch64_mov<mode>"
4776 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4777 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4778 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4779 && (register_operand (operands[0], <MODE>mode)
4780 || register_operand (operands[1], <MODE>mode))"
4783 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4784 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4785 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4786 neon_load<nregs>_<nregs>reg_q")
4787 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian single-vector load/store as ld1/st1, wrapped in an unspec
;; so they are not rewritten into plain (element-order-sensitive) moves.
4790 (define_insn "aarch64_be_ld1<mode>"
4791 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4792 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4793 "aarch64_simd_struct_operand" "Utv")]
4796 "ld1\\t{%0<Vmtype>}, %1"
4797 [(set_attr "type" "neon_load1_1reg<q>")]
;; Store counterpart of aarch64_be_ld1.
4800 (define_insn "aarch64_be_st1<mode>"
4801 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4802 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4805 "st1\\t{%1<Vmtype>}, %0"
4806 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of the opaque tuple modes (OI = 2, CI = 3, XI = 4
;; vectors).  Register-register copies are type "multiple" and are split
;; post-reload (see the define_splits below); memory forms use lengths
;; of 4 bytes per instruction.
4809 (define_insn "*aarch64_be_movoi"
4810 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4811 (match_operand:OI 1 "general_operand" " w,w,m"))]
4812 "TARGET_SIMD && BYTES_BIG_ENDIAN
4813 && (register_operand (operands[0], OImode)
4814 || register_operand (operands[1], OImode))"
4819 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4820 (set_attr "length" "8,4,4")]
;; Three-vector (CImode) big-endian move; reg-reg form is 3 moves.
4823 (define_insn "*aarch64_be_movci"
4824 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4825 (match_operand:CI 1 "general_operand" " w,w,o"))]
4826 "TARGET_SIMD && BYTES_BIG_ENDIAN
4827 && (register_operand (operands[0], CImode)
4828 || register_operand (operands[1], CImode))"
4830 [(set_attr "type" "multiple")
4831 (set_attr "length" "12,4,4")]
;; Four-vector (XImode) big-endian move; reg-reg form is 4 moves.
4834 (define_insn "*aarch64_be_movxi"
4835 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4836 (match_operand:XI 1 "general_operand" " w,w,o"))]
4837 "TARGET_SIMD && BYTES_BIG_ENDIAN
4838 && (register_operand (operands[0], XImode)
4839 || register_operand (operands[1], XImode))"
4841 [(set_attr "type" "multiple")
4842 (set_attr "length" "16,4,4")]
;; Post-reload splits for the tuple-mode moves above.
;; OImode reg-reg move -> two TImode register moves.
4846 [(set (match_operand:OI 0 "register_operand")
4847 (match_operand:OI 1 "register_operand"))]
4848 "TARGET_SIMD && reload_completed"
4851 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CImode move: reg-reg -> three TImode moves; otherwise, on big-endian,
;; an OImode move for the first two vectors plus a V16QI move (via a
;; TImode subreg) for the third.
4856 [(set (match_operand:CI 0 "nonimmediate_operand")
4857 (match_operand:CI 1 "general_operand"))]
4858 "TARGET_SIMD && reload_completed"
4861 if (register_operand (operands[0], CImode)
4862 && register_operand (operands[1], CImode))
4864 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4867 else if (BYTES_BIG_ENDIAN)
4869 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4870 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4871 emit_move_insn (gen_lowpart (V16QImode,
4872 simplify_gen_subreg (TImode, operands[0],
4874 gen_lowpart (V16QImode,
4875 simplify_gen_subreg (TImode, operands[1],
;; XImode move: reg-reg -> four TImode moves; otherwise, on big-endian,
;; two OImode moves at byte offsets 0 and 32.
4884 [(set (match_operand:XI 0 "nonimmediate_operand")
4885 (match_operand:XI 1 "general_operand"))]
4886 "TARGET_SIMD && reload_completed"
4889 if (register_operand (operands[0], XImode)
4890 && register_operand (operands[1], XImode))
4892 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4895 else if (BYTES_BIG_ENDIAN)
4897 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4898 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4899 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4900 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for ld<n>r (load-and-replicate): wrap the pointer in
;; a BLKmode MEM with an explicit size, then dispatch to the matching
;; aarch64_simd_ld<n>r insn.
4907 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4908 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4909 (match_operand:DI 1 "register_operand" "w")
4910 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4914 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4917 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; LD2 into D-registers: the OImode result models each 64-bit vector
;; padded to 128 bits (concatenated with zeros).  The _le/_be variants
;; differ only in which side of the concatenation holds the loaded
;; value, reflecting endian lane layout.
4922 (define_insn "aarch64_ld2<mode>_dreg_le"
4923 [(set (match_operand:OI 0 "register_operand" "=w")
4928 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4930 (vec_duplicate:VD (const_int 0)))
4932 (unspec:VD [(match_dup 1)]
4934 (vec_duplicate:VD (const_int 0)))) 0))]
4935 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4936 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4937 [(set_attr "type" "neon_load2_2reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
4940 (define_insn "aarch64_ld2<mode>_dreg_be"
4941 [(set (match_operand:OI 0 "register_operand" "=w")
4945 (vec_duplicate:VD (const_int 0))
4947 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4950 (vec_duplicate:VD (const_int 0))
4951 (unspec:VD [(match_dup 1)]
4953 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4954 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4955 [(set_attr "type" "neon_load2_2reg<q>")]
;; 64-bit scalar (DX) element modes: ld2 degenerates to ld1 of two
;; .1d registers.
4958 (define_insn "aarch64_ld2<mode>_dreg_le"
4959 [(set (match_operand:OI 0 "register_operand" "=w")
4964 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4968 (unspec:DX [(match_dup 1)]
4970 (const_int 0))) 0))]
4971 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4972 "ld1\\t{%S0.1d - %T0.1d}, %1"
4973 [(set_attr "type" "neon_load1_2reg<q>")]
;; Big-endian DX counterpart.
4976 (define_insn "aarch64_ld2<mode>_dreg_be"
4977 [(set (match_operand:OI 0 "register_operand" "=w")
4983 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4987 (unspec:DX [(match_dup 1)]
4989 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4990 "ld1\\t{%S0.1d - %T0.1d}, %1"
4991 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into D-registers (CImode result); same zero-padding scheme as the
;; ld2 dreg patterns above, with little/big-endian variants.
4994 (define_insn "aarch64_ld3<mode>_dreg_le"
4995 [(set (match_operand:CI 0 "register_operand" "=w")
5001 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5003 (vec_duplicate:VD (const_int 0)))
5005 (unspec:VD [(match_dup 1)]
5007 (vec_duplicate:VD (const_int 0))))
5009 (unspec:VD [(match_dup 1)]
5011 (vec_duplicate:VD (const_int 0)))) 0))]
5012 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5013 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5014 [(set_attr "type" "neon_load3_3reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
5017 (define_insn "aarch64_ld3<mode>_dreg_be"
5018 [(set (match_operand:CI 0 "register_operand" "=w")
5023 (vec_duplicate:VD (const_int 0))
5025 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5028 (vec_duplicate:VD (const_int 0))
5029 (unspec:VD [(match_dup 1)]
5032 (vec_duplicate:VD (const_int 0))
5033 (unspec:VD [(match_dup 1)]
5035 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5036 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037 [(set_attr "type" "neon_load3_3reg<q>")]
;; 64-bit scalar (DX) element modes: ld3 degenerates to ld1 of three
;; .1d registers.
5040 (define_insn "aarch64_ld3<mode>_dreg_le"
5041 [(set (match_operand:CI 0 "register_operand" "=w")
5047 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5051 (unspec:DX [(match_dup 1)]
5055 (unspec:DX [(match_dup 1)]
5057 (const_int 0))) 0))]
5058 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5059 "ld1\\t{%S0.1d - %U0.1d}, %1"
5060 [(set_attr "type" "neon_load1_3reg<q>")]
;; Big-endian DX counterpart.
5063 (define_insn "aarch64_ld3<mode>_dreg_be"
5064 [(set (match_operand:CI 0 "register_operand" "=w")
5071 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5075 (unspec:DX [(match_dup 1)]
5079 (unspec:DX [(match_dup 1)]
5081 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5082 "ld1\\t{%S0.1d - %U0.1d}, %1"
5083 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into D-registers (XImode result); same zero-padding scheme as the
;; ld2/ld3 dreg patterns above, with little/big-endian variants.
5086 (define_insn "aarch64_ld4<mode>_dreg_le"
5087 [(set (match_operand:XI 0 "register_operand" "=w")
5093 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5095 (vec_duplicate:VD (const_int 0)))
5097 (unspec:VD [(match_dup 1)]
5099 (vec_duplicate:VD (const_int 0))))
5102 (unspec:VD [(match_dup 1)]
5104 (vec_duplicate:VD (const_int 0)))
5106 (unspec:VD [(match_dup 1)]
5108 (vec_duplicate:VD (const_int 0))))) 0))]
5109 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5110 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5111 [(set_attr "type" "neon_load4_4reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
5114 (define_insn "aarch64_ld4<mode>_dreg_be"
5115 [(set (match_operand:XI 0 "register_operand" "=w")
5120 (vec_duplicate:VD (const_int 0))
5122 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5125 (vec_duplicate:VD (const_int 0))
5126 (unspec:VD [(match_dup 1)]
5130 (vec_duplicate:VD (const_int 0))
5131 (unspec:VD [(match_dup 1)]
5134 (vec_duplicate:VD (const_int 0))
5135 (unspec:VD [(match_dup 1)]
5137 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5138 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5139 [(set_attr "type" "neon_load4_4reg<q>")]
;; 64-bit scalar (DX) element modes: ld4 degenerates to ld1 of four
;; .1d registers.
5142 (define_insn "aarch64_ld4<mode>_dreg_le"
5143 [(set (match_operand:XI 0 "register_operand" "=w")
5149 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5153 (unspec:DX [(match_dup 1)]
5158 (unspec:DX [(match_dup 1)]
5162 (unspec:DX [(match_dup 1)]
5164 (const_int 0)))) 0))]
5165 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5166 "ld1\\t{%S0.1d - %V0.1d}, %1"
5167 [(set_attr "type" "neon_load1_4reg<q>")]
;; Big-endian DX counterpart.
5170 (define_insn "aarch64_ld4<mode>_dreg_be"
5171 [(set (match_operand:XI 0 "register_operand" "=w")
5178 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5182 (unspec:DX [(match_dup 1)]
5187 (unspec:DX [(match_dup 1)]
5191 (unspec:DX [(match_dup 1)]
5193 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5194 "ld1\\t{%S0.1d - %V0.1d}, %1"
5195 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin load expanders.
;; D-register tuple load: build a sized BLKmode MEM from the pointer in
;; operand 1 and dispatch to the endian-specific _dreg pattern.
5198 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5199 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5200 (match_operand:DI 1 "register_operand" "r")
5201 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5204 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5205 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5207 if (BYTES_BIG_ENDIAN)
5208 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
5211 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
;; Single-vector load: plain move on little-endian, aarch64_be_ld1 on
;; big-endian to preserve lane numbering.
5216 (define_expand "aarch64_ld1<VALL_F16:mode>"
5217 [(match_operand:VALL_F16 0 "register_operand")
5218 (match_operand:DI 1 "register_operand")]
5221 machine_mode mode = <VALL_F16:MODE>mode;
5222 rtx mem = gen_rtx_MEM (mode, operands[1]);
5224 if (BYTES_BIG_ENDIAN)
5225 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5227 emit_move_insn (operands[0], mem);
;; Q-register tuple load: dispatch straight to aarch64_simd_ld<n>.
5231 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5232 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5233 (match_operand:DI 1 "register_operand" "r")
5234 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5237 machine_mode mode = <VSTRUCT:MODE>mode;
5238 rtx mem = gen_rtx_MEM (mode, operands[1]);
5240 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; Lane load: bounds-check the lane index, then use the _lane insn.
5244 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5245 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5246 (match_operand:DI 1 "register_operand" "w")
5247 (match_operand:VSTRUCT 2 "register_operand" "0")
5248 (match_operand:SI 3 "immediate_operand" "i")
5249 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5253 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5256 aarch64_simd_lane_bounds (operands[3], 0,
5257 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5259 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5260 operands[0], mem, operands[2], operands[3]));
5264 ;; Expanders for builtins to extract vector registers from large
5265 ;; opaque integer modes.
;; Extract vector <part> from a tuple register.  Each constituent vector
;; occupies a 16-byte slot, hence offset = part * 16; the D-reg variant
;; reads the doubled (<VDC:VDBL>) mode and takes its low half.
5269 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5270 [(match_operand:VDC 0 "register_operand" "=w")
5271 (match_operand:VSTRUCT 1 "register_operand" "w")
5272 (match_operand:SI 2 "immediate_operand" "i")]
5275 int part = INTVAL (operands[2]);
5276 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5277 int offset = part * 16;
5279 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5280 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Q-register variant: the subreg extraction is a direct move.
5286 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5287 [(match_operand:VQ 0 "register_operand" "=w")
5288 (match_operand:VSTRUCT 1 "register_operand" "w")
5289 (match_operand:SI 2 "immediate_operand" "i")]
5292 int part = INTVAL (operands[2]);
5293 int offset = part * 16;
5295 emit_move_insn (operands[0],
5296 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5300 ;; Permuted-store expanders for neon intrinsics.
5302 ;; Permute instructions
;; Constant permute: defer entirely to aarch64_expand_vec_perm_const,
;; which picks the best instruction sequence for the mask.
5306 (define_expand "vec_perm_const<mode>"
5307 [(match_operand:VALL_F16 0 "register_operand")
5308 (match_operand:VALL_F16 1 "register_operand")
5309 (match_operand:VALL_F16 2 "register_operand")
5310 (match_operand:<V_cmp_result> 3)]
5313 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5314 operands[2], operands[3]))
;; Variable permute (byte vectors only): aarch64_expand_vec_perm emits
;; the tbl-based sequence.
5320 (define_expand "vec_perm<mode>"
5321 [(match_operand:VB 0 "register_operand")
5322 (match_operand:VB 1 "register_operand")
5323 (match_operand:VB 2 "register_operand")
5324 (match_operand:VB 3 "register_operand")]
5327 aarch64_expand_vec_perm (operands[0], operands[1],
5328 operands[2], operands[3]);
;; Table lookup (tbl/tbx) patterns.  One source register:
5332 (define_insn "aarch64_tbl1<mode>"
5333 [(set (match_operand:VB 0 "register_operand" "=w")
5334 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5335 (match_operand:VB 2 "register_operand" "w")]
5338 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5339 [(set_attr "type" "neon_tbl1<q>")]
5342 ;; Two source registers.
;; tbl over an OImode pair of table registers.
5344 (define_insn "aarch64_tbl2v16qi"
5345 [(set (match_operand:V16QI 0 "register_operand" "=w")
5346 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5347 (match_operand:V16QI 2 "register_operand" "w")]
5350 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5351 [(set_attr "type" "neon_tbl2_q")]
;; Two-register tbl with VB-mode result/index.
5354 (define_insn "aarch64_tbl3<mode>"
5355 [(set (match_operand:VB 0 "register_operand" "=w")
5356 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5357 (match_operand:VB 2 "register_operand" "w")]
5360 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5361 [(set_attr "type" "neon_tbl3")]
;; Two-register tbx: operand 1 supplies the fall-through values, so it
;; is tied to the output ("0").
5364 (define_insn "aarch64_tbx4<mode>"
5365 [(set (match_operand:VB 0 "register_operand" "=w")
5366 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5367 (match_operand:OI 2 "register_operand" "w")
5368 (match_operand:VB 3 "register_operand" "w")]
5371 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5372 [(set_attr "type" "neon_tbl4")]
5375 ;; Three source registers.
;; Three-register tbl (table in a CImode triple).
5377 (define_insn "aarch64_qtbl3<mode>"
5378 [(set (match_operand:VB 0 "register_operand" "=w")
5379 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5380 (match_operand:VB 2 "register_operand" "w")]
5383 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5384 [(set_attr "type" "neon_tbl3")]
;; Three-register tbx; operand 1 (fall-through values) tied to output.
5387 (define_insn "aarch64_qtbx3<mode>"
5388 [(set (match_operand:VB 0 "register_operand" "=w")
5389 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5390 (match_operand:CI 2 "register_operand" "w")
5391 (match_operand:VB 3 "register_operand" "w")]
5394 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5395 [(set_attr "type" "neon_tbl3")]
5398 ;; Four source registers.
;; Four-register tbl (table in an XImode quadruple).
5400 (define_insn "aarch64_qtbl4<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5403 (match_operand:VB 2 "register_operand" "w")]
5406 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5407 [(set_attr "type" "neon_tbl4")]
;; Four-register tbx; operand 1 (fall-through values) tied to output.
5410 (define_insn "aarch64_qtbx4<mode>"
5411 [(set (match_operand:VB 0 "register_operand" "=w")
5412 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5413 (match_operand:XI 2 "register_operand" "w")
5414 (match_operand:VB 3 "register_operand" "w")]
5417 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5418 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OImode register pair (for the
;; two-register tbl); split after reload by aarch64_split_combinev16qi.
5421 (define_insn_and_split "aarch64_combinev16qi"
5422 [(set (match_operand:OI 0 "register_operand" "=w")
5423 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5424 (match_operand:V16QI 2 "register_operand" "w")]
5428 "&& reload_completed"
5431 aarch64_split_combinev16qi (operands);
5434 [(set_attr "type" "multiple")]
;; Permute instructions (zip/uzp/trn via the PERMUTE iterators).
5437 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5438 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5439 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5440 (match_operand:VALL_F16 2 "register_operand" "w")]
5443 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5444 [(set_attr "type" "neon_permute<q>")]
5447 ;; Note immediate (third) operand is lane index not byte index.
;; ext: concatenate-and-extract; the lane index is scaled to a byte
;; index here before printing.
5448 (define_insn "aarch64_ext<mode>"
5449 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5450 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5451 (match_operand:VALL_F16 2 "register_operand" "w")
5452 (match_operand:SI 3 "immediate_operand" "i")]
5456 operands[3] = GEN_INT (INTVAL (operands[3])
5457 * GET_MODE_UNIT_SIZE (<MODE>mode));
5458 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5460 [(set_attr "type" "neon_ext<q>")]
;; rev16/rev32/rev64 element reversal (REVERSE iterator).
5463 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5464 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5465 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5468 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5469 [(set_attr "type" "neon_rev<q>")]
;; D-register tuple stores.  The VD variants emit real st2/st3/st4; the
;; DX (64-bit scalar element) variants degenerate to st1 of .1d regs,
;; mirroring the ld<n> dreg patterns above.
5472 (define_insn "aarch64_st2<mode>_dreg"
5473 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5474 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5475 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5478 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5479 [(set_attr "type" "neon_store2_2reg")]
;; DX variant: st1 of two .1d registers.
5482 (define_insn "aarch64_st2<mode>_dreg"
5483 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5484 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5485 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 "st1\\t{%S1.1d - %T1.1d}, %0"
5489 [(set_attr "type" "neon_store1_2reg")]
;; st3 of three D registers.
5492 (define_insn "aarch64_st3<mode>_dreg"
5493 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5494 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5495 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5498 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5499 [(set_attr "type" "neon_store3_3reg")]
;; DX variant: st1 of three .1d registers.
5502 (define_insn "aarch64_st3<mode>_dreg"
5503 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5504 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5505 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5508 "st1\\t{%S1.1d - %U1.1d}, %0"
5509 [(set_attr "type" "neon_store1_3reg")]
;; st4 of four D registers.
5512 (define_insn "aarch64_st4<mode>_dreg"
5513 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5514 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5515 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5518 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5519 [(set_attr "type" "neon_store4_4reg")]
;; DX variant: st1 of four .1d registers.
5522 (define_insn "aarch64_st4<mode>_dreg"
5523 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5524 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5525 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5528 "st1\\t{%S1.1d - %V1.1d}, %0"
5529 [(set_attr "type" "neon_store1_4reg")]
;; Builtin store expanders (mirror the load expanders above).
;; D-register tuple store: sized BLKmode MEM, dispatch to _dreg pattern.
5532 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5533 [(match_operand:DI 0 "register_operand" "r")
5534 (match_operand:VSTRUCT 1 "register_operand" "w")
5535 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5538 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5539 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5541 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Q-register tuple store: dispatch to aarch64_simd_st<n>.
5545 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5546 [(match_operand:DI 0 "register_operand" "r")
5547 (match_operand:VSTRUCT 1 "register_operand" "w")
5548 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5551 machine_mode mode = <VSTRUCT:MODE>mode;
5552 rtx mem = gen_rtx_MEM (mode, operands[0]);
5554 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Lane store: sized BLKmode MEM, then the _lane insn.
5558 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5559 [(match_operand:DI 0 "register_operand" "r")
5560 (match_operand:VSTRUCT 1 "register_operand" "w")
5561 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5562 (match_operand:SI 2 "immediate_operand")]
5565 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5566 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5569 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5570 mem, operands[1], operands[2]));
;; Single-vector store: plain move on little-endian, aarch64_be_st1 on
;; big-endian to preserve lane numbering.
5574 (define_expand "aarch64_st1<VALL_F16:mode>"
5575 [(match_operand:DI 0 "register_operand")
5576 (match_operand:VALL_F16 1 "register_operand")]
5579 machine_mode mode = <VALL_F16:MODE>mode;
5580 rtx mem = gen_rtx_MEM (mode, operands[0]);
5582 if (BYTES_BIG_ENDIAN)
5583 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5585 emit_move_insn (mem, operands[1]);
5589 ;; Expander for builtins to insert vector registers into large
5590 ;; opaque integer modes.
5592 ;; Q-register list. We don't need a D-reg inserter as we zero
5593 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert Q-register operand 2 as vector <part> of a tuple: copy the
;; whole tuple (operand 1 -> operand 0), then overwrite the 16-byte
;; slot at offset part * 16 via a subreg store.
5595 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5596 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5597 (match_operand:VSTRUCT 1 "register_operand" "0")
5598 (match_operand:VQ 2 "register_operand" "w")
5599 (match_operand:SI 3 "immediate_operand" "i")]
5602 int part = INTVAL (operands[3]);
5603 int offset = part * 16;
5605 emit_move_insn (operands[0], operands[1]);
5606 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5611 ;; Standard pattern name vec_init<mode>.
;; Standard-name vector initializer: all work done in
;; aarch64_expand_vector_init.
5613 (define_expand "vec_init<mode>"
5614 [(match_operand:VALL_F16 0 "register_operand" "")
5615 (match_operand 1 "" "")]
5618 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load one element from memory and replicate it to every lane (ld1r);
;; matches a vec_duplicate of a memory scalar directly.
5622 (define_insn "*aarch64_simd_ld1r<mode>"
5623 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5624 (vec_duplicate:VALL_F16
5625 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5627 "ld1r\\t{%0.<Vtype>}, %1"
5628 [(set_attr "type" "neon_load1_all_lanes")]
;; Vector floating-point reciprocal estimate (FRECPE) for the VHSDF
;; (half/single/double float vector) modes.
5631 (define_insn "aarch64_frecpe<mode>"
5632 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5633 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5636 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5637 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar floating-point reciprocal operation on GPF_F16 modes; the exact
;; instruction is selected by the FRECP iterator's frecp_suffix
;; (frecp<suffix>).
5640 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5641 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5642 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5645 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5646 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS) covering both vector and
;; scalar modes (VHSDF_HSDF).
5649 (define_insn "aarch64_frecps<mode>"
5650 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5652 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5653 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5656 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5657 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on vectors of SImode
;; elements.
5660 (define_insn "aarch64_urecpe<mode>"
5661 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5662 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5665 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5666 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5668 ;; Standard pattern name vec_extract<mode>.
;; Standard vec_extract<mode> pattern: extract the lane selected by
;; immediate operand 2 from vector operand 1, delegating to the
;; aarch64_get_lane pattern.
5670 (define_expand "vec_extract<mode>"
5671 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5672 (match_operand:VALL_F16 1 "register_operand" "")
5673 (match_operand:SI 2 "immediate_operand" "")]
5677 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single-round instruction (aese/aesd per the aes_op iterator).
;; Operand 1 is tied to the output register ("0"); only operand 2 is
;; named explicitly in the output template.
5683 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5684 [(set (match_operand:V16QI 0 "register_operand" "=w")
5685 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5686 (match_operand:V16QI 2 "register_operand" "w")]
5688 "TARGET_SIMD && TARGET_CRYPTO"
5689 "aes<aes_op>\\t%0.16b, %2.16b"
5690 [(set_attr "type" "crypto_aese")]
5693 ;; When AES/AESMC fusion is enabled we want the register allocation to
5697 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AES mix-columns instruction (aesmc/aesimc per the aesmc_op iterator).
;; Two alternatives: the first ties operand 1 to operand 0 and is enabled
;; only when AES/AESMC fusion is active (so the register allocator keeps
;; the fused pair on the same register); the untied second alternative is
;; always enabled.
5699 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5700 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5701 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5703 "TARGET_SIMD && TARGET_CRYPTO"
5704 "aes<aesmc_op>\\t%0.16b, %1.16b"
5705 [(set_attr "type" "crypto_aesmc")
5706 (set_attr_alternative "enabled"
5707 [(if_then_else (match_test
5708 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5709 (const_string "yes" )
5710 (const_string "no"))
5711 (const_string "yes")])]
;; SHA1 hash-update operation on an SImode value held in a SIMD
;; register ("=w" / "w" constraints).
5716 (define_insn "aarch64_crypto_sha1hsi"
5717 [(set (match_operand:SI 0 "register_operand" "=w")
5718 (unspec:SI [(match_operand:SI 1
5719 "register_operand" "w")]
5721 "TARGET_SIMD && TARGET_CRYPTO"
5723 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash-update taking element 0 of a V4SI input; restricted to
;; little-endian (a separate big-endian pattern selects element 3 to
;; match GCC's lane numbering there).
5726 (define_insn "aarch64_crypto_sha1hv4si"
5727 [(set (match_operand:SI 0 "register_operand" "=w")
5728 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5729 (parallel [(const_int 0)]))]
5731 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5733 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart of the V4SI SHA1 hash-update pattern: selects
;; element 3, which corresponds to architectural lane 0 under GCC's
;; big-endian lane numbering.
5736 (define_insn "aarch64_be_crypto_sha1hv4si"
5737 [(set (match_operand:SI 0 "register_operand" "=w")
5738 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5739 (parallel [(const_int 3)]))]
5741 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5743 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update 1 (sha1su1); operand 1 is tied to the output
;; register, so only operand 2 appears in the template.
5746 (define_insn "aarch64_crypto_sha1su1v4si"
5747 [(set (match_operand:V4SI 0 "register_operand" "=w")
5748 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5749 (match_operand:V4SI 2 "register_operand" "w")]
5751 "TARGET_SIMD && TARGET_CRYPTO"
5752 "sha1su1\\t%0.4s, %2.4s"
5753 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash-update instructions (sha1<sha1_op>): 128-bit state in the
;; tied operands 0/1, scalar hash value in operand 2, message schedule
;; in operand 3.
5756 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5757 [(set (match_operand:V4SI 0 "register_operand" "=w")
5758 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5759 (match_operand:SI 2 "register_operand" "w")
5760 (match_operand:V4SI 3 "register_operand" "w")]
5762 "TARGET_SIMD && TARGET_CRYPTO"
5763 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5764 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update 0 (sha1su0): three V4SI operands with operand 1
;; tied to the output register.
5767 (define_insn "aarch64_crypto_sha1su0v4si"
5768 [(set (match_operand:V4SI 0 "register_operand" "=w")
5769 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5770 (match_operand:V4SI 2 "register_operand" "w")
5771 (match_operand:V4SI 3 "register_operand" "w")]
5773 "TARGET_SIMD && TARGET_CRYPTO"
5774 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5775 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash-update instructions (sha256h/sha256h2 per the sha256_op
;; iterator); operand 1 is tied to the output register.
5780 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5781 [(set (match_operand:V4SI 0 "register_operand" "=w")
5782 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5783 (match_operand:V4SI 2 "register_operand" "w")
5784 (match_operand:V4SI 3 "register_operand" "w")]
5786 "TARGET_SIMD && TARGET_CRYPTO"
5787 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5788 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update 0 (sha256su0); operand 1 is tied to the
;; output register, so only operand 2 appears in the template.
5791 (define_insn "aarch64_crypto_sha256su0v4si"
5792 [(set (match_operand:V4SI 0 "register_operand" "=w")
5793 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5794 (match_operand:V4SI 2 "register_operand" "w")]
5796 "TARGET_SIMD && TARGET_CRYPTO"
5797 "sha256su0\\t%0.4s, %2.4s"
5798 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update 1 (sha256su1): three V4SI operands with
;; operand 1 tied to the output register.
5801 (define_insn "aarch64_crypto_sha256su1v4si"
5802 [(set (match_operand:V4SI 0 "register_operand" "=w")
5803 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5804 (match_operand:V4SI 2 "register_operand" "w")
5805 (match_operand:V4SI 3 "register_operand" "w")]
5807 "TARGET_SIMD && TARGET_CRYPTO"
5808 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5809 [(set_attr "type" "crypto_sha256_slow")]
;; Polynomial multiply long: 64x64 -> 128-bit (pmull) on the low
;; D-register halves; the TImode result occupies a full Q register.
5814 (define_insn "aarch64_crypto_pmulldi"
5815 [(set (match_operand:TI 0 "register_operand" "=w")
5816 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5817 (match_operand:DI 2 "register_operand" "w")]
5819 "TARGET_SIMD && TARGET_CRYPTO"
5820 "pmull\\t%0.1q, %1.1d, %2.1d"
5821 [(set_attr "type" "neon_mul_d_long")]
5824 (define_insn "aarch64_crypto_pmullv2di"
5825 [(set (match_operand:TI 0 "register_operand" "=w")
5826 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5827 (match_operand:V2DI 2 "register_operand" "w")]
5829 "TARGET_SIMD && TARGET_CRYPTO"
5830 "pmull2\\t%0.1q, %1.2d, %2.2d"
5831 [(set_attr "type" "neon_mul_d_long")]