1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Full-vector move expander (all vector modes including FP16 variants).
;; Legitimizes the operands before the move insn is emitted: a store of
;; immediate zero may stay as-is only when it can be written through xzr,
;; otherwise the source is forced into a register.
;; NOTE(review): this extract omits some original lines (gaps in the
;; embedded numbering, e.g. the insn condition) -- confirm structure
;; against the full aarch64-simd.md before editing.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
23 (match_operand:VALL_F16MOV 1 "general_operand"))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
39 /* If a constant is too complex to force to memory (e.g. because it
40 contains CONST_POLY_INTs), build it up from individual elements instead.
41 We should only need to do this before RA; aarch64_legitimate_constant_p
42 should ensure that we don't try to rematerialize the constant later. */
43 if (GET_CODE (operands[1]) == CONST_VECTOR
44 && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
46 aarch64_expand_vector_init (operands[0], operands[1]);
;; Misaligned vector move: only available when the target permits
;; unaligned accesses.  Must not fail at expand time, hence the
;; force_reg below for the mem := constant case.
52 (define_expand "movmisalign<mode>"
53 [(set (match_operand:VALL 0 "nonimmediate_operand")
54 (match_operand:VALL 1 "general_operand"))]
55 "TARGET_SIMD && !STRICT_ALIGNMENT"
57 /* This pattern is not permitted to fail during expansion: if both arguments
58 are non-registers (e.g. memory := constant, which can be created by the
59 auto-vectorizer), force operand 1 into a register. */
60 if (!register_operand (operands[0], <MODE>mode)
61 && !register_operand (operands[1], <MODE>mode))
62 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.
;; Alternative 0: source already in a SIMD register -> DUP from lane 0.
;; Alternative 1: source in a GP register (discouraged, "?r") -> DUP from
;; the general register (%<vwcore> selects w/x as appropriate).
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
68 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
71 dup\\t%0.<Vtype>, %1.<Vetype>[0]
72 dup\\t%0.<Vtype>, %<vwcore>1"
73 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Same duplication for floating-point vectors (incl. HF modes).
76 (define_insn "aarch64_simd_dup<mode>"
77 [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
78 (vec_duplicate:VDQF_F16
79 (match_operand:<VEL> 1 "register_operand" "w,r")))]
82 dup\\t%0.<Vtype>, %1.<Vetype>[0]
83 dup\\t%0.<Vtype>, %<vwcore>1"
84 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Broadcast one selected lane of a vector to all lanes of the result.
;; The lane number in the RTL is architectural little-endian order, so
;; it is remapped for big-endian via aarch64_endian_lane_rtx before
;; printing.
87 (define_insn "aarch64_dup_lane<mode>"
88 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
89 (vec_duplicate:VALL_F16
91 (match_operand:VALL_F16 1 "register_operand" "w")
92 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
96 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
97 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
99 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the "swapped" width (64 <-> 128
;; bit counterpart of the destination mode); the lane index is remapped
;; in the source vector's mode.
102 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
103 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
104 (vec_duplicate:VALL_F16_NO_V2Q
106 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
107 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
111 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
112 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
114 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (D-register) vector move insn.  Alternatives:
;;   0: load from memory (ldr)        4: SIMD lane 0 -> GP (umov)
;;   1: store immediate zero (xzr)    5: GP -> SIMD (fmov)
;;   2: store to memory (str)         6: GP -> GP (mov)
;;   3: SIMD -> SIMD (mov, i.e. orr)  7: valid vector immediate
117 (define_insn "*aarch64_simd_mov<VDMOV:mode>"
118 [(set (match_operand:VDMOV 0 "nonimmediate_operand"
119 "=w, m, m, w, ?r, ?w, ?r, w")
120 (match_operand:VDMOV 1 "general_operand"
121 "m, Dz, w, w, w, r, r, Dn"))]
123 && (register_operand (operands[0], <MODE>mode)
124 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
126 switch (which_alternative)
128 case 0: return "ldr\t%d0, %1";
129 case 1: return "str\txzr, %0";
130 case 2: return "str\t%d1, %0";
131 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
132 case 4: return "umov\t%0, %1.d[0]";
133 case 5: return "fmov\t%d0, %1";
134 case 6: return "mov\t%0, %1";
136 return aarch64_output_simd_mov_immediate (operands[1], 64);
137 default: gcc_unreachable ();
140 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
141 neon_logic<q>, neon_to_gp<q>, f_mcr,\
142 mov_reg, neon_move<q>")]
;; 128-bit (Q-register) vector move insn.  Zero stores use STP xzr,xzr
;; (hence the Umn pair-address constraint); transfers involving GP
;; registers need two instructions (type "multiple", length 8) and are
;; split after reload by the define_splits further down.
145 (define_insn "*aarch64_simd_mov<VQMOV:mode>"
146 [(set (match_operand:VQMOV 0 "nonimmediate_operand"
147 "=w, Umn, m, w, ?r, ?w, ?r, w")
148 (match_operand:VQMOV 1 "general_operand"
149 "m, Dz, w, w, w, r, r, Dn"))]
151 && (register_operand (operands[0], <MODE>mode)
152 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
154 switch (which_alternative)
157 return "ldr\t%q0, %1";
159 return "stp\txzr, xzr, %0";
161 return "str\t%q1, %0";
163 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
169 return aarch64_output_simd_mov_immediate (operands[1], 128);
174 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
175 neon_logic<q>, multiple, multiple,\
176 multiple, neon_move<q>")
177 (set_attr "length" "4,4,4,4,8,8,8,4")]
180 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store of lane zero via a plain scalar STR.  Only valid when the
;; endian-adjusted lane number is 0, i.e. the lane that sits at the
;; lowest address.
183 (define_insn "aarch64_store_lane0<mode>"
184 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
185 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
186 (parallel [(match_operand 2 "const_int_operand" "n")])))]
188 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
189 "str\\t%<Vetype>1, %0"
190 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two D registers.  The insn condition (partially elided in this
;; extract) requires the second address to be exactly the first plus the
;; size of the first mode, so the two loads form one contiguous pair.
193 (define_insn "load_pair<DREG:mode><DREG2:mode>"
194 [(set (match_operand:DREG 0 "register_operand" "=w")
195 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
196 (set (match_operand:DREG2 2 "register_operand" "=w")
197 (match_operand:DREG2 3 "memory_operand" "m"))]
199 && rtx_equal_p (XEXP (operands[3], 0),
200 plus_constant (Pmode,
201 XEXP (operands[1], 0),
202 GET_MODE_SIZE (<DREG:MODE>mode)))"
203 "ldp\\t%d0, %d2, %z1"
204 [(set_attr "type" "neon_ldp")]
;; STP of two D registers; mirror of the load_pair condition above.
207 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
208 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
209 (match_operand:DREG 1 "register_operand" "w"))
210 (set (match_operand:DREG2 2 "memory_operand" "=m")
211 (match_operand:DREG2 3 "register_operand" "w"))]
213 && rtx_equal_p (XEXP (operands[2], 0),
214 plus_constant (Pmode,
215 XEXP (operands[0], 0),
216 GET_MODE_SIZE (<DREG:MODE>mode)))"
217 "stp\\t%d1, %d3, %z0"
218 [(set_attr "type" "neon_stp")]
;; LDP of two Q registers (128-bit each).
221 (define_insn "load_pair<VQ:mode><VQ2:mode>"
222 [(set (match_operand:VQ 0 "register_operand" "=w")
223 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
224 (set (match_operand:VQ2 2 "register_operand" "=w")
225 (match_operand:VQ2 3 "memory_operand" "m"))]
227 && rtx_equal_p (XEXP (operands[3], 0),
228 plus_constant (Pmode,
229 XEXP (operands[1], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "ldp\\t%q0, %q2, %z1"
232 [(set_attr "type" "neon_ldp_q")]
;; STP of two Q registers.
235 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
236 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
237 (match_operand:VQ 1 "register_operand" "w"))
238 (set (match_operand:VQ2 2 "memory_operand" "=m")
239 (match_operand:VQ2 3 "register_operand" "w"))]
240 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
241 plus_constant (Pmode,
242 XEXP (operands[0], 0),
243 GET_MODE_SIZE (<VQ:MODE>mode)))"
244 "stp\\t%q1, %q3, %z0"
245 [(set_attr "type" "neon_stp_q")]
;; Post-reload split: a 128-bit vector move between two GP register
;; pairs becomes two DImode register moves.
;; NOTE(review): the introducing "(define_split" line is missing from
;; this extract (numbering gap before original line 250).
250 [(set (match_operand:VQMOV 0 "register_operand" "")
251 (match_operand:VQMOV 1 "register_operand" ""))]
252 "TARGET_SIMD && reload_completed
253 && GP_REGNUM_P (REGNO (operands[0]))
254 && GP_REGNUM_P (REGNO (operands[1]))"
257 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split: a 128-bit vector move between an FP/SIMD register
;; and a GP register pair (either direction) is split by
;; aarch64_split_simd_move into two half-width transfers.
262 [(set (match_operand:VQMOV 0 "register_operand" "")
263 (match_operand:VQMOV 1 "register_operand" ""))]
264 "TARGET_SIMD && reload_completed
265 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
266 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
269 aarch64_split_simd_move (operands[0], operands[1]);
;; Expander used by aarch64_split_simd_move: performs a 128-bit move as
;; two 64-bit halves.  GP source: move low/high parts into the vector
;; via move_lo_quad/move_hi_quad.  Otherwise: extract both halves of the
;; source with vec_selects over lo/hi lane PARALLELs.
273 (define_expand "@aarch64_split_simd_mov<mode>"
274 [(set (match_operand:VQMOV 0)
275 (match_operand:VQMOV 1))]
278 rtx dst = operands[0];
279 rtx src = operands[1];
281 if (GP_REGNUM_P (REGNO (src)))
283 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
284 rtx src_high_part = gen_highpart (<VHALF>mode, src);
287 (gen_move_lo_quad_<mode> (dst, src_low_part));
289 (gen_move_hi_quad_<mode> (dst, src_high_part));
294 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
295 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
296 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
297 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
298 emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
299 emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
;; Select one half of a 128-bit vector; operand 2 is a PARALLEL of
;; ascending consecutive lane indices (either half).
305 (define_expand "aarch64_get_half<mode>"
306 [(set (match_operand:<VHALF> 0 "register_operand")
308 (match_operand:VQMOV 1 "register_operand")
309 (match_operand 2 "ascending_int_parallel")))]
;; Convenience expander: low half of a 128-bit vector.
313 (define_expand "aarch64_get_low<mode>"
314 [(match_operand:<VHALF> 0 "register_operand")
315 (match_operand:VQMOV 1 "register_operand")]
318 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
319 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
;; Convenience expander: high half of a 128-bit vector.
324 (define_expand "aarch64_get_high<mode>"
325 [(match_operand:<VHALF> 0 "register_operand")
326 (match_operand:VQMOV 1 "register_operand")]
329 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
330 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
;; Extract the low half of a 128-bit vector.  When the destination is a
;; SIMD register the insn is split after reload into a plain half-mode
;; register move (the low half aliases the D register); the ?r
;; alternative moves the half to a GP register instead.
335 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
336 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
338 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
339 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
344 "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
345 [(set (match_dup 0) (match_dup 1))]
347 operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
349 [(set_attr "type" "mov_reg,neon_to_gp<q>")
350 (set_attr "length" "4")]
;; Extract the high half of a 128-bit vector (no cheap aliasing here,
;; so this stays a single instruction; see the "type" attribute for the
;; two register-class alternatives).
353 (define_insn "aarch64_simd_mov_from_<mode>high"
354 [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
356 (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
357 (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
362 [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
363 (set_attr "length" "4")]
;; OR-NOT: op0 = op2 | ~op1.  Note the assembly operand order (%2, %1):
;; ORN negates its *second* assembler source, which is RTL operand 1.
366 (define_insn "orn<mode>3"
367 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
368 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
369 (match_operand:VDQ_I 2 "register_operand" "w")))]
371 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
372 [(set_attr "type" "neon_logic<q>")]
;; Bit-clear: op0 = op2 & ~op1 (same operand swap as ORN above).
375 (define_insn "bic<mode>3"
376 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
377 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
378 (match_operand:VDQ_I 2 "register_operand" "w")))]
380 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
381 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
384 (define_insn "add<mode>3"
385 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
386 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
387 (match_operand:VDQ_I 2 "register_operand" "w")))]
389 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
390 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
393 (define_insn "sub<mode>3"
394 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
395 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
396 (match_operand:VDQ_I 2 "register_operand" "w")))]
398 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
399 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply; only byte/half/word element sizes (VDQ_BHSI)
;; -- there is no MUL for 64-bit integer lanes.
402 (define_insn "mul<mode>3"
403 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
404 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
405 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
407 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
408 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element, via the REV16/REV32/REV64 family
;; (<Vrevsuff> picks the width matching the element size).
411 (define_insn "bswap<mode>2"
412 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
413 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
415 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
416 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte.
419 (define_insn "aarch64_rbit<mode>"
420 [(set (match_operand:VB 0 "register_operand" "=w")
421 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
424 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
425 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as:
;;   bswap (reverse bytes within each word), then rbit on a QI-vector
;;   view (fully bit-reversing each word), then clz.
428 (define_expand "ctz<mode>2"
429 [(set (match_operand:VS 0 "register_operand")
430 (ctz:VS (match_operand:VS 1 "register_operand")))]
433 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
434 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
436 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
437 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign (op1, op2): multiply op1 by +/-1 according to the sign of
;; op2, implemented bitwise on the integer view:
;;   op0 = op1 ^ (op2 & sign-bit-mask)
;; i.e. flip op1's sign bit wherever op2 is negative.
442 (define_expand "xorsign<mode>3"
443 [(match_operand:VHSDF 0 "register_operand")
444 (match_operand:VHSDF 1 "register_operand")
445 (match_operand:VHSDF 2 "register_operand")]
449 machine_mode imode = <V_INT_EQUIV>mode;
450 rtx v_bitmask = gen_reg_rtx (imode);
451 rtx op1x = gen_reg_rtx (imode);
452 rtx op2x = gen_reg_rtx (imode);
454 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
455 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
457 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
459 emit_move_insn (v_bitmask,
460 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
461 HOST_WIDE_INT_M1U << bits));
463 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
464 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
465 emit_move_insn (operands[0],
466 lowpart_subreg (<MODE>mode, op1x, imode));
471 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
472 ;; fact that their usage need to guarantee that the source vectors are
473 ;; contiguous. It would be wrong to describe the operation without being able
474 ;; to describe the permute that is also required, but even if that is done
475 ;; the permute would have been created as a LOAD_LANES which means the values
476 ;; in the registers are in the wrong order.
;; Complex add with rotation (#90 or #270, from the <rot> iterator).
477 (define_insn "aarch64_fcadd<rot><mode>"
478 [(set (match_operand:VHSDF 0 "register_operand" "=w")
479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
480 (match_operand:VHSDF 2 "register_operand" "w")]
483 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
484 [(set_attr "type" "neon_fcadd")]
;; Optab entry for complex add; little-endian only (lane pairing
;; assumes LE lane order).
487 (define_expand "cadd<rot><mode>3"
488 [(set (match_operand:VHSDF 0 "register_operand")
489 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
490 (match_operand:VHSDF 2 "register_operand")]
492 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
;; Complex fused multiply-accumulate: op0 = op1 + fcmla(op2, op3, #rot).
;; Operand 1 is tied to the destination ("0" constraint).
495 (define_insn "aarch64_fcmla<rot><mode>"
496 [(set (match_operand:VHSDF 0 "register_operand" "=w")
497 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
498 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
499 (match_operand:VHSDF 3 "register_operand" "w")]
502 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
503 [(set_attr "type" "neon_fcmla")]
;; Lane-indexed FCMLA; the complex-pair lane index (operand 4) is
;; endian-corrected in the half-width mode before printing.
507 (define_insn "aarch64_fcmla_lane<rot><mode>"
508 [(set (match_operand:VHSDF 0 "register_operand" "=w")
509 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
510 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
511 (match_operand:VHSDF 3 "register_operand" "w")
512 (match_operand:SI 4 "const_int_operand" "n")]
516 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
517 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
519 [(set_attr "type" "neon_fcmla")]
;; V4HF FCMLA indexed by a lane of a 128-bit (V8HF) third operand.
522 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
523 [(set (match_operand:V4HF 0 "register_operand" "=w")
524 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
525 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
526 (match_operand:V8HF 3 "register_operand" "w")
527 (match_operand:SI 4 "const_int_operand" "n")]
531 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
532 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
534 [(set_attr "type" "neon_fcmla")]
;; Q-form FCMLA indexed into a 64-bit third operand; the lane count is
;; halved because lanes are complex *pairs* of elements.
537 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
538 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
539 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
540 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
541 (match_operand:<VHALF> 3 "register_operand" "w")
542 (match_operand:SI 4 "const_int_operand" "n")]
546 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
548 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
549 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
551 [(set_attr "type" "neon_fcmla")]
554 ;; The complex mla/mls operations always need to expand to two instructions.
555 ;; The first operation does half the computation and the second does the
556 ;; remainder. Because of this, expand early.
;; Complex multiply-accumulate optab: two chained FCMLAs with the two
;; rotations given by <rotsplit1>/<rotsplit2>; the first accumulates
;; into a temporary seeded with operand 1.
557 (define_expand "cml<fcmac1><conj_op><mode>4"
558 [(set (match_operand:VHSDF 0 "register_operand")
559 (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
560 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
561 (match_operand:VHSDF 3 "register_operand")]
563 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
565 rtx tmp = gen_reg_rtx (<MODE>mode);
566 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1],
567 operands[3], operands[2]));
568 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
569 operands[3], operands[2]));
573 ;; The complex mul operations always need to expand to two instructions.
574 ;; The first operation does half the computation and the second does the
575 ;; remainder. Because of this, expand early.
;; Complex multiply optab: as cml above but the accumulator is seeded
;; with zero.
576 (define_expand "cmul<conj_op><mode>3"
577 [(set (match_operand:VHSDF 0 "register_operand")
578 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
581 "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
583 rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
584 rtx res1 = gen_reg_rtx (<MODE>mode);
585 emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
586 operands[2], operands[1]));
587 emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
588 operands[2], operands[1]));
592 ;; These expands map to the Dot Product optab the vectorizer checks for
593 ;; and to the intrinsics patttern.
594 ;; The auto-vectorizer expects a dot product builtin that also does an
595 ;; accumulation into the provided register.
596 ;; Given the following pattern
598 ;; for (i=0; i<len; i++) {
604 ;; This can be auto-vectorized to
605 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
607 ;; given enough iterations. However the vectorizer can keep unrolling the loop
608 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
609 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
612 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; SDOT/UDOT: 4-way dot product of QI vectors accumulated into SI lanes;
;; operand 3 is the accumulator, tied to the destination.
613 (define_insn "<sur>dot_prod<vsi2qi>"
614 [(set (match_operand:VS 0 "register_operand" "=w")
616 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
617 (match_operand:<VSI2QI> 2 "register_operand" "w")]
619 (match_operand:VS 3 "register_operand" "0")))]
621 "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
622 [(set_attr "type" "neon_dot<q>")]
625 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
626 ;; (vector) Dot Product operation and the vectorized optab.
627 (define_insn "usdot_prod<vsi2qi>"
628 [(set (match_operand:VS 0 "register_operand" "=w")
630 (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
631 (match_operand:<VSI2QI> 2 "register_operand" "w")]
633 (match_operand:VS 3 "register_operand" "0")))]
635 "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
636 [(set_attr "type" "neon_dot<q>")]
639 ;; These instructions map to the __builtins for the Dot Product
640 ;; indexed operations.
;; Lane-indexed dot product; the 4-byte group index (operand 4) is
;; endian-corrected in V8QImode.
641 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
642 [(set (match_operand:VS 0 "register_operand" "=w")
644 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
645 (match_operand:V8QI 3 "register_operand" "<h_con>")
646 (match_operand:SI 4 "immediate_operand" "i")]
648 (match_operand:VS 1 "register_operand" "0")))]
651 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
652 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
654 [(set_attr "type" "neon_dot<q>")]
;; As above with a 128-bit (V16QI) indexed operand.
657 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
658 [(set (match_operand:VS 0 "register_operand" "=w")
660 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
661 (match_operand:V16QI 3 "register_operand" "<h_con>")
662 (match_operand:SI 4 "immediate_operand" "i")]
664 (match_operand:VS 1 "register_operand" "0")))]
667 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
668 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
670 [(set_attr "type" "neon_dot<q>")]
673 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
674 ;; (by element) Dot Product operations.
;; Mixed-signedness (USDOT/SUDOT) lane form; lane count is nunits/4
;; because each index names a group of four bytes.
675 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
676 [(set (match_operand:VS 0 "register_operand" "=w")
678 (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
679 (match_operand:VB 3 "register_operand" "w")
680 (match_operand:SI 4 "immediate_operand" "i")]
682 (match_operand:VS 1 "register_operand" "0")))]
685 int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
686 int lane = INTVAL (operands[4]);
687 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
688 return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
690 [(set_attr "type" "neon_dot<VS:q>")]
;; copysign (op1, op2): take the magnitude of op1 and the sign of op2,
;; done with BSL selecting through a sign-bit mask: where the mask bit
;; is set (the sign bit) take op2's bit, elsewhere take op1's.
693 (define_expand "copysign<mode>3"
694 [(match_operand:VHSDF 0 "register_operand")
695 (match_operand:VHSDF 1 "register_operand")
696 (match_operand:VHSDF 2 "register_operand")]
697 "TARGET_FLOAT && TARGET_SIMD"
699 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
700 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
702 emit_move_insn (v_bitmask,
703 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
704 HOST_WIDE_INT_M1U << bits));
705 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
706 operands[2], operands[1]));
;; Multiply by one lane of a vector (MUL/FMUL by-element form); the
;; lane index is endian-corrected in <VCOND> mode.
711 (define_insn "mul_lane<mode>3"
712 [(set (match_operand:VMULD 0 "register_operand" "=w")
716 (match_operand:<VCOND> 2 "register_operand" "<h_con>")
717 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
718 (match_operand:VMULD 1 "register_operand" "w")))]
721 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
722 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
724 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above but the indexed operand is the 128-bit (<VCONQ>) vector.
727 (define_insn "mul_laneq<mode>3"
728 [(set (match_operand:VMUL 0 "register_operand" "=w")
732 (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
733 (parallel [(match_operand:SI 3 "immediate_operand")])))
734 (match_operand:VMUL 1 "register_operand" "w")))]
737 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
738 return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
740 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Multiply by a duplicated scalar, printed as a by-element multiply
;; from lane 0.
743 (define_insn "mul_n<mode>3"
744 [(set (match_operand:VMUL 0 "register_operand" "=w")
747 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
748 (match_operand:VMUL 1 "register_operand" "w")))]
750 "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
751 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar modes.
754 (define_insn "@aarch64_rsqrte<mode>"
755 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
756 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
759 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
760 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
762 (define_insn "@aarch64_rsqrts<mode>"
763 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
764 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
765 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
768 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
769 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; rsqrt optab: expanded into an estimate + refinement sequence by
;; aarch64_emit_approx_sqrt (recip=true).
771 (define_expand "rsqrt<mode>2"
772 [(set (match_operand:VALLF 0 "register_operand")
773 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
777 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Unsigned reciprocal square-root estimate (URSQRTE) on SI vectors.
781 (define_insn "aarch64_ursqrte<mode>"
782 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
783 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
786 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
787 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Combiner pattern: DF multiply by a selected V2DF lane, printed as a
;; by-element FMUL.  Lane endian-corrected in V2DFmode.
789 (define_insn "*aarch64_mul3_elt_to_64v2df"
790 [(set (match_operand:DF 0 "register_operand" "=w")
793 (match_operand:V2DF 1 "register_operand" "w")
794 (parallel [(match_operand:SI 2 "immediate_operand")]))
795 (match_operand:DF 3 "register_operand" "w")))]
798 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
799 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
801 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
804 (define_insn "neg<mode>2"
805 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
806 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
808 "neg\t%0.<Vtype>, %1.<Vtype>"
809 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (RTL abs code).
812 (define_insn "abs<mode>2"
813 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
814 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
816 "abs\t%0.<Vtype>, %1.<Vtype>"
817 [(set_attr "type" "neon_abs<q>")]
820 ;; The intrinsic version of integer ABS must not be allowed to
821 ;; combine with any operation with an integerated ABS step, such
;; Intrinsic ABS kept as an UNSPEC precisely so combine cannot merge it
;; (see comment above).
823 (define_insn "aarch64_abs<mode>"
824 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
826 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
829 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
830 [(set_attr "type" "neon_abs<q>")]
833 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
834 ;; This isn't accurate as ABS treats always its input as a signed value.
835 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
836 ;; Whereas SABD would return 192 (-64 signed) on the above example.
837 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
838 (define_insn "aarch64_<su>abd<mode>"
839 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
842 (match_operand:VDQ_BHSI 1 "register_operand" "w")
843 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
848 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
849 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference: SABDL/UABDL on the low halves.
853 (define_insn "aarch64_<sur>abdl<mode>"
854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
855 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
856 (match_operand:VD_BHSI 2 "register_operand" "w")]
859 "<sur>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
860 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference on the high halves (SABDL2/UABDL2).
863 (define_insn "aarch64_<sur>abdl2<mode>"
864 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
865 (unspec:<VDBLW> [(match_operand:VQW 1 "register_operand" "w")
866 (match_operand:VQW 2 "register_operand" "w")]
869 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
870 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute-difference-and-accumulate (SABAL/UABAL);
;; operand 1 is the accumulator, tied to the destination.
873 (define_insn "aarch64_<sur>abal<mode>"
874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
875 (unspec:<VWIDE> [(match_operand:VD_BHSI 2 "register_operand" "w")
876 (match_operand:VD_BHSI 3 "register_operand" "w")
877 (match_operand:<VWIDE> 1 "register_operand" "0")]
880 "<sur>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
881 [(set_attr "type" "neon_arith_acc<q>")]
;; High-half variant (SABAL2/UABAL2).
884 (define_insn "aarch64_<sur>abal2<mode>"
885 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
886 (unspec:<VWIDE> [(match_operand:VQW 2 "register_operand" "w")
887 (match_operand:VQW 3 "register_operand" "w")
888 (match_operand:<VWIDE> 1 "register_operand" "0")]
891 "<sur>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
892 [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise add-and-accumulate-long (SADALP/UADALP): add adjacent pairs
;; of operand 2 into the double-width accumulator (operand 1, tied).
895 (define_insn "aarch64_<sur>adalp<mode>"
896 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
897 (unspec:<VDBLW> [(match_operand:VDQV_L 2 "register_operand" "w")
898 (match_operand:<VDBLW> 1 "register_operand" "0")]
901 "<sur>adalp\t%0.<Vwhalf>, %2.<Vtype>"
902 [(set_attr "type" "neon_reduc_add<q>")]
905 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
906 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
907 ;; reduction of the difference into a V4SI vector and accumulate that into
908 ;; operand 3 before copying that into the result operand 0.
909 ;; Perform that with a sequence of:
910 ;; UABDL2 tmp.8h, op1.16b, op2.16b
911 ;; UABAL tmp.8h, op1.8b, op2.8b
912 ;; UADALP op3.4s, tmp.8h
913 ;; MOV op0, op3 // should be eliminated in later passes.
915 ;; For TARGET_DOTPROD we do:
916 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
917 ;; UABD tmp2.16b, op1.16b, op2.16b
918 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
919 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
921 ;; The signed version just uses the signed variants of the above instructions
922 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
925 (define_expand "<sur>sadv16qi"
926 [(use (match_operand:V4SI 0 "register_operand"))
927 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
928 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
929 (use (match_operand:V4SI 3 "register_operand"))]
;; Dot-product fast path (see the instruction sequence comment above).
934 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
935 rtx abd = gen_reg_rtx (V16QImode);
936 emit_insn (gen_aarch64_<sur>abdv16qi (abd, operands[1], operands[2]));
937 emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
;; Fallback: UABDL2 + UABAL + UADALP widening reduction.
940 rtx reduc = gen_reg_rtx (V8HImode);
941 emit_insn (gen_aarch64_<sur>abdl2v16qi (reduc, operands[1],
943 emit_insn (gen_aarch64_<sur>abalv8qi (reduc, reduc,
944 gen_lowpart (V8QImode, operands[1]),
945 gen_lowpart (V8QImode,
947 emit_insn (gen_aarch64_<sur>adalpv8hi (operands[3], operands[3], reduc));
948 emit_move_insn (operands[0], operands[3]);
;; Absolute-difference-and-accumulate (SABA/UABA), same max-min
;; representation as aarch64_<su>abd; operand 1 is the accumulator.
953 (define_insn "aarch64_<su>aba<mode>"
954 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
955 (plus:VDQ_BHSI (minus:VDQ_BHSI
957 (match_operand:VDQ_BHSI 2 "register_operand" "w")
958 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
962 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
964 "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference: |op1 - op2| (FABD).
968 (define_insn "fabd<mode>3"
969 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
972 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
973 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
975 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
976 [(set_attr "type" "neon_fp_abd_<stype><q>")]
979 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 0 uses AND with a register; alternative 1 ("Db" constraint)
;; re-emits the inverted immediate as a BIC via
;; aarch64_output_simd_mov_immediate.
980 (define_insn "and<mode>3"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
982 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
983 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
986 switch (which_alternative)
989 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
991 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
997 [(set_attr "type" "neon_logic<q>")]

1000 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Same two-alternative scheme as and<mode>3, but for inclusive OR
;; ("Do" accepts an ORR-encodable immediate).
1001 (define_insn "ior<mode>3"
1002 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1003 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
1004 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
1007 switch (which_alternative)
1010 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
1012 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
1018 [(set_attr "type" "neon_logic<q>")]

;; Vector exclusive OR: single register-register alternative, emits EOR.
1021 (define_insn "xor<mode>3"
1022 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1023 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1024 (match_operand:VDQ_I 2 "register_operand" "w")))]
1026 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
1027 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise complement, emits NOT.
1030 (define_insn "one_cmpl<mode>2"
1031 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1032 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1034 "not\t%0.<Vbtype>, %1.<Vbtype>"
1035 [(set_attr "type" "neon_logic<q>")]
;; Insert one element into a vector (vec_merge of a vec_duplicate).
;; Operand 2 is a one-hot immediate mask; exact_log2 recovers the lane,
;; ENDIAN_LANE_N maps it to the architectural lane.  Three alternatives:
;; INS from a SIMD lane, INS from a GP register, or LD1 from memory.
1038 (define_insn "aarch64_simd_vec_set<mode>"
1039 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1041 (vec_duplicate:VALL_F16
1042 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
1043 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1044 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1047 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1048 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1049 switch (which_alternative)
1052 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1054 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1056 return "ld1\\t{%0.<Vetype>}[%p2], %1";
1061 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]

;; Copy lane %4 of operand 3 into lane %p2 of operand 0 (same element
;; width on both sides): INS Vd.T[a], Vn.T[b].
1064 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1065 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1067 (vec_duplicate:VALL_F16
1069 (match_operand:VALL_F16 3 "register_operand" "w")
1071 [(match_operand:SI 4 "immediate_operand" "i")])))
1072 (match_operand:VALL_F16 1 "register_operand" "0")
1073 (match_operand:SI 2 "immediate_operand" "i")))]
1076 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1077 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1078 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1080 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1082 [(set_attr "type" "neon_ins<q>")]

;; As above but the source vector has the swapped (64 vs 128 bit) width;
;; the source lane is remapped in <VSWAP_WIDTH>mode.
1085 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1086 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1087 (vec_merge:VALL_F16_NO_V2Q
1088 (vec_duplicate:VALL_F16_NO_V2Q
1090 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1092 [(match_operand:SI 4 "immediate_operand" "i")])))
1093 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1094 (match_operand:SI 2 "immediate_operand" "i")))]
1097 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1098 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1099 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1100 INTVAL (operands[4]));
1102 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1104 [(set_attr "type" "neon_ins<q>")]
;; Extract the sign bit of each float element by viewing the vector in
;; its integer-equivalent mode and doing a logical shift right by
;; (element bits - 1), leaving 0 or 1 per lane.
1107 (define_expand "signbit<mode>2"
1108 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1109 (use (match_operand:VDQSF 1 "register_operand"))]
1112 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1113 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1115 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1117 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1122 (define_insn "aarch64_simd_lshr<mode>"
;; Logical (unsigned) shift right by an immediate vector: USHR.
1123 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1124 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1125 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
1127 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1128 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by an immediate vector.  A shift by
;; (element bits - 1), constraint "D1", is emitted as CMLT #0, which
;; yields the same all-sign-bit result; otherwise SSHR.
1131 (define_insn "aarch64_simd_ashr<mode>"
1132 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
1133 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,w")
1134 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "D1,Dr")))]
1137 cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1138 sshr\t%0.<Vtype>, %1.<Vtype>, %2"
1139 [(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
;; Shift right (signed or unsigned per <sra_op>) and accumulate:
;; <sra_op>SRA, accumulator operand 3 tied to the output.
1142 (define_insn "*aarch64_simd_sra<mode>"
1143 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1146 (match_operand:VDQ_I 1 "register_operand" "w")
1147 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
1148 (match_operand:VDQ_I 3 "register_operand" "0")))]
1150 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
1151 [(set_attr "type" "neon_shift_acc<q>")]

;; Left shift by an immediate vector: SHL.
1154 (define_insn "aarch64_simd_imm_shl<mode>"
1155 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1156 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1157 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1159 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1160 [(set_attr "type" "neon_shift_imm<q>")]

;; Left shift by a per-lane register amount: SSHL (RTL ashift, so the
;; amounts are assumed non-negative by the expanders that use this).
1163 (define_insn "aarch64_simd_reg_sshl<mode>"
1164 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1165 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1166 (match_operand:VDQ_I 2 "register_operand" "w")))]
1168 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1169 [(set_attr "type" "neon_shift_reg<q>")]

;; USHL with a register amount, kept as an unspec because a negative
;; per-lane amount means shift right — not expressible as plain RTL.
1172 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1173 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1174 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1175 (match_operand:VDQ_I 2 "register_operand" "w")]
1176 UNSPEC_ASHIFT_UNSIGNED))]
1178 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1179 [(set_attr "type" "neon_shift_reg<q>")]

;; Signed counterpart of the above: SSHL via unspec.
1182 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1183 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1184 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1185 (match_operand:VDQ_I 2 "register_operand" "w")]
1186 UNSPEC_ASHIFT_SIGNED))]
1188 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1189 [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name vector shift-left by a scalar amount.  A constant
;; in-range amount is broadcast and emitted as an immediate SHL;
;; otherwise the amount is forced to a register, duplicated across
;; lanes, and SSHL is used.
1192 (define_expand "ashl<mode>3"
1193 [(match_operand:VDQ_I 0 "register_operand")
1194 (match_operand:VDQ_I 1 "register_operand")
1195 (match_operand:SI 2 "general_operand")]
1198 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1201 if (CONST_INT_P (operands[2]))
1203 shift_amount = INTVAL (operands[2]);
1204 if (shift_amount >= 0 && shift_amount < bit_width)
1206 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1208 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1215 operands[2] = force_reg (SImode, operands[2]);
1217 rtx tmp = gen_reg_rtx (<MODE>mode);
1218 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1221 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));

;; Standard-name vector logical shift right.  Constant amounts go to
;; the immediate USHR pattern; variable amounts are negated (USHL
;; shifts right for negative per-lane amounts) and broadcast.
1225 (define_expand "lshr<mode>3"
1226 [(match_operand:VDQ_I 0 "register_operand")
1227 (match_operand:VDQ_I 1 "register_operand")
1228 (match_operand:SI 2 "general_operand")]
1231 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1234 if (CONST_INT_P (operands[2]))
1236 shift_amount = INTVAL (operands[2]);
1237 if (shift_amount > 0 && shift_amount <= bit_width)
1239 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1241 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1248 operands[2] = force_reg (SImode, operands[2]);
1250 rtx tmp = gen_reg_rtx (SImode);
1251 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1252 emit_insn (gen_negsi2 (tmp, operands[2]));
1253 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1254 convert_to_mode (<VEL>mode, tmp, 0)));
1255 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; Standard-name vector arithmetic shift right; same strategy as
;; lshr<mode>3 but uses SSHR / the signed SSHL unspec.
1260 (define_expand "ashr<mode>3"
1261 [(match_operand:VDQ_I 0 "register_operand")
1262 (match_operand:VDQ_I 1 "register_operand")
1263 (match_operand:SI 2 "general_operand")]
1266 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1269 if (CONST_INT_P (operands[2]))
1271 shift_amount = INTVAL (operands[2]);
1272 if (shift_amount > 0 && shift_amount <= bit_width)
1274 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1276 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1283 operands[2] = force_reg (SImode, operands[2]);
1285 rtx tmp = gen_reg_rtx (SImode);
1286 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1287 emit_insn (gen_negsi2 (tmp, operands[2]));
1288 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1290 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; Vector-by-vector shift left: maps directly onto SSHL.
1295 (define_expand "vashl<mode>3"
1296 [(match_operand:VDQ_I 0 "register_operand")
1297 (match_operand:VDQ_I 1 "register_operand")
1298 (match_operand:VDQ_I 2 "register_operand")]
1301 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Vector-by-vector arithmetic shift right: negate the amounts and use
;; the signed SSHL unspec (negative amount == right shift).
1306 (define_expand "vashr<mode>3"
1307 [(match_operand:VDQ_I 0 "register_operand")
1308 (match_operand:VDQ_I 1 "register_operand")
1309 (match_operand:VDQ_I 2 "register_operand")]
1312 rtx neg = gen_reg_rtx (<MODE>mode);
1313 emit (gen_neg<mode>2 (neg, operands[2]))
1314 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; DI-mode intrinsic ASR allowing a shift of 64: clamp it to 63, which
;; produces the identical all-sign-bits result, then use ashrdi3.
1320 (define_expand "aarch64_ashr_simddi"
1321 [(match_operand:DI 0 "register_operand")
1322 (match_operand:DI 1 "register_operand")
1323 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1326 /* An arithmetic shift right by 64 fills the result with copies of the sign
1327 bit, just like asr by 63 - however the standard pattern does not handle
1329 if (INTVAL (operands[2]) == 64)
1330 operands[2] = GEN_INT (63);
1331 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Vector-by-vector logical shift right via negated USHL amounts.
1336 (define_expand "vlshr<mode>3"
1337 [(match_operand:VDQ_I 0 "register_operand")
1338 (match_operand:VDQ_I 1 "register_operand")
1339 (match_operand:VDQ_I 2 "register_operand")]
1342 rtx neg = gen_reg_rtx (<MODE>mode);
1343 emit (gen_neg<mode>2 (neg, operands[2]))
1344 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; DI-mode intrinsic LSR allowing a shift of 64: that yields zero, so
;; emit a move of 0; otherwise defer to the standard lshrdi3.
1349 (define_expand "aarch64_lshr_simddi"
1350 [(match_operand:DI 0 "register_operand")
1351 (match_operand:DI 1 "register_operand")
1352 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1355 if (INTVAL (operands[2]) == 64)
1356 emit_move_insn (operands[0], const0_rtx);
1358 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1363 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; vec_shr shifts the whole 64-bit register towards element 0; on
;; big-endian the element order is reversed in the register, hence SHL
;; there and USHR on little-endian.
1364 (define_insn "vec_shr_<mode>"
1365 [(set (match_operand:VD 0 "register_operand" "=w")
1366 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1367 (match_operand:SI 2 "immediate_operand" "i")]
1371 if (BYTES_BIG_ENDIAN)
1372 return "shl %d0, %d1, %2";
1374 return "ushr %d0, %d1, %2";
1376 [(set_attr "type" "neon_shift_imm")]

;; Standard-name vec_set: convert the lane index in operand 2 into the
;; one-hot mask expected by aarch64_simd_vec_set<mode>.
1379 (define_expand "vec_set<mode>"
1380 [(match_operand:VALL_F16 0 "register_operand")
1381 (match_operand:<VEL> 1 "register_operand")
1382 (match_operand:SI 2 "immediate_operand")]
1385 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1386 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1387 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: MLA op0 = op2 * op3 + op1, accumulator tied to
;; the output register.
1393 (define_insn "aarch64_mla<mode>"
1394 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1395 (plus:VDQ_BHSI (mult:VDQ_BHSI
1396 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1397 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1398 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1400 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1401 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLA with one multiplicand broadcast from lane %2 of operand 1
;; (combiner pattern; lane remapped for endianness before output).
1404 (define_insn "*aarch64_mla_elt<mode>"
1405 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1408 (vec_duplicate:VDQHS
1410 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1411 (parallel [(match_operand:SI 2 "immediate_operand")])))
1412 (match_operand:VDQHS 3 "register_operand" "w"))
1413 (match_operand:VDQHS 4 "register_operand" "0")))]
1416 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1417 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1419 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Same as above with the lane taken from the swapped-width vector mode.
1422 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1423 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1426 (vec_duplicate:VDQHS
1428 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1429 (parallel [(match_operand:SI 2 "immediate_operand")])))
1430 (match_operand:VDQHS 3 "register_operand" "w"))
1431 (match_operand:VDQHS 4 "register_operand" "0")))]
1434 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1435 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1437 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLA with a scalar operand duplicated across lanes: lane-0 form.
1440 (define_insn "aarch64_mla_n<mode>"
1441 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1444 (vec_duplicate:VDQHS
1445 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1446 (match_operand:VDQHS 2 "register_operand" "w"))
1447 (match_operand:VDQHS 1 "register_operand" "0")))]
1449 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1450 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Multiply-subtract: MLS op0 = op1 - op2 * op3.
1453 (define_insn "aarch64_mls<mode>"
1454 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1455 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1456 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1457 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1459 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1460 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand broadcast from lane %2 of operand 1
;; (combiner pattern; lane remapped for endianness).
1463 (define_insn "*aarch64_mls_elt<mode>"
1464 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1466 (match_operand:VDQHS 4 "register_operand" "0")
1468 (vec_duplicate:VDQHS
1470 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1471 (parallel [(match_operand:SI 2 "immediate_operand")])))
1472 (match_operand:VDQHS 3 "register_operand" "w"))))]
1475 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1476 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1478 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; Same with the lane taken from the swapped-width vector mode.
1481 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1482 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1484 (match_operand:VDQHS 4 "register_operand" "0")
1486 (vec_duplicate:VDQHS
1488 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1489 (parallel [(match_operand:SI 2 "immediate_operand")])))
1490 (match_operand:VDQHS 3 "register_operand" "w"))))]
1493 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1494 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1496 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLS with a scalar operand duplicated across lanes: lane-0 form.
1499 (define_insn "aarch64_mls_n<mode>"
1500 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1502 (match_operand:VDQHS 1 "register_operand" "0")
1504 (vec_duplicate:VDQHS
1505 (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1506 (match_operand:VDQHS 2 "register_operand" "w"))))]
1508 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1509 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1512 ;; Max/Min operations.
;; Elementwise signed/unsigned SMAX/SMIN/UMAX/UMIN for B/H/S elements.
1513 (define_insn "<su><maxmin><mode>3"
1514 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1515 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1516 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1518 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1519 [(set_attr "type" "neon_minmax<q>")]

;; V2DI has no direct max/min instruction: synthesize it from a compare
;; plus vcond (elementwise select).  NOTE(review): several lines of the
;; preparation code were elided in extraction (1529-1548).
1522 (define_expand "<su><maxmin>v2di3"
1523 [(set (match_operand:V2DI 0 "register_operand")
1524 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1525 (match_operand:V2DI 2 "register_operand")))]
1528 enum rtx_code cmp_operator;
1549 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1550 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1551 operands[2], cmp_fmt, operands[1], operands[2]));

1555 ;; Pairwise Integer Max/Min operations.
1556 (define_insn "aarch64_<optab>p<mode>"
1557 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1558 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1559 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1562 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1563 [(set_attr "type" "neon_minmax<q>")]

1566 ;; Pairwise FP Max/Min operations.
1567 (define_insn "aarch64_<optab>p<mode>"
1568 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1570 (match_operand:VHSDF 2 "register_operand" "w")]
1573 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1574 [(set_attr "type" "neon_minmax<q>")]
1577 ;; vec_concat gives a new vector with the low elements from operand 1, and
1578 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1579 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1580 ;; What that means, is that the RTL descriptions of the below patterns
1581 ;; need to change depending on endianness.

1583 ;; Move to the low architectural bits of the register.
1584 ;; On little-endian this is { operand, zeroes }
1585 ;; On big-endian this is { zeroes, operand }

;; Little-endian form: the moved half is the first vec_concat operand.
1587 (define_insn "move_lo_quad_internal_<mode>"
1588 [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1590 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1591 (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
1592 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1597 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1598 (set_attr "length" "4")
1599 (set_attr "arch" "simd,fp,simd")]

;; Big-endian form: zero half first, moved half second.
1602 (define_insn "move_lo_quad_internal_be_<mode>"
1603 [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1605 (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
1606 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1607 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1612 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1613 (set_attr "length" "4")
1614 (set_attr "arch" "simd,fp,simd")]

;; Wrapper that picks the LE or BE internal pattern at expand time.
1617 (define_expand "move_lo_quad_<mode>"
1618 [(match_operand:VQMOV 0 "register_operand")
1619 (match_operand:<VHALF> 1 "register_operand")]
1622 rtx zs = CONST0_RTX (<VHALF>mode);
1623 if (BYTES_BIG_ENDIAN)
1624 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
1626 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
1631 ;; Move operand1 to the high architectural bits of the register, keeping
1632 ;; the low architectural bits of operand2.
1633 ;; For little-endian this is { operand2, operand1 }
1634 ;; For big-endian this is { operand1, operand2 }

;; Little-endian form; emits INS Vd.d[1], Vn.d[0] (register alternative).
1636 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1637 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1641 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1642 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1643 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1645 ins\\t%0.d[1], %1.d[0]
1647 [(set_attr "type" "neon_ins")]

;; Big-endian form with the vec_concat operand order reversed.
1650 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1651 [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1653 (match_operand:<VHALF> 1 "register_operand" "w,r")
1656 (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1657 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1659 ins\\t%0.d[1], %1.d[0]
1661 [(set_attr "type" "neon_ins")]

;; Wrapper choosing the LE/BE internal pattern; operand 2 of those
;; patterns is the lo-half lane-selection parallel built here.
1664 (define_expand "move_hi_quad_<mode>"
1665 [(match_operand:VQMOV 0 "register_operand")
1666 (match_operand:<VHALF> 1 "register_operand")]
1669 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1670 if (BYTES_BIG_ENDIAN)
1671 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1674 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1679 ;; Narrowing operations.

;; XTN as a full-width set: narrow result in the low half, zero in the
;; high half (little-endian operand order).
1681 (define_insn "aarch64_xtn<mode>_insn_le"
1682 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1683 (vec_concat:<VNARROWQ2>
1684 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1685 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
1686 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1687 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1688 [(set_attr "type" "neon_move_narrow_q")]

;; Big-endian counterpart: zero half listed first in the vec_concat.
1691 (define_insn "aarch64_xtn<mode>_insn_be"
1692 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1693 (vec_concat:<VNARROWQ2>
1694 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
1695 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
1696 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1697 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1698 [(set_attr "type" "neon_move_narrow_q")]

;; Intrinsic expander: compute into a double-width temp via the LE/BE
;; insn, then subreg down to the narrow mode the intrinsic expects.
1701 (define_expand "aarch64_xtn<mode>"
1702 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1703 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
1706 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1707 if (BYTES_BIG_ENDIAN)
1708 emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
1709 CONST0_RTX (<VNARROWQ>mode)));
1711 emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
1712 CONST0_RTX (<VNARROWQ>mode)));
1714 /* The intrinsic expects a narrow result, so emit a subreg that will get
1715 optimized away as appropriate. */
1716 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,

;; XTN2: narrow into the high half, keeping the existing low half
;; (operand 1 tied to the output).  Little-endian operand order.
1722 (define_insn "aarch64_xtn2<mode>_insn_le"
1723 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1724 (vec_concat:<VNARROWQ2>
1725 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1726 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1727 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1728 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1729 [(set_attr "type" "neon_move_narrow_q")]

;; Big-endian XTN2 with reversed vec_concat operand order.
1732 (define_insn "aarch64_xtn2<mode>_insn_be"
1733 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1734 (vec_concat:<VNARROWQ2>
1735 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1736 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1737 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1738 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1739 [(set_attr "type" "neon_move_narrow_q")]

;; Intrinsic expander dispatching to the LE/BE XTN2 insns.
1742 (define_expand "aarch64_xtn2<mode>"
1743 [(match_operand:<VNARROWQ2> 0 "register_operand")
1744 (match_operand:<VNARROWQ> 1 "register_operand")
1745 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1748 if (BYTES_BIG_ENDIAN)
1749 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1752 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
;; Concatenated truncation of two full vectors implemented as UZP1 on
;; the narrow view (takes the even, i.e. low, halves of each element);
;; operand order swaps for big-endian.
1758 (define_insn "*aarch64_narrow_trunc<mode>"
1759 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1760 (vec_concat:<VNARROWQ2>
1761 (truncate:<VNARROWQ>
1762 (match_operand:VQN 1 "register_operand" "w"))
1763 (truncate:<VNARROWQ>
1764 (match_operand:VQN 2 "register_operand" "w"))))]
1767 if (!BYTES_BIG_ENDIAN)
1768 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1770 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1772 [(set_attr "type" "neon_permute<q>")]

;; vec_pack_trunc for 64-bit inputs: build a 128-bit temporary from the
;; two halves (order depends on endianness) and truncate it in one go.
1777 (define_expand "vec_pack_trunc_<mode>"
1778 [(match_operand:<VNARROWD> 0 "register_operand")
1779 (match_operand:VDN 1 "register_operand")
1780 (match_operand:VDN 2 "register_operand")]
1783 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1784 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1785 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1787 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1788 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1789 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));

;; vec_pack_trunc for 128-bit inputs: XTN the low operand, then XTN2
;; the high operand into the upper half.
1795 (define_expand "vec_pack_trunc_<mode>"
1796 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1797 (vec_concat:<VNARROWQ2>
1798 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1799 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1802 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1803 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1804 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1806 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1808 if (BYTES_BIG_ENDIAN)
1809 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1812 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
;; SHRN (shift right narrow) written as a full-width set: narrowed
;; result in the low half, zero in the high half.  Little-endian order.
1818 (define_insn "aarch64_shrn<mode>_insn_le"
1819 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1820 (vec_concat:<VNARROWQ2>
1821 (truncate:<VNARROWQ>
1822 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1823 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1824 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1825 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1826 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1827 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Big-endian SHRN with the zero half listed first.
1830 (define_insn "aarch64_shrn<mode>_insn_be"
1831 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1832 (vec_concat:<VNARROWQ2>
1833 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1834 (truncate:<VNARROWQ>
1835 (lshiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
1836 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1837 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1838 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1839 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Combiner pattern: truncate of a (logical or arithmetic, per SHIFTRT)
;; right shift narrows directly to SHRN.
1842 (define_insn "*aarch64_<srn_op>shrn<mode>_vect"
1843 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1844 (truncate:<VNARROWQ>
1845 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1846 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
1848 "shrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1849 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; SHRN2 combiner pattern: keep operand 1 in the low half ("0" tie) and
;; narrow the shifted operand 2 into the high half.  Little-endian.
1852 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_le"
1853 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1854 (vec_concat:<VNARROWQ2>
1855 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1856 (truncate:<VNARROWQ>
1857 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1858 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1859 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1860 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1861 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Big-endian SHRN2 combiner pattern (vec_concat order reversed).
1864 (define_insn "*aarch64_<srn_op>shrn<mode>2_vect_be"
1865 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1866 (vec_concat:<VNARROWQ2>
1867 (truncate:<VNARROWQ>
1868 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand" "w")
1869 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
1870 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1871 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1872 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1873 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; When both halves are narrowed by a shift of exactly the narrow
;; element width ("exact_top"), the result is the odd (top) halves of
;; each element, which is a single UZP2 permute.  Little-endian form.
1876 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1877 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1878 (vec_concat:<VNARROWQ2>
1879 (truncate:<VNARROWQ>
1880 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1881 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1882 (truncate:<VNARROWQ>
1883 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1885 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1886 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1887 [(set_attr "type" "neon_permute<q>")]

;; Big-endian form of the same UZP2 shuffle (operands 1/3 swapped in
;; the RTL but the emitted template keeps %1, %3 order).
1890 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1891 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1892 (vec_concat:<VNARROWQ2>
1893 (truncate:<VNARROWQ>
1894 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1895 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1896 (truncate:<VNARROWQ>
1897 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1899 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1900 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1901 [(set_attr "type" "neon_permute<q>")]

;; Intrinsic expander for SHRN: broadcast the scalar shift amount,
;; compute into a double-width temp via the LE/BE insn, then subreg
;; the narrow low part into the result.
1904 (define_expand "aarch64_shrn<mode>"
1905 [(set (match_operand:<VNARROWQ> 0 "register_operand")
1906 (truncate:<VNARROWQ>
1907 (lshiftrt:VQN (match_operand:VQN 1 "register_operand")
1908 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>"))))]
1911 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1912 INTVAL (operands[2]));
1913 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1914 if (BYTES_BIG_ENDIAN)
1915 emit_insn (gen_aarch64_shrn<mode>_insn_be (tmp, operands[1],
1916 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1918 emit_insn (gen_aarch64_shrn<mode>_insn_le (tmp, operands[1],
1919 operands[2], CONST0_RTX (<VNARROWQ>mode)));
1921 /* The intrinsic expects a narrow result, so emit a subreg that will get
1922 optimized away as appropriate. */
1923 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; RSHRN (rounding shift right narrow), modelled as an unspec since the
;; rounding step is not plain RTL.  Little-endian full-width form with
;; zero in the high half.
1929 (define_insn "aarch64_rshrn<mode>_insn_le"
1930 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1931 (vec_concat:<VNARROWQ2>
1932 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1933 (match_operand:VQN 2
1934 "aarch64_simd_shift_imm_vec_<vn_mode>")] UNSPEC_RSHRN)
1935 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
1936 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1937 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1938 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Big-endian RSHRN with the zero half listed first.
1941 (define_insn "aarch64_rshrn<mode>_insn_be"
1942 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1943 (vec_concat:<VNARROWQ2>
1944 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
1945 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
1946 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
1948 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1949 "rshrn\\t%0.<Vntype>, %1.<Vtype>, %2"
1950 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Intrinsic expander for RSHRN.  A shift by the full narrow element
;; width is implemented as RADDHN against a shared zero (same rounding
;; result); otherwise broadcast the amount and use the LE/BE insn,
;; subregging the narrow low part into the result.
1953 (define_expand "aarch64_rshrn<mode>"
1954 [(match_operand:<VNARROWQ> 0 "register_operand")
1955 (match_operand:VQN 1 "register_operand")
1956 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<vn_mode>")]
1959 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
1961 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
1962 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
1966 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
1967 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1968 INTVAL (operands[2]));
1969 if (BYTES_BIG_ENDIAN)
1971 gen_aarch64_rshrn<mode>_insn_be (tmp, operands[1],
1973 CONST0_RTX (<VNARROWQ>mode)));
1976 gen_aarch64_rshrn<mode>_insn_le (tmp, operands[1],
1978 CONST0_RTX (<VNARROWQ>mode)));
1980 /* The intrinsic expects a narrow result, so emit a subreg that will
1981 get optimized away as appropriate. */
1982 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; SHRN2: shift-right-narrow operand 2 into the high half while keeping
;; operand 1 (tied, "0") in the low half.  Little-endian operand order.
1989 (define_insn "aarch64_shrn2<mode>_insn_le"
1990 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1991 (vec_concat:<VNARROWQ2>
1992 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1993 (truncate:<VNARROWQ>
1994 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
1995 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
1996 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1997 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
1998 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Big-endian SHRN2 with reversed vec_concat operand order.
2001 (define_insn "aarch64_shrn2<mode>_insn_be"
2002 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2003 (vec_concat:<VNARROWQ2>
2004 (truncate:<VNARROWQ>
2005 (lshiftrt:VQN (match_operand:VQN 2 "register_operand" "w")
2006 (match_operand:VQN 3
2007 "aarch64_simd_shift_imm_vec_<vn_mode>")))
2008 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2009 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2010 "shrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2011 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Intrinsic expander for SHRN2: broadcast the scalar amount and pick
;; the LE/BE insn.
2014 (define_expand "aarch64_shrn2<mode>"
2015 [(match_operand:<VNARROWQ2> 0 "register_operand")
2016 (match_operand:<VNARROWQ> 1 "register_operand")
2017 (match_operand:VQN 2 "register_operand")
2018 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2021 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2022 INTVAL (operands[3]));
2023 if (BYTES_BIG_ENDIAN)
2024 emit_insn (gen_aarch64_shrn2<mode>_insn_be (operands[0], operands[1],
2025 operands[2], operands[3]));
2027 emit_insn (gen_aarch64_shrn2<mode>_insn_le (operands[0], operands[1],
2028 operands[2], operands[3]));
;; RSHRN2 (rounding variant of SHRN2), as an unspec.  Little-endian
;; order; operand 1 (low half) tied to the output.
2033 (define_insn "aarch64_rshrn2<mode>_insn_le"
2034 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2035 (vec_concat:<VNARROWQ2>
2036 (match_operand:<VNARROWQ> 1 "register_operand" "0")
2037 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2038 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2040 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2041 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2042 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Big-endian RSHRN2 with reversed vec_concat operand order.
2045 (define_insn "aarch64_rshrn2<mode>_insn_be"
2046 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2047 (vec_concat:<VNARROWQ2>
2048 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
2049 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")]
2051 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
2052 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2053 "rshrn2\\t%0.<V2ntype>, %2.<Vtype>, %3"
2054 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Intrinsic expander for RSHRN2.  A full-element-width shift becomes
;; RADDHN2 against a shared zero; otherwise broadcast the amount and
;; pick the LE/BE insn.
2057 (define_expand "aarch64_rshrn2<mode>"
2058 [(match_operand:<VNARROWQ2> 0 "register_operand")
2059 (match_operand:<VNARROWQ> 1 "register_operand")
2060 (match_operand:VQN 2 "register_operand")
2061 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
2064 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ2>mode))
2066 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
2067 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
2072 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
2073 INTVAL (operands[3]));
2074 if (BYTES_BIG_ENDIAN)
2075 emit_insn (gen_aarch64_rshrn2<mode>_insn_be (operands[0],
2080 emit_insn (gen_aarch64_rshrn2<mode>_insn_le (operands[0],
2089 ;; Widening operations.
;; UXTL/SXTL: sign- or zero-extend (ANY_EXTEND iterator) the low half of
;; a 128-bit vector into a full-width wider vector.  The vec_select with
;; vect_par_cnst_lo_half picks the low-half lanes.
2091 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
2092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2093 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2094 (match_operand:VQW 1 "register_operand" "w")
2095 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
2098 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
2099 [(set_attr "type" "neon_shift_imm_long")]
;; UXTL2/SXTL2: same, but extending the high-half lanes.
2102 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
2103 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2104 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2105 (match_operand:VQW 1 "register_operand" "w")
2106 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
2109 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
2110 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (true = high half) and hand off to the
;; insn patterns above.
2113 (define_expand "vec_unpack<su>_hi_<mode>"
2114 [(match_operand:<VWIDE> 0 "register_operand")
2115 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2118 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2119 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
2125 (define_expand "vec_unpack<su>_lo_<mode>"
2126 [(match_operand:<VWIDE> 0 "register_operand")
2127 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
2130 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2131 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
2137 ;; Widening arithmetic.
;; SMLAL/UMLAL: widening multiply-accumulate of the LOW halves of two
;; 128-bit vectors (operands 2 and 4), accumulated into operand 1 (tied
;; to the output).  The <su> iterator selects the signed/unsigned extend.
2139 (define_insn "*aarch64_<su>mlal_lo<mode>"
2140 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2143 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2144 (match_operand:VQW 2 "register_operand" "w")
2145 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2146 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2147 (match_operand:VQW 4 "register_operand" "w")
2149 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2151 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2152 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLAL2/UMLAL2: the same operation on the HIGH halves.
2155 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2156 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2159 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2160 (match_operand:VQW 2 "register_operand" "w")
2161 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2162 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2163 (match_operand:VQW 4 "register_operand" "w")
2165 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2167 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2168 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander: construct the high-half selector and emit the insn above.
2171 (define_expand "aarch64_<su>mlal_hi<mode>"
2172 [(match_operand:<VWIDE> 0 "register_operand")
2173 (match_operand:<VWIDE> 1 "register_operand")
2174 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2175 (match_operand:VQW 3 "register_operand")]
2178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2179 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2180 operands[2], p, operands[3]));
;; SMLAL2/UMLAL2 by scalar: high-half widening multiply by element 0 of
;; a duplicated scalar (operand 4), accumulated into operand 1.
2185 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2186 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2191 (match_operand:VQ_HSI 2 "register_operand" "w")
2192 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2193 (vec_duplicate:<VWIDE>
2194 (ANY_EXTEND:<VWIDE_S>
2195 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2196 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2198 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2199 [(set_attr "type" "neon_mla_<Vetype>_long")]
2202 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2203 [(match_operand:<VWIDE> 0 "register_operand")
2204 (match_operand:<VWIDE> 1 "register_operand")
2205 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2206 (match_operand:<VEL> 3 "register_operand")]
2209 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2210 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2211 operands[1], operands[2], p, operands[3]));
;; SMLSL/UMLSL: widening multiply-SUBTRACT — mirrors the mlal patterns
;; above but subtracts the widened product from accumulator operand 1.
;; Low-half variant first.
2216 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2219 (match_operand:<VWIDE> 1 "register_operand" "0")
2221 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2222 (match_operand:VQW 2 "register_operand" "w")
2223 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2224 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2225 (match_operand:VQW 4 "register_operand" "w")
2228 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2229 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLSL2/UMLSL2: high-half variant.
2232 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2235 (match_operand:<VWIDE> 1 "register_operand" "0")
2237 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2238 (match_operand:VQW 2 "register_operand" "w")
2239 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2240 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2241 (match_operand:VQW 4 "register_operand" "w")
2244 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2245 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Expander: build the high-half selector, emit the insn.
2248 (define_expand "aarch64_<su>mlsl_hi<mode>"
2249 [(match_operand:<VWIDE> 0 "register_operand")
2250 (match_operand:<VWIDE> 1 "register_operand")
2251 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2252 (match_operand:VQW 3 "register_operand")]
2255 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2256 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2257 operands[2], p, operands[3]));
;; SMLSL2/UMLSL2 by scalar (element 0 of a duplicated scalar, operand 4).
2262 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2265 (match_operand:<VWIDE> 1 "register_operand" "0")
2269 (match_operand:VQ_HSI 2 "register_operand" "w")
2270 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2271 (vec_duplicate:<VWIDE>
2272 (ANY_EXTEND:<VWIDE_S>
2273 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2275 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2276 [(set_attr "type" "neon_mla_<Vetype>_long")]
2279 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2280 [(match_operand:<VWIDE> 0 "register_operand")
2281 (match_operand:<VWIDE> 1 "register_operand")
2282 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2283 (match_operand:<VEL> 3 "register_operand")]
2286 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2287 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2288 operands[1], operands[2], p, operands[3]));
;; 64-bit (D-register) widening multiply-accumulate/subtract, operating on
;; whole 64-bit vectors rather than halves of a 128-bit vector.
;; SMLAL/UMLAL: vector x vector.
2293 (define_insn "aarch64_<su>mlal<mode>"
2294 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2298 (match_operand:VD_BHSI 2 "register_operand" "w"))
2300 (match_operand:VD_BHSI 3 "register_operand" "w")))
2301 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2303 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2304 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLAL/UMLAL by scalar: multiplier is element 0 of a duplicated scalar.
2307 (define_insn "aarch64_<su>mlal_n<mode>"
2308 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2312 (match_operand:VD_HSI 2 "register_operand" "w"))
2313 (vec_duplicate:<VWIDE>
2314 (ANY_EXTEND:<VWIDE_S>
2315 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2316 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2318 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2319 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLSL/UMLSL: widened product subtracted from accumulator operand 1.
2322 (define_insn "aarch64_<su>mlsl<mode>"
2323 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2325 (match_operand:<VWIDE> 1 "register_operand" "0")
2328 (match_operand:VD_BHSI 2 "register_operand" "w"))
2330 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2332 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2333 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; SMLSL/UMLSL by scalar.
2336 (define_insn "aarch64_<su>mlsl_n<mode>"
2337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2339 (match_operand:<VWIDE> 1 "register_operand" "0")
2342 (match_operand:VD_HSI 2 "register_operand" "w"))
2343 (vec_duplicate:<VWIDE>
2344 (ANY_EXTEND:<VWIDE_S>
2345 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2347 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2348 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiplies (no accumulation).
;; SMULL/UMULL on the low halves of two 128-bit vectors.
2351 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2352 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2353 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2354 (match_operand:VQW 1 "register_operand" "w")
2355 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2356 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2357 (match_operand:VQW 2 "register_operand" "w")
2360 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2361 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; SMULL/UMULL on whole 64-bit (D-register) vectors, for the intrinsics.
2364 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2365 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2366 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2367 (match_operand:VD_BHSI 1 "register_operand" "w"))
2369 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2371 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2372 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the vectorizer: low-half widening multiply.
2375 (define_expand "vec_widen_<su>mult_lo_<mode>"
2376 [(match_operand:<VWIDE> 0 "register_operand")
2377 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2378 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2381 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2382 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; SMULL2/UMULL2 on the high halves.
2389 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2391 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2392 (match_operand:VQW 1 "register_operand" "w")
2393 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2394 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2395 (match_operand:VQW 2 "register_operand" "w")
2398 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2399 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: high-half widening multiply.
2402 (define_expand "vec_widen_<su>mult_hi_<mode>"
2403 [(match_operand:<VWIDE> 0 "register_operand")
2404 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2405 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2408 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2417 ;; vmull_lane_s16 intrinsics
;; SMULL/UMULL by lane: multiply vector operand 1 by the selected lane of
;; operand 2; lane number is adjusted for endianness at output time.
2418 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2419 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2422 (match_operand:<VCOND> 1 "register_operand" "w"))
2423 (vec_duplicate:<VWIDE>
2424 (ANY_EXTEND:<VWIDE_S>
2426 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2427 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2430 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2431 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2433 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; SMULL2/UMULL2 by lane: high half of operand 1 times a lane of a
;; 64-bit vector (<VCOND>) operand 3.
2436 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2437 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2441 (match_operand:VQ_HSI 1 "register_operand" "w")
2442 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2443 (vec_duplicate:<VWIDE>
2444 (ANY_EXTEND:<VWIDE_S>
2446 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2447 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2450 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2451 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2453 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector, then emit the insn above.
2456 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2457 [(match_operand:<VWIDE> 0 "register_operand")
2458 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2459 (match_operand:<VCOND> 2 "register_operand")
2460 (match_operand:SI 3 "immediate_operand")]
2463 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2464 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2465 operands[1], p, operands[2], operands[3]));
;; laneq variants: lane taken from a 128-bit vector (<VCONQ>) instead.
2470 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2471 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2475 (match_operand:VQ_HSI 1 "register_operand" "w")
2476 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2477 (vec_duplicate:<VWIDE>
2478 (ANY_EXTEND:<VWIDE_S>
2480 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2481 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2484 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2485 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2487 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2490 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2491 [(match_operand:<VWIDE> 0 "register_operand")
2492 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2493 (match_operand:<VCONQ> 2 "register_operand")
2494 (match_operand:SI 3 "immediate_operand")]
2497 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2498 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2499 operands[1], p, operands[2], operands[3]));
;; SMULL/UMULL by scalar: element 0 of a duplicated scalar multiplier.
2504 (define_insn "aarch64_<su>mull_n<mode>"
2505 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2508 (match_operand:VD_HSI 1 "register_operand" "w"))
2509 (vec_duplicate:<VWIDE>
2510 (ANY_EXTEND:<VWIDE_S>
2511 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2513 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2514 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; SMULL2/UMULL2 by scalar: high half times a scalar.
2517 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2518 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2522 (match_operand:VQ_HSI 1 "register_operand" "w")
2523 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2524 (vec_duplicate:<VWIDE>
2525 (ANY_EXTEND:<VWIDE_S>
2526 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2528 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2529 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2532 (define_expand "aarch64_<su>mull_hi_n<mode>"
2533 [(match_operand:<VWIDE> 0 "register_operand")
2534 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2535 (match_operand:<VEL> 2 "register_operand")]
2538 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2539 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2545 ;; vmlal_lane_s16 intrinsics
;; SMLAL/UMLAL by lane: widening multiply of operand 2 by a selected lane
;; of operand 3, accumulated into operand 1 (tied "0").  Lane index is
;; remapped for endianness before printing.
2546 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2547 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2551 (match_operand:<VCOND> 2 "register_operand" "w"))
2552 (vec_duplicate:<VWIDE>
2553 (ANY_EXTEND:<VWIDE_S>
2555 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2556 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2557 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2560 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2561 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2563 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; SMLAL2/UMLAL2 by lane (64-bit lane source <VCOND>).
2566 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2567 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2572 (match_operand:VQ_HSI 2 "register_operand" "w")
2573 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2574 (vec_duplicate:<VWIDE>
2575 (ANY_EXTEND:<VWIDE_S>
2577 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2578 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2579 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2582 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2583 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2585 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: high-half selector + the insn above.
2588 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2589 [(match_operand:<VWIDE> 0 "register_operand")
2590 (match_operand:<VWIDE> 1 "register_operand")
2591 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2592 (match_operand:<VCOND> 3 "register_operand")
2593 (match_operand:SI 4 "immediate_operand")]
2596 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2597 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2598 operands[1], operands[2], p, operands[3], operands[4]));
;; laneq variants: lane comes from a 128-bit vector (<VCONQ>).
2603 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2604 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2609 (match_operand:VQ_HSI 2 "register_operand" "w")
2610 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2611 (vec_duplicate:<VWIDE>
2612 (ANY_EXTEND:<VWIDE_S>
2614 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2615 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2616 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2619 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2620 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2622 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2625 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2626 [(match_operand:<VWIDE> 0 "register_operand")
2627 (match_operand:<VWIDE> 1 "register_operand")
2628 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2629 (match_operand:<VCONQ> 3 "register_operand")
2630 (match_operand:SI 4 "immediate_operand")]
2633 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2634 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2635 operands[1], operands[2], p, operands[3], operands[4]));
;; SMLSL/UMLSL by lane: as the mlal lane patterns above, but the widened
;; product is subtracted from accumulator operand 1 (tied "0").
2640 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2643 (match_operand:<VWIDE> 1 "register_operand" "0")
2646 (match_operand:<VCOND> 2 "register_operand" "w"))
2647 (vec_duplicate:<VWIDE>
2648 (ANY_EXTEND:<VWIDE_S>
2650 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2651 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2654 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2655 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2657 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; SMLSL2/UMLSL2 by lane (64-bit lane source <VCOND>).
2660 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2661 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2663 (match_operand:<VWIDE> 1 "register_operand" "0")
2667 (match_operand:VQ_HSI 2 "register_operand" "w")
2668 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2669 (vec_duplicate:<VWIDE>
2670 (ANY_EXTEND:<VWIDE_S>
2672 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2673 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2677 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2678 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2680 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander: high-half selector + insn above.
2683 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2684 [(match_operand:<VWIDE> 0 "register_operand")
2685 (match_operand:<VWIDE> 1 "register_operand")
2686 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2687 (match_operand:<VCOND> 3 "register_operand")
2688 (match_operand:SI 4 "immediate_operand")]
2691 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2692 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2693 operands[1], operands[2], p, operands[3], operands[4]));
;; laneq variants: 128-bit lane source (<VCONQ>).
2698 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2699 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2701 (match_operand:<VWIDE> 1 "register_operand" "0")
2705 (match_operand:VQ_HSI 2 "register_operand" "w")
2706 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2707 (vec_duplicate:<VWIDE>
2708 (ANY_EXTEND:<VWIDE_S>
2710 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2711 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2715 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2716 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2718 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2721 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2722 [(match_operand:<VWIDE> 0 "register_operand")
2723 (match_operand:<VWIDE> 1 "register_operand")
2724 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2725 (match_operand:<VCONQ> 3 "register_operand")
2726 (match_operand:SI 4 "immediate_operand")]
2729 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2730 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2731 operands[1], operands[2], p, operands[3], operands[4]));
2736 ;; FP vector operations.
2737 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2738 ;; double-precision (64-bit) floating-point data types and arithmetic as
2739 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2740 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2742 ;; Floating-point operations can raise an exception. Vectorizing such
2743 ;; operations is safe for the reasons explained below.
2745 ;; ARMv8 permits an extension to enable trapped floating-point
2746 ;; exception handling, however this is an optional feature. In the
2747 ;; event of a floating-point exception being raised by vectorised
2749 ;; 1. If trapped floating-point exceptions are available, then a trap
2750 ;; will be taken when any lane raises an enabled exception. A trap
2751 ;; handler may determine which lane raised the exception.
2752 ;; 2. Alternatively a sticky exception flag is set in the
2753 ;; floating-point status register (FPSR). Software may explicitly
2754 ;; test the exception flags, in which case the tests will either
2755 ;; prevent vectorisation, allowing precise identification of the
2756 ;; failing operation, or if tested outside of vectorisable regions
2757 ;; then the specific operation and lane are not of interest.
2759 ;; FP arithmetic operations.
;; Basic FP arithmetic over VHSDF (half/single/double element vectors).
;; FADD: element-wise vector add.
2761 (define_insn "add<mode>3"
2762 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2763 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2764 (match_operand:VHSDF 2 "register_operand" "w")))]
2766 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2767 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; FSUB: element-wise vector subtract.
2770 (define_insn "sub<mode>3"
2771 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2772 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2773 (match_operand:VHSDF 2 "register_operand" "w")))]
2775 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2776 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; FMUL: element-wise vector multiply.
2779 (define_insn "mul<mode>3"
2780 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2781 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2782 (match_operand:VHSDF 2 "register_operand" "w")))]
2784 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2785 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: first try the approximate reciprocal sequence
;; (aarch64_emit_approx_div); if it declines, fall through to the fdiv
;; insn below after forcing operand 1 into a register.
2788 (define_expand "div<mode>3"
2789 [(set (match_operand:VHSDF 0 "register_operand")
2790 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2791 (match_operand:VHSDF 2 "register_operand")))]
2794 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2797 operands[1] = force_reg (<MODE>mode, operands[1]);
;; FDIV: true element-wise division.
2800 (define_insn "*div<mode>3"
2801 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2802 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2803 (match_operand:VHSDF 2 "register_operand" "w")))]
2805 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2806 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; FNEG: element-wise negation.
2809 (define_insn "neg<mode>2"
2810 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2811 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2813 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2814 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; FABS: element-wise absolute value.
2817 (define_insn "abs<mode>2"
2818 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2819 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2821 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2822 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Unfused FP multiply-add intrinsics: deliberately expanded as a separate
;; fmul followed by fadd/fsub through a scratch register (NOT fused fma),
;; as the two emit_insn calls below show.
2825 (define_expand "aarch64_float_mla<mode>"
2826 [(set (match_operand:VDQF_DF 0 "register_operand")
2829 (match_operand:VDQF_DF 2 "register_operand")
2830 (match_operand:VDQF_DF 3 "register_operand"))
2831 (match_operand:VDQF_DF 1 "register_operand")))]
2834 rtx scratch = gen_reg_rtx (<MODE>mode);
2835 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2836 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
;; Unfused multiply-subtract: operand 1 minus (operand 2 * operand 3).
2841 (define_expand "aarch64_float_mls<mode>"
2842 [(set (match_operand:VDQF_DF 0 "register_operand")
2844 (match_operand:VDQF_DF 1 "register_operand")
2846 (match_operand:VDQF_DF 2 "register_operand")
2847 (match_operand:VDQF_DF 3 "register_operand"))))]
2850 rtx scratch = gen_reg_rtx (<MODE>mode);
2851 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2852 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
;; _n variants: the multiplier is a duplicated scalar (operand 3),
;; expanded via gen_mul_n<mode>3.
2857 (define_expand "aarch64_float_mla_n<mode>"
2858 [(set (match_operand:VDQSF 0 "register_operand")
2861 (vec_duplicate:VDQSF
2862 (match_operand:<VEL> 3 "register_operand"))
2863 (match_operand:VDQSF 2 "register_operand"))
2864 (match_operand:VDQSF 1 "register_operand")))]
2867 rtx scratch = gen_reg_rtx (<MODE>mode);
2868 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2869 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2874 (define_expand "aarch64_float_mls_n<mode>"
2875 [(set (match_operand:VDQSF 0 "register_operand")
2877 (match_operand:VDQSF 1 "register_operand")
2879 (vec_duplicate:VDQSF
2880 (match_operand:<VEL> 3 "register_operand"))
2881 (match_operand:VDQSF 2 "register_operand"))))]
2884 rtx scratch = gen_reg_rtx (<MODE>mode);
2885 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2886 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
;; Unfused lane forms: the multiplier is a selected lane of a V2SF (lane)
;; or V4SF (laneq) vector; expansion goes through gen_mul_lane<mode>3 /
;; gen_mul_laneq<mode>3 plus a separate add or sub.
2891 (define_expand "aarch64_float_mla_lane<mode>"
2892 [(set (match_operand:VDQSF 0 "register_operand")
2895 (vec_duplicate:VDQSF
2897 (match_operand:V2SF 3 "register_operand")
2898 (parallel [(match_operand:SI 4 "immediate_operand")])))
2899 (match_operand:VDQSF 2 "register_operand"))
2900 (match_operand:VDQSF 1 "register_operand")))]
2903 rtx scratch = gen_reg_rtx (<MODE>mode);
2904 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2905 operands[3], operands[4]));
2906 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2911 (define_expand "aarch64_float_mls_lane<mode>"
2912 [(set (match_operand:VDQSF 0 "register_operand")
2914 (match_operand:VDQSF 1 "register_operand")
2916 (vec_duplicate:VDQSF
2918 (match_operand:V2SF 3 "register_operand")
2919 (parallel [(match_operand:SI 4 "immediate_operand")])))
2920 (match_operand:VDQSF 2 "register_operand"))))]
2923 rtx scratch = gen_reg_rtx (<MODE>mode);
2924 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2925 operands[3], operands[4]));
2926 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
;; laneq: lane taken from a 128-bit V4SF vector.
2931 (define_expand "aarch64_float_mla_laneq<mode>"
2932 [(set (match_operand:VDQSF 0 "register_operand")
2935 (vec_duplicate:VDQSF
2937 (match_operand:V4SF 3 "register_operand")
2938 (parallel [(match_operand:SI 4 "immediate_operand")])))
2939 (match_operand:VDQSF 2 "register_operand"))
2940 (match_operand:VDQSF 1 "register_operand")))]
2943 rtx scratch = gen_reg_rtx (<MODE>mode);
2944 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2945 operands[3], operands[4]));
2946 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2951 (define_expand "aarch64_float_mls_laneq<mode>"
2952 [(set (match_operand:VDQSF 0 "register_operand")
2954 (match_operand:VDQSF 1 "register_operand")
2956 (vec_duplicate:VDQSF
2958 (match_operand:V4SF 3 "register_operand")
2959 (parallel [(match_operand:SI 4 "immediate_operand")])))
2960 (match_operand:VDQSF 2 "register_operand"))))]
2963 rtx scratch = gen_reg_rtx (<MODE>mode);
2964 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2965 operands[3], operands[4]));
2966 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
;; Fused multiply-add (FMLA).  Note the accumulator is operand 3, tied to
;; the output with constraint "0" — FMLA accumulates in place.
2971 (define_insn "fma<mode>4"
2972 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2973 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2974 (match_operand:VHSDF 2 "register_operand" "w")
2975 (match_operand:VHSDF 3 "register_operand" "0")))]
2977 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2978 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA by element: one multiplicand is a duplicated lane of operand 1;
;; the lane index is endian-adjusted before printing.
2981 (define_insn "*aarch64_fma4_elt<mode>"
2982 [(set (match_operand:VDQF 0 "register_operand" "=w")
2986 (match_operand:VDQF 1 "register_operand" "<h_con>")
2987 (parallel [(match_operand:SI 2 "immediate_operand")])))
2988 (match_operand:VDQF 3 "register_operand" "w")
2989 (match_operand:VDQF 4 "register_operand" "0")))]
2992 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2993 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2995 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Same, with the lane source in the opposite vector width (<VSWAP_WIDTH>).
2998 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
2999 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3001 (vec_duplicate:VDQSF
3003 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3004 (parallel [(match_operand:SI 2 "immediate_operand")])))
3005 (match_operand:VDQSF 3 "register_operand" "w")
3006 (match_operand:VDQSF 4 "register_operand" "0")))]
3009 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3010 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3012 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a broadcast scalar (element [0] of a dup'd register).
3015 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
3016 [(set (match_operand:VMUL 0 "register_operand" "=w")
3019 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3020 (match_operand:VMUL 2 "register_operand" "w")
3021 (match_operand:VMUL 3 "register_operand" "0")))]
3023 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3024 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed with the vector FMLA-by-element form,
;; selecting one lane of a V2DF multiplicand.
3027 (define_insn "*aarch64_fma4_elt_to_64v2df"
3028 [(set (match_operand:DF 0 "register_operand" "=w")
3031 (match_operand:V2DF 1 "register_operand" "w")
3032 (parallel [(match_operand:SI 2 "immediate_operand")]))
3033 (match_operand:DF 3 "register_operand" "w")
3034 (match_operand:DF 4 "register_operand" "0")))]
3037 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3038 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
3040 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (FMLS): fma with one multiplicand negated,
;; i.e. acc - a*b.  Accumulator is operand 3, tied "0".
3043 (define_insn "fnma<mode>4"
3044 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3046 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
3047 (match_operand:VHSDF 2 "register_operand" "w")
3048 (match_operand:VHSDF 3 "register_operand" "0")))]
3050 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3051 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS by element: lane of operand 1, endian-adjusted index.
3054 (define_insn "*aarch64_fnma4_elt<mode>"
3055 [(set (match_operand:VDQF 0 "register_operand" "=w")
3058 (match_operand:VDQF 3 "register_operand" "w"))
3061 (match_operand:VDQF 1 "register_operand" "<h_con>")
3062 (parallel [(match_operand:SI 2 "immediate_operand")])))
3063 (match_operand:VDQF 4 "register_operand" "0")))]
3066 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3067 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3069 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Same, lane source in the opposite vector width.
3072 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
3073 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3076 (match_operand:VDQSF 3 "register_operand" "w"))
3077 (vec_duplicate:VDQSF
3079 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
3080 (parallel [(match_operand:SI 2 "immediate_operand")])))
3081 (match_operand:VDQSF 4 "register_operand" "0")))]
3084 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
3085 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
3087 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a broadcast scalar (element [0]).
3090 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
3091 [(set (match_operand:VMUL 0 "register_operand" "=w")
3094 (match_operand:VMUL 2 "register_operand" "w"))
3096 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
3097 (match_operand:VMUL 3 "register_operand" "0")))]
3099 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
3100 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF FMLS selecting one lane of a V2DF multiplicand.
3103 (define_insn "*aarch64_fnma4_elt_to_64v2df"
3104 [(set (match_operand:DF 0 "register_operand" "=w")
3107 (match_operand:V2DF 1 "register_operand" "w")
3108 (parallel [(match_operand:SI 2 "immediate_operand")]))
3110 (match_operand:DF 3 "register_operand" "w"))
3111 (match_operand:DF 4 "register_operand" "0")))]
3114 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
3115 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
3117 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
3120 ;; Vector versions of the floating-point frint patterns.
3121 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3122 (define_insn "<frint_pattern><mode>2"
3123 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3124 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3127 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3128 [(set_attr "type" "neon_fp_round_<stype><q>")]
3131 ;; Vector versions of the fcvt standard patterns.
3132 ;; Expands to lbtrunc, lround, lceil, lfloor
3133 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3134 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3135 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3136 [(match_operand:VHSDF 1 "register_operand" "w")]
3139 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3140 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3143 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI rounding conversion (signed or unsigned via FIXUORS).
3144 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3145 [(set (match_operand:HI 0 "register_operand" "=w")
3146 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3148 "TARGET_SIMD_F16INST"
3149 "fcvt<frint_suffix><su>\t%h0, %h1"
3150 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (FCVTZS/FCVTZU).
3153 (define_insn "<optab>_trunchfhi2"
3154 [(set (match_operand:HI 0 "register_operand" "=w")
3155 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3156 "TARGET_SIMD_F16INST"
3157 "fcvtz<su>\t%h0, %h1"
3158 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF conversion (SCVTF/UCVTF).
3161 (define_insn "<optab>hihf2"
3162 [(set (match_operand:HF 0 "register_operand" "=w")
3163 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3164 "TARGET_SIMD_F16INST"
3165 "<su_optab>cvtf\t%h0, %h1"
3166 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of 2 with a float->int conversion into
;; a single FCVTZ with an immediate #fbits operand.  The condition
;; limits fbits to the element's bit width.
3169 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3170 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3171 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3173 (match_operand:VDQF 1 "register_operand" "w")
3174 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3177 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3178 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
;; fbits is recovered from the power-of-two vector constant and spliced
;; into the template at output time.
3180 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3182 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3183 output_asm_insn (buf, operands);
3186 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns style conversions onto the
;; unspec-based insn patterns above.
3189 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3190 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3191 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3192 [(match_operand:VHSDF 1 "register_operand")]
3197 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3198 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3199 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3200 [(match_operand:VHSDF 1 "register_operand")]
3205 (define_expand "ftrunc<VHSDF:mode>2"
3206 [(set (match_operand:VHSDF 0 "register_operand")
3207 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Vector int -> FP conversion, signed (SCVTF) or unsigned (UCVTF).
3212 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3213 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3215 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3217 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3218 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3221 ;; Conversions between vectors of floats and doubles.
3222 ;; Contains a mix of patterns to match standard pattern names
3223 ;; and those for intrinsics.
3225 ;; Float widening operations.
;; Widen the low half of operand 1 (selected by a vect_par_cnst_lo_half
;; parallel in operand 2) with FCVTL.
3227 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3228 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3229 (float_extend:<VWIDE> (vec_select:<VHALF>
3230 (match_operand:VQ_HSF 1 "register_operand" "w")
3231 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3234 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3235 [(set_attr "type" "neon_fp_cvt_widen_s")]
3238 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with an immediate fractional-bits operand.
3240 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3241 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3242 (unspec:<VHSDF:FCVT_TARGET>
3243 [(match_operand:VHSDF 1 "register_operand" "w")
3244 (match_operand:SI 2 "immediate_operand" "i")]
3247 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3248 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with an immediate fractional-bits operand.
3251 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3252 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3253 (unspec:<VDQ_HSDI:FCVT_TARGET>
3254 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3255 (match_operand:SI 2 "immediate_operand" "i")]
3258 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3259 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3262 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3263 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3264 ;; the meaning of HI and LO changes depending on the target endianness.
3265 ;; While elsewhere we map the higher numbered elements of a vector to
3266 ;; the lower architectural lanes of the vector, for these patterns we want
3267 ;; to always treat "hi" as referring to the higher architectural lanes.
3268 ;; Consequently, while the patterns below look inconsistent with our
3269 ;; other big-endian patterns their behavior is as required.
;; Expander for the vectoriser's "lo" widening: build the low-half lane
;; selector ('false' third argument) and emit the lo insn above.
3271 (define_expand "vec_unpacks_lo_<mode>"
3272 [(match_operand:<VWIDE> 0 "register_operand")
3273 (match_operand:VQ_HSF 1 "register_operand")]
3276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3277 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high half of operand 1 (selected by a vect_par_cnst_hi_half
;; parallel in operand 2) with FCVTL2.
3283 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3284 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3285 (float_extend:<VWIDE> (vec_select:<VHALF>
3286 (match_operand:VQ_HSF 1 "register_operand" "w")
3287 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3290 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3291 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander for the vectoriser's "hi" widening: widen the high-numbered
;; elements of operand 1 into operand 0.
3294 (define_expand "vec_unpacks_hi_<mode>"
3295 [(match_operand:<VWIDE> 0 "register_operand")
3296 (match_operand:VQ_HSF 1 "register_operand")]
;; Build the parallel selecting the high half ('true' third argument)
;; and emit the *hi* insn (FCVTL2).  The lo insn must not be used here:
;; its operand 2 predicate is vect_par_cnst_lo_half, which rejects the
;; hi-half parallel built above.
3299 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3300 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole 64-bit vector of floats to the double-width mode (FCVTL).
3305 (define_insn "aarch64_float_extend_lo_<Vwide>"
3306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3307 (float_extend:<VWIDE>
3308 (match_operand:VDF 1 "register_operand" "w")))]
3310 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3311 [(set_attr "type" "neon_fp_cvt_widen_s")]
3314 ;; Float narrowing operations.
;; Round-to-odd narrowing (FCVTX family), scalar DF -> SF.
3316 (define_insn "aarch64_float_trunc_rodd_df"
3317 [(set (match_operand:SF 0 "register_operand" "=w")
3318 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3322 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Round-to-odd narrowing into the low half of the result (FCVTXN).
3325 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3326 [(set (match_operand:V2SF 0 "register_operand" "=w")
3327 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3330 "fcvtxn\\t%0.2s, %1.2d"
3331 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Round-to-odd narrowing into the high half (FCVTXN2); little-endian
;; operand order, with the preserved low half tied in operand 1.
3334 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3335 [(set (match_operand:V4SF 0 "register_operand" "=w")
3337 (match_operand:V2SF 1 "register_operand" "0")
3338 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3340 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3341 "fcvtxn2\\t%0.4s, %2.2d"
3342 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: concatenation order of the two halves is swapped.
3345 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3346 [(set (match_operand:V4SF 0 "register_operand" "=w")
3348 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3350 (match_operand:V2SF 1 "register_operand" "0")))]
3351 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3352 "fcvtxn2\\t%0.4s, %2.2d"
3353 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endianness-dispatching expander for the _hi_ variants above.
3356 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3357 [(match_operand:V4SF 0 "register_operand")
3358 (match_operand:V2SF 1 "register_operand")
3359 (match_operand:V2DF 2 "register_operand")]
3362 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3363 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3364 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3365 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Narrow a double-width vector into the low half of the result (FCVTN).
3370 (define_insn "aarch64_float_truncate_lo_<mode>"
3371 [(set (match_operand:VDF 0 "register_operand" "=w")
3373 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3375 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3376 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half (FCVTN2); little-endian concatenation order,
;; preserved low half tied in operand 1.
3379 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3380 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3382 (match_operand:VDF 1 "register_operand" "0")
3384 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3386 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3387 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant with swapped concatenation order.
3390 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3391 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3394 (match_operand:<VWIDE> 2 "register_operand" "w"))
3395 (match_operand:VDF 1 "register_operand" "0")))]
3396 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3397 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3398 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endianness-dispatching expander for the _hi_ variants above.
3401 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3402 [(match_operand:<VDBL> 0 "register_operand")
3403 (match_operand:VDF 1 "register_operand")
3404 (match_operand:<VWIDE> 2 "register_operand")]
3407 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3408 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3409 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3410 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF operands into one V4SF result: narrow operand 'lo' into
;; the low half, then operand 'hi' into the high half.  lo/hi operand
;; selection is swapped on big-endian.
3415 (define_expand "vec_pack_trunc_v2df"
3416 [(set (match_operand:V4SF 0 "register_operand")
3418 (float_truncate:V2SF
3419 (match_operand:V2DF 1 "register_operand"))
3420 (float_truncate:V2SF
3421 (match_operand:V2DF 2 "register_operand"))
3425 rtx tmp = gen_reg_rtx (V2SFmode);
3426 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3427 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3429 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
3430 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3431 tmp, operands[hi]));
;; Pack two scalar DF operands into a V2SF: first assemble a V2DF from
;; the two scalars via move_lo/hi_quad, then narrow it in one go.
3436 (define_expand "vec_pack_trunc_df"
3437 [(set (match_operand:V2SF 0 "register_operand")
3440 (match_operand:DF 1 "register_operand"))
3442 (match_operand:DF 2 "register_operand"))
3446 rtx tmp = gen_reg_rtx (V2SFmode);
3447 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3448 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3450 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
3451 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
3452 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
3458 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3460 ;; a = (b < c) ? b : c;
3461 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3462 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3465 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3466 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3467 ;; operand will be returned when both operands are zero (i.e. they may not
3468 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3469 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names, emitted as FMAXNM/FMINNM.
3472 (define_insn "<su><maxmin><mode>3"
3473 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3474 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3475 (match_operand:VHSDF 2 "register_operand" "w")))]
3477 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3478 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3481 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3482 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3483 ;; which implement the IEEE fmax ()/fmin () functions.
3484 (define_insn "<fmaxmin><mode>3"
3485 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3486 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3487 (match_operand:VHSDF 2 "register_operand" "w")]
3490 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3491 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3494 ;; 'across lanes' add.
;; Integer add-reduction to a scalar: reduce across lanes into a scratch
;; vector, then extract architectural lane 0 (endianness-adjusted).
3496 (define_expand "reduc_plus_scal_<mode>"
3497 [(match_operand:<VEL> 0 "register_operand")
3498 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
3502 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3503 rtx scratch = gen_reg_rtx (<MODE>mode);
3504 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
3505 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add (FADDP) over two vector operands.
3510 (define_insn "aarch64_faddp<mode>"
3511 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3512 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3513 (match_operand:VHSDF 2 "register_operand" "w")]
3516 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3517 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (ADDV/ADDP depending on <vp>).
3520 (define_insn "aarch64_reduc_plus_internal<mode>"
3521 [(set (match_operand:VDQV 0 "register_operand" "=w")
3522 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
3525 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3526 [(set_attr "type" "neon_reduc_add<q>")]
;; Widening across-lanes add (SADDLV/UADDLV).
3529 (define_insn "aarch64_<su>addlv<mode>"
3530 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3531 (unspec:<VWIDE_S> [(match_operand:VDQV_L 1 "register_operand" "w")]
3534 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3535 [(set_attr "type" "neon_reduc_add<q>")]
;; Widening pairwise add (SADDLP/UADDLP).
3538 (define_insn "aarch64_<su>addlp<mode>"
3539 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3540 (unspec:<VDBLW> [(match_operand:VDQV_L 1 "register_operand" "w")]
3543 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3544 [(set_attr "type" "neon_reduc_add<q>")]
3547 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3548 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3549 [(set (match_operand:GPI 0 "register_operand" "=w")
3551 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3554 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3555 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
;; V2SI reduction uses pairwise ADDP (no 2-element ADDV form).
3558 (define_insn "aarch64_reduc_plus_internalv2si"
3559 [(set (match_operand:V2SI 0 "register_operand" "=w")
3560 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3563 "addp\\t%0.2s, %1.2s, %1.2s"
3564 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP reduction: a single scalar FADDP suffices.
3567 (define_insn "reduc_plus_scal_<mode>"
3568 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3569 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3572 "faddp\\t%<Vetype>0, %1.<Vtype>"
3573 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add-reduction: two rounds of pairwise FADDP, then extract lane 0.
3576 (define_expand "reduc_plus_scal_v4sf"
3577 [(set (match_operand:SF 0 "register_operand")
3578 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
3582 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3583 rtx scratch = gen_reg_rtx (V4SFmode);
3584 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3585 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3586 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading redundant sign bits (CLS).
3590 (define_insn "clrsb<mode>2"
3591 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3592 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3594 "cls\\t%0.<Vtype>, %1.<Vtype>"
3595 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros (CLZ).
3598 (define_insn "clz<mode>2"
3599 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3600 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3602 "clz\\t%0.<Vtype>, %1.<Vtype>"
3603 [(set_attr "type" "neon_cls<q>")]
;; Population count (CNT) on byte vectors.
3606 (define_insn "popcount<mode>2"
3607 [(set (match_operand:VB 0 "register_operand" "=w")
3608 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3610 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3611 [(set_attr "type" "neon_cnt<q>")]
3614 ;; 'across lanes' max and min ops.
3616 ;; Template for outputting a scalar, so we can create __builtins which can be
3617 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3618 (define_expand "reduc_<optab>_scal_<mode>"
3619 [(match_operand:<VEL> 0 "register_operand")
3620 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3624 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3625 rtx scratch = gen_reg_rtx (<MODE>mode);
3626 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3628 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; fmax/fmin-named reduction: forwarded to the reduc_<optab> expander.
3633 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3634 [(match_operand:<VEL> 0 "register_operand")
3635 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3639 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3644 ;; Likewise for integer cases, signed and unsigned.
3645 (define_expand "reduc_<optab>_scal_<mode>"
3646 [(match_operand:<VEL> 0 "register_operand")
3647 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3651 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3652 rtx scratch = gen_reg_rtx (<MODE>mode);
3653 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3655 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lanes integer min/max (SMAXV/SMINV/UMAXV/UMINV).
3660 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3661 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3662 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3665 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3666 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; use the pairwise variant instead.
3669 (define_insn "aarch64_reduc_<optab>_internalv2si"
3670 [(set (match_operand:V2SI 0 "register_operand" "=w")
3671 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3674 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3675 [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes FP min/max reduction.
3678 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3679 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3680 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3683 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3684 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3687 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3689 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3692 ;; Thus our BSL is of the form:
3693 ;; op0 = bsl (mask, op2, op3)
3694 ;; We can use any of:
3697 ;; bsl mask, op1, op2
3698 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3699 ;; bit op0, op2, mask
3700 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3701 ;; bif op0, op1, mask
3703 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3704 ;; Some forms of straight-line code may generate the equivalent form
3705 ;; in *aarch64_simd_bsl<mode>_alt.
;; Three alternatives tie a different input to the destination, selecting
;; BSL, BIT or BIF accordingly.
3707 (define_insn "aarch64_simd_bsl<mode>_internal"
3708 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3712 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
3713 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
3714 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3715 (match_dup:<V_INT_EQUIV> 3)
3719 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3720 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3721 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
3722 [(set_attr "type" "neon_bsl<q>")]
3725 ;; We need this form in addition to the above pattern to match the case
3726 ;; when combine tries merging three insns such that the second operand of
3727 ;; the outer XOR matches the second operand of the inner XOR rather than
3728 ;; the first. The two are equivalent but since recog doesn't try all
3729 ;; permutations of commutative operations, we have to have a separate pattern.
3731 (define_insn "*aarch64_simd_bsl<mode>_alt"
3732 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
3736 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
3737 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
3738 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
3739 (match_dup:<V_INT_EQUIV> 2)))]
3742 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3743 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3744 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
3745 [(set_attr "type" "neon_bsl<q>")]
3748 ;; DImode is special, we want to avoid computing operations which are
3749 ;; more naturally computed in general purpose registers in the vector
3750 ;; registers. If we do that, we need to move all three operands from general
3751 ;; purpose registers to vector registers, then back again. However, we
3752 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3753 ;; optimizations based on the component operations of a BSL.
3755 ;; That means we need a splitter back to the individual operations, if they
3756 ;; would be better calculated on the integer side.
;; Fourth alternative keeps everything in GP registers and splits back to
;; xor/and/xor once the destination is known to be a GP register.
3758 (define_insn_and_split "aarch64_simd_bsldi_internal"
3759 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3763 (match_operand:DI 3 "register_operand" "w,0,w,r")
3764 (match_operand:DI 2 "register_operand" "w,w,0,r"))
3765 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3770 bsl\\t%0.8b, %2.8b, %3.8b
3771 bit\\t%0.8b, %2.8b, %1.8b
3772 bif\\t%0.8b, %3.8b, %1.8b
3774 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3775 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3777 /* Split back to individual operations. If we're before reload, and
3778 able to create a temporary register, do so. If we're after reload,
3779 we've got an early-clobber destination register, so use that.
3780 Otherwise, we can't create pseudos and we can't yet guarantee that
3781 operands[0] is safe to write, so FAIL to split. */
3784 if (reload_completed)
3785 scratch = operands[0];
3786 else if (can_create_pseudo_p ())
3787 scratch = gen_reg_rtx (DImode);
3791 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3792 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3793 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3796 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3797 (set_attr "length" "4,4,4,12")]
;; Commuted-XOR form of the DImode BSL splitter (see the _alt pattern
;; comment above for why both orderings are needed).
3800 (define_insn_and_split "aarch64_simd_bsldi_alt"
3801 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
3805 (match_operand:DI 3 "register_operand" "w,w,0,r")
3806 (match_operand:DI 2 "register_operand" "w,0,w,r"))
3807 (match_operand:DI 1 "register_operand" "0,w,w,r"))
3812 bsl\\t%0.8b, %3.8b, %2.8b
3813 bit\\t%0.8b, %3.8b, %1.8b
3814 bif\\t%0.8b, %2.8b, %1.8b
3816 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3817 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3819 /* Split back to individual operations. If we're before reload, and
3820 able to create a temporary register, do so. If we're after reload,
3821 we've got an early-clobber destination register, so use that.
3822 Otherwise, we can't create pseudos and we can't yet guarantee that
3823 operands[0] is safe to write, so FAIL to split. */
3826 if (reload_completed)
3827 scratch = operands[0];
3828 else if (can_create_pseudo_p ())
3829 scratch = gen_reg_rtx (DImode);
3833 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3834 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3835 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3838 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
3839 (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  FP modes are punned to the integer-equivalent
;; mode (via gen_lowpart) before using the integer-only internal pattern,
;; with the result moved back in the FP mode.
3842 (define_expand "aarch64_simd_bsl<mode>"
3843 [(match_operand:VALLDIF 0 "register_operand")
3844 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3845 (match_operand:VALLDIF 2 "register_operand")
3846 (match_operand:VALLDIF 3 "register_operand")]
3849 /* We can't alias operands together if they have different modes. */
3850 rtx tmp = operands[0];
3851 if (FLOAT_MODE_P (<MODE>mode))
3853 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3854 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3855 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3857 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3858 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3862 if (tmp != operands[0])
3863 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 or 2 per the mask in operand 3.
;; Special-cases (P ? -1 : 0) as a plain move of the mask and
;; (P ? 0 : -1) as its one's complement; otherwise falls back to BSL.
3868 (define_expand "vcond_mask_<mode><v_int_equiv>"
3869 [(match_operand:VALLDI 0 "register_operand")
3870 (match_operand:VALLDI 1 "nonmemory_operand")
3871 (match_operand:VALLDI 2 "nonmemory_operand")
3872 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3875 /* If we have (a = (P) ? -1 : 0);
3876 Then we can simply move the generated mask (result must be int). */
3877 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3878 && operands[2] == CONST0_RTX (<MODE>mode))
3879 emit_move_insn (operands[0], operands[3]);
3880 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3881 else if (operands[1] == CONST0_RTX (<MODE>mode)
3882 && operands[2] == CONSTM1_RTX (<MODE>mode))
3883 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3886 if (!REG_P (operands[1]))
3887 operands[1] = force_reg (<MODE>mode, operands[1]);
3888 if (!REG_P (operands[2]))
3889 operands[2] = force_reg (<MODE>mode, operands[2]);
3890 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3891 operands[1], operands[2]));
3897 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare.  Emits CMLT/CMGE/CMLE/CMGT (signed) or
;; CMGTU/CMGEU with swapped operands (unsigned); NE is CMEQ then NOT.
;; NOTE(review): the case labels of the switch are elided in this
;; extract; the emitted comparison per line is inferred from the
;; generator called and its operand order.
3899 (define_expand "vec_cmp<mode><mode>"
3900 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3901 (match_operator 1 "comparison_operator"
3902 [(match_operand:VSDQ_I_DI 2 "register_operand")
3903 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3906 rtx mask = operands[0];
3907 enum rtx_code code = GET_CODE (operands[1]);
3917 if (operands[3] == CONST0_RTX (<MODE>mode))
3922 if (!REG_P (operands[3]))
3923 operands[3] = force_reg (<MODE>mode, operands[3]);
3931 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3935 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3939 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3943 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
3947 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
3951 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
3955 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
3959 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
3963 /* Handle NE as !EQ. */
3964 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
3965 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask))
;; (original line continues)
3969 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare.  Ordered comparisons map onto FCMGE/FCMGT/FCMEQ
;; (possibly with swapped operands); unordered ones first zero any NaN
;; lanes so the compare itself cannot raise FP exceptions.
3979 (define_expand "vec_cmp<mode><v_int_equiv>"
3980 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3981 (match_operator 1 "comparison_operator"
3982 [(match_operand:VDQF 2 "register_operand")
3983 (match_operand:VDQF 3 "nonmemory_operand")]))]
3986 int use_zero_form = 0;
3987 enum rtx_code code = GET_CODE (operands[1]);
3988 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3990 rtx (*comparison) (rtx, rtx, rtx) = NULL;
3999 if (operands[3] == CONST0_RTX (<MODE>mode))
4006 if (!REG_P (operands[3]))
4007 operands[3] = force_reg (<MODE>mode, operands[3]);
4017 comparison = gen_aarch64_cmlt<mode>;
4022 std::swap (operands[2], operands[3]);
4026 comparison = gen_aarch64_cmgt<mode>;
4031 comparison = gen_aarch64_cmle<mode>;
4036 std::swap (operands[2], operands[3]);
4040 comparison = gen_aarch64_cmge<mode>;
4044 comparison = gen_aarch64_cmeq<mode>;
4062 /* All of the above must not raise any FP exceptions. Thus we first
4063 check each operand for NaNs and force any elements containing NaN to
4064 zero before using them in the compare.
4065 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4066 (cm<cc> (isnan (a) ? 0.0 : a,
4067 isnan (b) ? 0.0 : b))
4068 We use the following transformations for doing the comparisons:
4072 a UNLT b -> b GT a. */
4074 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4075 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4076 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4077 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4078 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4079 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4080 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4081 lowpart_subreg (<V_INT_EQUIV>mode,
4084 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4085 lowpart_subreg (<V_INT_EQUIV>mode,
4088 gcc_assert (comparison != NULL);
4089 emit_insn (comparison (operands[0],
4090 lowpart_subreg (<MODE>mode,
4091 tmp0, <V_INT_EQUIV>mode),
4092 lowpart_subreg (<MODE>mode,
4093 tmp1, <V_INT_EQUIV>mode)));
4094 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
4104 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4105 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
4111 a NE b -> ~(a EQ b) */
4112 gcc_assert (comparison != NULL);
4113 emit_insn (comparison (operands[0], operands[2], operands[3]));
4115 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4119 /* LTGT is not guaranteed to not generate a FP exception. So let's
4120 go the faster way : ((a > b) || (b > a)). */
4121 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4122 operands[2], operands[3]));
4123 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4124 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4130 /* cmeq (a, a) & cmeq (b, b). */
4131 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4132 operands[2], operands[2]));
4133 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4134 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4136 if (code == UNORDERED)
4137 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4138 else if (code == UNEQ)
4140 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4141 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned compare is identical to the signed expander above, which
;; already dispatches on the rtx code (GTU/GEU/...), so just forward.
4152 (define_expand "vec_cmpu<mode><mode>"
4153 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4154 (match_operator 1 "comparison_operator"
4155 [(match_operand:VSDQ_I_DI 2 "register_operand")
4156 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4159 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4160 operands[2], operands[3]))
;; vcond: compare operands 4/5 with operator 3, then select operand 1 or
;; 2 via vcond_mask.  NE is rewritten as EQ with the select arms swapped
;; to avoid materialising the inverted mask.
4164 (define_expand "vcond<mode><mode>"
4165 [(set (match_operand:VALLDI 0 "register_operand")
4166 (if_then_else:VALLDI
4167 (match_operator 3 "comparison_operator"
4168 [(match_operand:VALLDI 4 "register_operand")
4169 (match_operand:VALLDI 5 "nonmemory_operand")])
4170 (match_operand:VALLDI 1 "nonmemory_operand")
4171 (match_operand:VALLDI 2 "nonmemory_operand")))]
4174 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4175 enum rtx_code code = GET_CODE (operands[3]);
4177 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4178 it as well as switch operands 1/2 in order to avoid the additional
4182 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4183 operands[4], operands[5]);
4184 std::swap (operands[1], operands[2]);
4186 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4187 operands[4], operands[5]));
4188 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4189 operands[2], mask));
;; Mixed-mode vcond: the compare runs in VDQF_COND mode while the values
;; selected are in the corresponding <V_cmp_mixed> mode.
4194 (define_expand "vcond<v_cmp_mixed><mode>"
4195 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
4196 (if_then_else:<V_cmp_mixed>
4197 (match_operator 3 "comparison_operator"
4198 [(match_operand:VDQF_COND 4 "register_operand")
4199 (match_operand:VDQF_COND 5 "nonmemory_operand")])
4200 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
4201 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
4204 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4205 enum rtx_code code = GET_CODE (operands[3]);
4207 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4208 it as well as switch operands 1/2 in order to avoid the additional
4212 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4213 operands[4], operands[5]);
4214 std::swap (operands[1], operands[2]);
4216 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
4217 operands[4], operands[5]));
4218 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
4219 operands[0], operands[1],
4220 operands[2], mask));
;; vcondu: unsigned vector conditional select on integer vectors.  The mask
;; is built directly in <MODE>mode (integer compare), then vcond_mask picks
;; between operands 1 and 2.  NE is canonicalised to EQ with swapped arms,
;; as in vcond (the guarding test on `code` is elided from this excerpt).
4225 (define_expand "vcondu<mode><mode>"
4226 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4227 (if_then_else:VSDQ_I_DI
4228 (match_operator 3 "comparison_operator"
4229 [(match_operand:VSDQ_I_DI 4 "register_operand")
4230 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
4231 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
4232 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
4235 rtx mask = gen_reg_rtx (<MODE>mode);
4236 enum rtx_code code = GET_CODE (operands[3]);
4238 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4239 it as well as switch operands 1/2 in order to avoid the additional
4243 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4244 operands[4], operands[5]);
4245 std::swap (operands[1], operands[2]);
4247 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
4248 operands[4], operands[5]));
4249 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4250 operands[2], mask));
;; vcondu with mixed modes: unsigned integer comparison in <V_cmp_mixed>
;; mode selecting between floating-point VDQF vectors.  Same NE -> EQ
;; canonicalisation; the vec_cmp is emitted in the comparison's own mode.
;; NOTE(review): the if_then_else wrapper line and the `if (code == NE)`
;; guard appear to be elided from this excerpt.
4254 (define_expand "vcondu<mode><v_cmp_mixed>"
4255 [(set (match_operand:VDQF 0 "register_operand")
4257 (match_operator 3 "comparison_operator"
4258 [(match_operand:<V_cmp_mixed> 4 "register_operand")
4259 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
4260 (match_operand:VDQF 1 "nonmemory_operand")
4261 (match_operand:VDQF 2 "nonmemory_operand")))]
4264 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
4265 enum rtx_code code = GET_CODE (operands[3]);
4267 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
4268 it as well as switch operands 1/2 in order to avoid the additional
4272 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
4273 operands[4], operands[5]);
4274 std::swap (operands[1], operands[2]);
4276 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
4278 operands[4], operands[5]));
4279 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
4280 operands[2], mask));
4284 ;; Patterns for AArch64 SIMD Intrinsics.
4286 ;; Lane extraction with sign extension to general purpose register.
;; Emits SMOV; the lane index is flipped for big-endian via
;; aarch64_endian_lane_rtx before printing.  NOTE(review): the sign_extend
;; wrapper line appears elided from this excerpt.
4287 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4288 [(set (match_operand:GPI 0 "register_operand" "=r")
4290 (vec_select:<VDQQH:VEL>
4291 (match_operand:VDQQH 1 "register_operand" "w")
4292 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4295 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4296 INTVAL (operands[2]));
4297 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4299 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; Zero-extending counterpart: UMOV implicitly zeroes the upper bits of the
;; destination, so a %w-register destination suffices for both GPI modes.
4302 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4303 [(set (match_operand:GPI 0 "register_operand" "=r")
4305 (vec_select:<VDQQH:VEL>
4306 (match_operand:VDQQH 1 "register_operand" "w")
4307 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4310 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4311 INTVAL (operands[2]));
4312 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4314 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4317 ;; Lane extraction of a value, neither sign nor zero extension
4318 ;; is guaranteed so upper bits should be considered undefined.
4319 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4320 ;; Extracting lane zero is split into a simple move when it is between SIMD
4321 ;; registers or a store.
;; Three alternatives: GP-register destination (umov), SIMD register (dup),
;; or a store of one lane (st1).  After reload, extracting memory lane 0 is
;; split to a plain move of the element-mode subreg of operand 1.
4322 (define_insn_and_split "aarch64_get_lane<mode>"
4323 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4325 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4326 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4329 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4330 switch (which_alternative)
4333 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4335 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4337 return "st1\\t{%1.<Vetype>}[%2], %0";
4342 "&& reload_completed
4343 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4344 [(set (match_dup 0) (match_dup 1))]
4346 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4348 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent VDC-mode memory locations into one double-width
;; register.  The condition requires the second address to be exactly the
;; first plus the mode size, i.e. a contiguous pair; disabled under
;; STRICT_ALIGNMENT.  (The vec_concat wrapper line is elided here.)
4351 (define_insn "load_pair_lanes<mode>"
4352 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4354 (match_operand:VDC 1 "memory_operand" "Utq")
4355 (match_operand:VDC 2 "memory_operand" "m")))]
4356 "TARGET_SIMD && !STRICT_ALIGNMENT
4357 && rtx_equal_p (XEXP (operands[2], 0),
4358 plus_constant (Pmode,
4359 XEXP (operands[1], 0),
4360 GET_MODE_SIZE (<MODE>mode)))"
4362 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a pair of VDC values as one double-width memory access; either a
;; SIMD stp or a general-register stp depending on the alternative.
4365 (define_insn "store_pair_lanes<mode>"
4366 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
4368 (match_operand:VDC 1 "register_operand" "w, r")
4369 (match_operand:VDC 2 "register_operand" "w, r")))]
4373 stp\\t%x1, %x2, %y0"
4374 [(set_attr "type" "neon_stp, store_16")]
4377 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a VDC value with zero into a double-width register, little-endian
;; layout (value in the low half).  Alternatives cover SIMD reg, GP reg and
;; memory sources; the output templates are elided from this excerpt.
4380 (define_insn "@aarch64_combinez<mode>"
4381 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4383 (match_operand:VDC 1 "general_operand" "w,?r,m")
4384 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
4385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4390 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4391 (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterpart: the vec_concat operand order is reversed so the
;; value still lands in the architecturally low half.
4394 (define_insn "@aarch64_combinez_be<mode>"
4395 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
4397 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
4398 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
4399 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4404 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
4405 (set_attr "arch" "simd,fp,simd")]
;; Combine two VDC halves into a double-width vector.  A zero high half is
;; special-cased through the combinez/combinez_be patterns (choosing by
;; endianness); otherwise the generic split helper is used.
4408 (define_expand "aarch64_combine<mode>"
4409 [(match_operand:<VDBL> 0 "register_operand")
4410 (match_operand:VDC 1 "register_operand")
4411 (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
4414 if (operands[2] == CONST0_RTX (<MODE>mode))
4416 if (BYTES_BIG_ENDIAN)
4417 emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
4420 emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
4424 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Build the double-width value by moving the low then the high quad word
;; separately (used by the split helper above).
4429 (define_expand "@aarch64_simd_combine<mode>"
4430 [(match_operand:<VDBL> 0 "register_operand")
4431 (match_operand:VDC 1 "register_operand")
4432 (match_operand:VDC 2 "register_operand")]
4435 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
4436 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
4439 [(set_attr "type" "multiple")]
4442 ;; <su><addsub>l<q>.
;; Widening add/subtract of the HIGH halves of two Q registers:
;; [us]{add,sub}l2.  ANY_EXTEND covers both sign and zero extension;
;; operand 3 is the parallel selecting the high half.
4444 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4445 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4446 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4447 (match_operand:VQW 1 "register_operand" "w")
4448 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4449 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4450 (match_operand:VQW 2 "register_operand" "w")
4453 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4454 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves: [us]{add,sub}l, which names the
;; D-register (half-type) views of the sources.
4457 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4459 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4460 (match_operand:VQW 1 "register_operand" "w")
4461 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4462 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4463 (match_operand:VQW 2 "register_operand" "w")
4466 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4467 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Standard-named widening add/sub expanders for the vectorizer.  Each
;; builds the lo/hi half-selection parallel and forwards to the matching
;; *_lo_internal / *_hi_internal insn above.  The lines passing `p` to the
;; generators appear elided from this excerpt.
4470 (define_expand "vec_widen_<su>addl_lo_<mode>"
4471 [(match_operand:<VWIDE> 0 "register_operand")
4472 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4473 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4476 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4477 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
;; High-half widening add.
4482 (define_expand "vec_widen_<su>addl_hi_<mode>"
4483 [(match_operand:<VWIDE> 0 "register_operand")
4484 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4485 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4488 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4489 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
;; Low-half widening subtract.
4494 (define_expand "vec_widen_<su>subl_lo_<mode>"
4495 [(match_operand:<VWIDE> 0 "register_operand")
4496 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4497 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4500 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4501 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
;; High-half widening subtract.
4506 (define_expand "vec_widen_<su>subl_hi_<mode>"
4507 [(match_operand:<VWIDE> 0 "register_operand")
4508 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4509 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4512 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4513 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
;; Intrinsic expanders for the "2" (high-half) widening forms.  Each builds
;; the hi-half selection parallel and forwards to the _hi_internal insn.
;; Four near-identical expanders: saddl2, uaddl2, ssubl2, usubl2.
4518 (define_expand "aarch64_saddl2<mode>"
4519 [(match_operand:<VWIDE> 0 "register_operand")
4520 (match_operand:VQW 1 "register_operand")
4521 (match_operand:VQW 2 "register_operand")]
4524 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4525 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned high-half widening add.
4530 (define_expand "aarch64_uaddl2<mode>"
4531 [(match_operand:<VWIDE> 0 "register_operand")
4532 (match_operand:VQW 1 "register_operand")
4533 (match_operand:VQW 2 "register_operand")]
4536 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4537 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Signed high-half widening subtract.
4542 (define_expand "aarch64_ssubl2<mode>"
4543 [(match_operand:<VWIDE> 0 "register_operand")
4544 (match_operand:VQW 1 "register_operand")
4545 (match_operand:VQW 2 "register_operand")]
4548 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4549 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned high-half widening subtract.
4554 (define_expand "aarch64_usubl2<mode>"
4555 [(match_operand:<VWIDE> 0 "register_operand")
4556 (match_operand:VQW 1 "register_operand")
4557 (match_operand:VQW 2 "register_operand")]
4560 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4561 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/subtract of whole D registers: [us]{add,sub}l on VD_BHSI.
;; The second ANY_EXTEND wrapper line appears elided from this excerpt.
4566 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4567 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4568 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4569 (match_operand:VD_BHSI 1 "register_operand" "w"))
4571 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4573 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4574 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4577 ;; <su><addsub>w<q>.
;; widen_ssum on a full Q register: computed as saddw of the low half into
;; a temporary, then saddw2 to fold in the high half.
4579 (define_expand "widen_ssum<mode>3"
4580 [(set (match_operand:<VDBLW> 0 "register_operand")
4581 (plus:<VDBLW> (sign_extend:<VDBLW>
4582 (match_operand:VQW 1 "register_operand"))
4583 (match_operand:<VDBLW> 2 "register_operand")))]
4586 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4587 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4589 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4591 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; D-register variant: a single saddw suffices.
4596 (define_expand "widen_ssum<mode>3"
4597 [(set (match_operand:<VWIDE> 0 "register_operand")
4598 (plus:<VWIDE> (sign_extend:<VWIDE>
4599 (match_operand:VD_BHSI 1 "register_operand"))
4600 (match_operand:<VWIDE> 2 "register_operand")))]
4603 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned Q-register variant: uaddw of the low half then uaddw2.
4607 (define_expand "widen_usum<mode>3"
4608 [(set (match_operand:<VDBLW> 0 "register_operand")
4609 (plus:<VDBLW> (zero_extend:<VDBLW>
4610 (match_operand:VQW 1 "register_operand"))
4611 (match_operand:<VDBLW> 2 "register_operand")))]
4614 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4615 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4617 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4619 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned D-register variant: single uaddw.
4624 (define_expand "widen_usum<mode>3"
4625 [(set (match_operand:<VWIDE> 0 "register_operand")
4626 (plus:<VWIDE> (zero_extend:<VWIDE>
4627 (match_operand:VD_BHSI 1 "register_operand"))
4628 (match_operand:<VWIDE> 2 "register_operand")))]
4631 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening subtract from a wide accumulator: [us]subw (D-register source;
;; the ANY_EXTEND wrapper line is elided from this excerpt).
4635 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4637 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4639 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4641 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4642 [(set_attr "type" "neon_sub_widen")]
;; Low-half variant on a Q-register source: names the half-type view.
4645 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4646 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4647 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4650 (match_operand:VQW 2 "register_operand" "w")
4651 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4653 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4654 [(set_attr "type" "neon_sub_widen")]
;; High-half variant: [us]subw2.
4657 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4658 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4659 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4662 (match_operand:VQW 2 "register_operand" "w")
4663 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4665 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4666 [(set_attr "type" "neon_sub_widen")]
;; Widening add into a wide accumulator: [us]addw.  Note the extended
;; narrow operand is the first plus-arm in RTL (canonical order) even
;; though it prints second.
4669 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4670 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4672 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4673 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4675 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4676 [(set_attr "type" "neon_add_widen")]
;; Low-half variant on a Q-register source.
4679 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4684 (match_operand:VQW 2 "register_operand" "w")
4685 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4686 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4688 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4689 [(set_attr "type" "neon_add_widen")]
;; High-half variant: [us]addw2.
4692 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4697 (match_operand:VQW 2 "register_operand" "w")
4698 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4699 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4701 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4702 [(set_attr "type" "neon_add_widen")]
;; Intrinsic expanders for the high-half widening accumulate forms: each
;; builds the hi-half selection parallel and forwards to the matching
;; *w2_internal insn.  Four parallel expanders follow.
4705 (define_expand "aarch64_saddw2<mode>"
4706 [(match_operand:<VWIDE> 0 "register_operand")
4707 (match_operand:<VWIDE> 1 "register_operand")
4708 (match_operand:VQW 2 "register_operand")]
4711 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4712 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Unsigned widening add, high half.
4717 (define_expand "aarch64_uaddw2<mode>"
4718 [(match_operand:<VWIDE> 0 "register_operand")
4719 (match_operand:<VWIDE> 1 "register_operand")
4720 (match_operand:VQW 2 "register_operand")]
4723 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4724 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Signed widening subtract, high half.
4730 (define_expand "aarch64_ssubw2<mode>"
4731 [(match_operand:<VWIDE> 0 "register_operand")
4732 (match_operand:<VWIDE> 1 "register_operand")
4733 (match_operand:VQW 2 "register_operand")]
4736 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4737 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Unsigned widening subtract, high half.
4742 (define_expand "aarch64_usubw2<mode>"
4743 [(match_operand:<VWIDE> 0 "register_operand")
4744 (match_operand:<VWIDE> 1 "register_operand")
4745 (match_operand:VQW 2 "register_operand")]
4748 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
4754 ;; <su><r>h<addsub>.
;; Standard-named halving-average expanders; these map onto the unspec-based
;; halving add patterns (the unspec names and bodies are elided here).
4756 (define_expand "<u>avg<mode>3_floor"
4757 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4758 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4759 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Rounding (ceil) variant.
4764 (define_expand "<u>avg<mode>3_ceil"
4765 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4766 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
4767 (match_operand:VDQ_BHSI 2 "register_operand")]
;; The underlying insn: [su][r]h{add,sub} — (rounding) halving add/sub.
4772 (define_insn "aarch64_<sur>h<addsub><mode>"
4773 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4774 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
4775 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
4778 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4779 [(set_attr "type" "neon_<addsub>_halve<q>")]
4782 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub ([r]{add,sub}hn): the narrow result is
;; concatenated with a zero half so the full destination register contents
;; are defined.  _le puts the result in the low half, _be in the high half.
4784 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_le"
4785 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4786 (vec_concat:<VNARROWQ2>
4787 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4788 (match_operand:VQN 2 "register_operand" "w")]
4790 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
4791 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4792 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4793 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Big-endian variant with the vec_concat halves swapped.
4796 (define_insn "aarch64_<sur><addsub>hn<mode>_insn_be"
4797 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4798 (vec_concat:<VNARROWQ2>
4799 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
4800 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
4801 (match_operand:VQN 2 "register_operand" "w")]
4803 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4804 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4805 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Intrinsic entry point: compute into a double-width temporary via the
;; endian-appropriate insn, then extract the narrow low part as a subreg.
4808 (define_expand "aarch64_<sur><addsub>hn<mode>"
4809 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4810 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
4811 (match_operand:VQN 2 "register_operand")]
4815 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
4816 if (BYTES_BIG_ENDIAN)
4817 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_be (tmp, operands[1],
4818 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4820 emit_insn (gen_aarch64_<sur><addsub>hn<mode>_insn_le (tmp, operands[1],
4821 operands[2], CONST0_RTX (<VNARROWQ>mode)));
4823 /* The intrinsic expects a narrow result, so emit a subreg that will get
4824 optimized away as appropriate. */
4825 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; [r]{add,sub}hn2: narrow the high-half result into the UPPER half of the
;; destination while keeping the existing narrow value (tied operand 1,
;; constraint "0") in the lower half.  _le/_be swap the vec_concat order.
4831 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_le"
4832 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4833 (vec_concat:<VNARROWQ2>
4834 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4835 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4836 (match_operand:VQN 3 "register_operand" "w")]
4838 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4839 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4840 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Big-endian variant.
4843 (define_insn "aarch64_<sur><addsub>hn2<mode>_insn_be"
4844 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4845 (vec_concat:<VNARROWQ2>
4846 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
4847 (match_operand:VQN 3 "register_operand" "w")]
4849 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4850 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4851 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4852 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Intrinsic entry point: dispatch to the endian-appropriate insn.
4855 (define_expand "aarch64_<sur><addsub>hn2<mode>"
4856 [(match_operand:<VNARROWQ2> 0 "register_operand")
4857 (match_operand:<VNARROWQ> 1 "register_operand")
4858 (unspec [(match_operand:VQN 2 "register_operand")
4859 (match_operand:VQN 3 "register_operand")]
4863 if (BYTES_BIG_ENDIAN)
4864 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_be (operands[0],
4865 operands[1], operands[2], operands[3]));
4867 emit_insn (gen_aarch64_<sur><addsub>hn2<mode>_insn_le (operands[0],
4868 operands[1], operands[2], operands[3]));
;; Polynomial multiply (pmul) on byte vectors.
4875 (define_insn "aarch64_pmul<mode>"
4876 [(set (match_operand:VB 0 "register_operand" "=w")
4877 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
4878 (match_operand:VB 2 "register_operand" "w")]
4881 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4882 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Widening polynomial multiply of the low 8 bytes: pmull.
4885 (define_insn "aarch64_pmullv8qi"
4886 [(set (match_operand:V8HI 0 "register_operand" "=w")
4887 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
4888 (match_operand:V8QI 2 "register_operand" "w")]
4891 "pmull\\t%0.8h, %1.8b, %2.8b"
4892 [(set_attr "type" "neon_mul_b_long")]
;; pmull2: widening polynomial multiply of the high halves of two
;; V16QI registers (operand 3 selects the high half).
4895 (define_insn "aarch64_pmull_hiv16qi_insn"
4896 [(set (match_operand:V8HI 0 "register_operand" "=w")
4899 (match_operand:V16QI 1 "register_operand" "w")
4900 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
4902 (match_operand:V16QI 2 "register_operand" "w")
4906 "pmull2\\t%0.8h, %1.16b, %2.16b"
4907 [(set_attr "type" "neon_mul_b_long")]
;; Expander builds the hi-half parallel and forwards to the insn above.
4910 (define_expand "aarch64_pmull_hiv16qi"
4911 [(match_operand:V8HI 0 "register_operand")
4912 (match_operand:V16QI 1 "register_operand")
4913 (match_operand:V16QI 2 "register_operand")]
4916 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
4917 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
;; fmulx: floating-point multiply-extended (unspec-based; special-cases
;; 0 * inf per the architecture — see the Arm ARM).
4925 (define_insn "aarch64_fmulx<mode>"
4926 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
4928 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
4929 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
4932 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4933 [(set_attr "type" "neon_fp_mul_<stype>")]
4936 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; fmulx by a duplicated lane of a register of the opposite (swapped)
;; width; the lane index is endian-corrected before printing.
4938 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
4939 [(set (match_operand:VDQSF 0 "register_operand" "=w")
4941 [(match_operand:VDQSF 1 "register_operand" "w")
4942 (vec_duplicate:VDQSF
4944 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
4945 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4949 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
4950 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4952 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
4955 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; fmulx by a lane of a same-width register.
4957 (define_insn "*aarch64_mulx_elt<mode>"
4958 [(set (match_operand:VDQF 0 "register_operand" "=w")
4960 [(match_operand:VDQF 1 "register_operand" "w")
4963 (match_operand:VDQF 2 "register_operand" "w")
4964 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
4968 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4969 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4971 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; fmulx by a scalar broadcast from element register: prints as lane [0].
4976 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
4977 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4979 [(match_operand:VHSDF 1 "register_operand" "w")
4980 (vec_duplicate:VHSDF
4981 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
4984 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
4985 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
4988 ;; vmulxs_lane_f32, vmulxs_laneq_f32
4989 ;; vmulxd_lane_f64 == vmulx_lane_f64
4990 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar fmulx where the second operand is a lane extracted from a vector.
4992 (define_insn "*aarch64_vgetfmulx<mode>"
4993 [(set (match_operand:<VEL> 0 "register_operand" "=w")
4995 [(match_operand:<VEL> 1 "register_operand" "w")
4997 (match_operand:VDQF 2 "register_operand" "w")
4998 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5002 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5003 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5005 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [su]q{add,sub} via the BINQOPS code iterator.
5009 (define_insn "aarch64_<su_optab>q<addsub><mode>"
5010 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5011 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
5012 (match_operand:VSDQ_I 2 "register_operand" "w")))]
5014 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5015 [(set_attr "type" "neon_q<addsub><q>")]
5018 ;; suqadd and usqadd
;; Signed+unsigned saturating accumulate: destination is read-modify-write
;; (operand 1 tied to the output via constraint "0").
5020 (define_insn "aarch64_<sur>qadd<mode>"
5021 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5022 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5023 (match_operand:VSDQ_I 2 "register_operand" "w")]
5026 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5027 [(set_attr "type" "neon_qadd<q>")]
5030 ;; sqmovn and uqmovn
;; Scalar saturating narrow: [su]qxtn on a scalar source.
5032 (define_insn "aarch64_<su>qmovn<mode>"
5033 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5034 (SAT_TRUNC:<VNARROWQ>
5035 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5037 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5038 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Vector saturating narrow, with the unused half of the destination
;; explicitly zeroed: _le puts the result low, _be puts it high.
5041 (define_insn "aarch64_<su>qmovn<mode>_insn_le"
5042 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5043 (vec_concat:<VNARROWQ2>
5044 (SAT_TRUNC:<VNARROWQ>
5045 (match_operand:VQN 1 "register_operand" "w"))
5046 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5047 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5048 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5049 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian variant.
5052 (define_insn "aarch64_<su>qmovn<mode>_insn_be"
5053 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5054 (vec_concat:<VNARROWQ2>
5055 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5056 (SAT_TRUNC:<VNARROWQ>
5057 (match_operand:VQN 1 "register_operand" "w"))))]
5058 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5059 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5060 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Intrinsic entry: compute into a wide temporary then take the narrow
;; low part as a subreg.
5063 (define_expand "aarch64_<su>qmovn<mode>"
5064 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5065 (SAT_TRUNC:<VNARROWQ>
5066 (match_operand:VQN 1 "register_operand")))]
5069 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5070 if (BYTES_BIG_ENDIAN)
5071 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_be (tmp, operands[1],
5072 CONST0_RTX (<VNARROWQ>mode)));
5074 emit_insn (gen_aarch64_<su>qmovn<mode>_insn_le (tmp, operands[1],
5075 CONST0_RTX (<VNARROWQ>mode)));
5077 /* The intrinsic expects a narrow result, so emit a subreg that will get
5078 optimized away as appropriate. */
5079 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; [su]qxtn2: saturating narrow into the UPPER half of the destination
;; while preserving the existing narrow low half (tied operand 1, "0").
5085 (define_insn "aarch64_<su>qxtn2<mode>_le"
5086 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5087 (vec_concat:<VNARROWQ2>
5088 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5089 (SAT_TRUNC:<VNARROWQ>
5090 (match_operand:VQN 2 "register_operand" "w"))))]
5091 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5092 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5093 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian variant with vec_concat halves swapped.
5096 (define_insn "aarch64_<su>qxtn2<mode>_be"
5097 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5098 (vec_concat:<VNARROWQ2>
5099 (SAT_TRUNC:<VNARROWQ>
5100 (match_operand:VQN 2 "register_operand" "w"))
5101 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5102 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5103 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5104 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Intrinsic entry: dispatch on endianness.
5107 (define_expand "aarch64_<su>qxtn2<mode>"
5108 [(match_operand:<VNARROWQ2> 0 "register_operand")
5109 (match_operand:<VNARROWQ> 1 "register_operand")
5110 (SAT_TRUNC:<VNARROWQ>
5111 (match_operand:VQN 2 "register_operand"))]
5114 if (BYTES_BIG_ENDIAN)
5115 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5118 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
;; sqxtun: signed-to-unsigned saturating narrow, scalar form.
5126 (define_insn "aarch64_sqmovun<mode>"
5127 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5128 (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")]
5131 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5132 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Vector form with the unused destination half zeroed (little-endian:
;; result in the low half).
5135 (define_insn "aarch64_sqmovun<mode>_insn_le"
5136 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5137 (vec_concat:<VNARROWQ2>
5138 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5140 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
5141 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5142 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5143 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian variant.
5146 (define_insn "aarch64_sqmovun<mode>_insn_be"
5147 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5148 (vec_concat:<VNARROWQ2>
5149 (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
5150 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")]
5152 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5153 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5154 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Intrinsic entry: wide temporary + narrow lowpart subreg, as for qmovn.
5157 (define_expand "aarch64_sqmovun<mode>"
5158 [(set (match_operand:<VNARROWQ> 0 "register_operand")
5159 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")]
5163 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
5164 if (BYTES_BIG_ENDIAN)
5165 emit_insn (gen_aarch64_sqmovun<mode>_insn_be (tmp, operands[1],
5166 CONST0_RTX (<VNARROWQ>mode)));
5168 emit_insn (gen_aarch64_sqmovun<mode>_insn_le (tmp, operands[1],
5169 CONST0_RTX (<VNARROWQ>mode)));
5171 /* The intrinsic expects a narrow result, so emit a subreg that will get
5172 optimized away as appropriate. */
5173 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; sqxtun2: signed-to-unsigned saturating narrow into the UPPER half,
;; preserving the existing narrow low half (tied operand 1, "0").
5179 (define_insn "aarch64_sqxtun2<mode>_le"
5180 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5181 (vec_concat:<VNARROWQ2>
5182 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5184 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)))]
5185 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5186 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5187 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Big-endian variant with vec_concat halves swapped.
5190 (define_insn "aarch64_sqxtun2<mode>_be"
5191 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5192 (vec_concat:<VNARROWQ2>
5194 [(match_operand:VQN 2 "register_operand" "w")] UNSPEC_SQXTUN)
5195 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5196 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5197 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5198 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Intrinsic entry: dispatch on endianness.
5201 (define_expand "aarch64_sqxtun2<mode>"
5202 [(match_operand:<VNARROWQ2> 0 "register_operand")
5203 (match_operand:<VNARROWQ> 1 "register_operand")
5205 [(match_operand:VQN 2 "register_operand")] UNSPEC_SQXTUN)]
5208 if (BYTES_BIG_ENDIAN)
5209 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5212 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
;; Saturating unary op (e.g. sqabs/sqneg — the code iterator line is
;; elided from this excerpt).
5220 (define_insn "aarch64_s<optab><mode>"
5221 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5223 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5225 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5226 [(set_attr "type" "neon_<optab><q>")]
;; sq(r)dmulh: signed saturating (rounding) doubling multiply-high.
5231 (define_insn "aarch64_sq<r>dmulh<mode>"
5232 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5234 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5235 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5238 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5239 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5242 (define_insn "aarch64_sq<r>dmulh_n<mode>"
5243 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5245 [(match_operand:VDQHS 1 "register_operand" "w")
5246 (vec_duplicate:VDQHS
5247 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5250 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5251 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5256 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5257 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5259 [(match_operand:VDQHS 1 "register_operand" "w")
5261 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5262 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5266 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5267 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5268 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5271 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5272 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5274 [(match_operand:VDQHS 1 "register_operand" "w")
5276 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5277 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5281 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5282 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5283 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5286 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5287 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5289 [(match_operand:SD_HSI 1 "register_operand" "w")
5291 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5292 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5296 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5297 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5298 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5301 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5302 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5304 [(match_operand:SD_HSI 1 "register_operand" "w")
5306 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5307 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5311 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5312 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5313 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqrdml[as]h (SQRDMLH_AS iterator selects accumulate 'a' or subtract
;; 's'): signed saturating rounding doubling multiply high-half with
;; accumulation.  Operand 1 is the accumulator, tied to the output ("0");
;; operands 2 and 3 are the multiplicands.
5318 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
5319 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5321 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5322 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5323 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5326 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5327 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5330 ;; sqrdml[as]h_lane.
;; Vector _lane form; lane remapped for endianness as in the sqdmulh
;; patterns above.
5332 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5333 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5335 [(match_operand:VDQHS 1 "register_operand" "0")
5336 (match_operand:VDQHS 2 "register_operand" "w")
5338 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5339 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5343 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5345 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5347 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form.
5350 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
5351 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5353 [(match_operand:SD_HSI 1 "register_operand" "0")
5354 (match_operand:SD_HSI 2 "register_operand" "w")
5356 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5357 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5361 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5363 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5365 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5368 ;; sqrdml[as]h_laneq.
;; Vector _laneq form (full-width index vector <VCONQ>).
5370 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5371 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5373 [(match_operand:VDQHS 1 "register_operand" "0")
5374 (match_operand:VDQHS 2 "register_operand" "w")
5376 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5377 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5381 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5383 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5385 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _laneq form.
5388 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
5389 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5391 [(match_operand:SD_HSI 1 "register_operand" "0")
5392 (match_operand:SD_HSI 2 "register_operand" "w")
5394 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5395 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5399 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5401 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5403 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal: the widened (sign_extend to <VWIDE>) doubling product of
;; operands 2 and 3 is added to accumulator operand 1 (tied to output).
5408 (define_insn "aarch64_sqdmlal<mode>"
5409 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5413 (sign_extend:<VWIDE>
5414 (match_operand:VSD_HSI 2 "register_operand" "w"))
5415 (sign_extend:<VWIDE>
5416 (match_operand:VSD_HSI 3 "register_operand" "w")))
5418 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5420 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5421 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; sqdmlsl: same shape but the product is subtracted from the accumulator,
;; so operand 1 appears first (minuend) in the RTL.
5424 (define_insn "aarch64_sqdmlsl<mode>"
5425 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5427 (match_operand:<VWIDE> 1 "register_operand" "0")
5430 (sign_extend:<VWIDE>
5431 (match_operand:VSD_HSI 2 "register_operand" "w"))
5432 (sign_extend:<VWIDE>
5433 (match_operand:VSD_HSI 3 "register_operand" "w")))
5436 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5437 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; sqdmlal/sqdmlsl by selected lane.  The lane operand is remapped with
;; aarch64_endian_lane_rtx in each pattern.  Vector (VD_HSI) forms use a
;; vec_duplicate of the selected element; scalar (SD_HSI) forms use the
;; element directly.
;; Vector sqdmlal, 64-bit index vector (<VCOND>):
5442 (define_insn "aarch64_sqdmlal_lane<mode>"
5443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5447 (sign_extend:<VWIDE>
5448 (match_operand:VD_HSI 2 "register_operand" "w"))
5449 (vec_duplicate:<VWIDE>
5450 (sign_extend:<VWIDE_S>
5452 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5453 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5456 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5459 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5461 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5463 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector sqdmlsl, 64-bit index vector:
5466 (define_insn "aarch64_sqdmlsl_lane<mode>"
5467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5469 (match_operand:<VWIDE> 1 "register_operand" "0")
5472 (sign_extend:<VWIDE>
5473 (match_operand:VD_HSI 2 "register_operand" "w"))
5474 (vec_duplicate:<VWIDE>
5475 (sign_extend:<VWIDE_S>
5477 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5478 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5483 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5485 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5487 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector sqdmlsl, 128-bit index vector (<VCONQ>, _laneq):
5491 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5494 (match_operand:<VWIDE> 1 "register_operand" "0")
5497 (sign_extend:<VWIDE>
5498 (match_operand:VD_HSI 2 "register_operand" "w"))
5499 (vec_duplicate:<VWIDE>
5500 (sign_extend:<VWIDE_S>
5502 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5503 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5508 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5510 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5512 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector sqdmlal, 128-bit index vector (_laneq):
5515 (define_insn "aarch64_sqdmlal_laneq<mode>"
5516 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5520 (sign_extend:<VWIDE>
5521 (match_operand:VD_HSI 2 "register_operand" "w"))
5522 (vec_duplicate:<VWIDE>
5523 (sign_extend:<VWIDE_S>
5525 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5526 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5529 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5532 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5534 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5536 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) sqdmlal _lane:
5540 (define_insn "aarch64_sqdmlal_lane<mode>"
5541 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5545 (sign_extend:<VWIDE>
5546 (match_operand:SD_HSI 2 "register_operand" "w"))
5547 (sign_extend:<VWIDE>
5549 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5550 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5553 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5556 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5558 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5560 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar sqdmlsl _lane:
5563 (define_insn "aarch64_sqdmlsl_lane<mode>"
5564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5566 (match_operand:<VWIDE> 1 "register_operand" "0")
5569 (sign_extend:<VWIDE>
5570 (match_operand:SD_HSI 2 "register_operand" "w"))
5571 (sign_extend:<VWIDE>
5573 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5574 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5579 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5581 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5583 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar sqdmlal _laneq:
5587 (define_insn "aarch64_sqdmlal_laneq<mode>"
5588 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5592 (sign_extend:<VWIDE>
5593 (match_operand:SD_HSI 2 "register_operand" "w"))
5594 (sign_extend:<VWIDE>
5596 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5597 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5600 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5603 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5605 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5607 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar sqdmlsl _laneq:
5610 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5611 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5613 (match_operand:<VWIDE> 1 "register_operand" "0")
5616 (sign_extend:<VWIDE>
5617 (match_operand:SD_HSI 2 "register_operand" "w"))
5618 (sign_extend:<VWIDE>
5620 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5621 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5626 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5628 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5630 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlsl_n / sqdmlal_n: multiply by a scalar element broadcast via
;; vec_duplicate; emitted as element [0] of the scalar's register.
5635 (define_insn "aarch64_sqdmlsl_n<mode>"
5636 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5638 (match_operand:<VWIDE> 1 "register_operand" "0")
5641 (sign_extend:<VWIDE>
5642 (match_operand:VD_HSI 2 "register_operand" "w"))
5643 (vec_duplicate:<VWIDE>
5644 (sign_extend:<VWIDE_S>
5645 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5648 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5649 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulating (add) variant.
5652 (define_insn "aarch64_sqdmlal_n<mode>"
5653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5657 (sign_extend:<VWIDE>
5658 (match_operand:VD_HSI 2 "register_operand" "w"))
5659 (vec_duplicate:<VWIDE>
5660 (sign_extend:<VWIDE_S>
5661 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5663 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5665 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5666 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal2/sqdmlsl2: operate on the high halves of the 128-bit inputs,
;; selected through vect_par_cnst_hi_half parallels (operand 4).
5672 (define_insn "aarch64_sqdmlal2<mode>_internal"
5673 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5677 (sign_extend:<VWIDE>
5679 (match_operand:VQ_HSI 2 "register_operand" "w")
5680 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5681 (sign_extend:<VWIDE>
5683 (match_operand:VQ_HSI 3 "register_operand" "w")
5686 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5688 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5689 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtracting variant.
5692 (define_insn "aarch64_sqdmlsl2<mode>_internal"
5693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5695 (match_operand:<VWIDE> 1 "register_operand" "0")
5698 (sign_extend:<VWIDE>
5700 (match_operand:VQ_HSI 2 "register_operand" "w")
5701 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5702 (sign_extend:<VWIDE>
5704 (match_operand:VQ_HSI 3 "register_operand" "w")
5708 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5709 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander (SBINQOPS:as selects the al/sl variant): builds the hi-half
;; selector with aarch64_simd_vect_par_cnst_half and emits the internal
;; insn.
5712 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
5713 [(match_operand:<VWIDE> 0 "register_operand")
5715 (match_operand:<VWIDE> 1 "register_operand")
5717 (match_operand:VQ_HSI 2 "register_operand")
5718 (match_operand:VQ_HSI 3 "register_operand")]
5721 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5722 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
5723 operands[1], operands[2],
;; sqdmlal2/sqdmlsl2 by selected lane: high half of operand 2 (via the
;; vect_par_cnst_hi_half parallel, operand 5) multiplied by a broadcast
;; lane of operand 3; lane remapped for endianness.
5730 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
5731 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5733 (match_operand:<VWIDE> 1 "register_operand" "0")
5736 (sign_extend:<VWIDE>
5738 (match_operand:VQ_HSI 2 "register_operand" "w")
5739 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5740 (vec_duplicate:<VWIDE>
5741 (sign_extend:<VWIDE_S>
5743 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5744 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5751 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulating _lane variant.
5756 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
5757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5761 (sign_extend:<VWIDE>
5763 (match_operand:VQ_HSI 2 "register_operand" "w")
5764 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5765 (vec_duplicate:<VWIDE>
5766 (sign_extend:<VWIDE_S>
5768 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5769 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5772 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5775 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5777 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5779 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _laneq variants: index vector is the full-width <VCONQ>.
5782 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
5783 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5785 (match_operand:<VWIDE> 1 "register_operand" "0")
5788 (sign_extend:<VWIDE>
5790 (match_operand:VQ_HSI 2 "register_operand" "w")
5791 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5792 (vec_duplicate:<VWIDE>
5793 (sign_extend:<VWIDE_S>
5795 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5796 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5801 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5803 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5805 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulating _laneq variant.
5808 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
5809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5813 (sign_extend:<VWIDE>
5815 (match_operand:VQ_HSI 2 "register_operand" "w")
5816 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
5817 (vec_duplicate:<VWIDE>
5818 (sign_extend:<VWIDE_S>
5820 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
5824 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5827 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5829 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders: build the hi-half selector and forward to the internal
;; insns above (al/sl chosen by SBINQOPS:as).
5834 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
5835 [(match_operand:<VWIDE> 0 "register_operand")
5837 (match_operand:<VWIDE> 1 "register_operand")
5839 (match_operand:VQ_HSI 2 "register_operand")
5840 (match_operand:<VCOND> 3 "register_operand")
5841 (match_operand:SI 4 "immediate_operand")]
5844 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5845 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
5846 operands[1], operands[2],
5847 operands[3], operands[4], p));
;; _laneq expander.
5851 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
5852 [(match_operand:<VWIDE> 0 "register_operand")
5854 (match_operand:<VWIDE> 1 "register_operand")
5856 (match_operand:VQ_HSI 2 "register_operand")
5857 (match_operand:<VCONQ> 3 "register_operand")
5858 (match_operand:SI 4 "immediate_operand")]
5861 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5862 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
5863 operands[1], operands[2],
5864 operands[3], operands[4], p));
;; sqdmlal2_n/sqdmlsl2_n: high half of operand 2 multiplied by a broadcast
;; scalar (operand 3), emitted as element [0].
5868 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
5869 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5871 (match_operand:<VWIDE> 1 "register_operand" "0")
5874 (sign_extend:<VWIDE>
5876 (match_operand:VQ_HSI 2 "register_operand" "w")
5877 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5878 (vec_duplicate:<VWIDE>
5879 (sign_extend:<VWIDE_S>
5880 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5883 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5884 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulating variant.
5887 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
5888 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5892 (sign_extend:<VWIDE>
5894 (match_operand:VQ_HSI 2 "register_operand" "w")
5895 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5896 (vec_duplicate:<VWIDE>
5897 (sign_extend:<VWIDE_S>
5898 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5900 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5902 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5903 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and emits the internal insn.
5906 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
5907 [(match_operand:<VWIDE> 0 "register_operand")
5909 (match_operand:<VWIDE> 1 "register_operand")
5911 (match_operand:VQ_HSI 2 "register_operand")
5912 (match_operand:<VEL> 3 "register_operand")]
5915 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
5916 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
5917 operands[1], operands[2],
;; sqdmull: signed saturating doubling multiply long (no accumulator).
5924 (define_insn "aarch64_sqdmull<mode>"
5925 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5928 (sign_extend:<VWIDE>
5929 (match_operand:VSD_HSI 1 "register_operand" "w"))
5930 (sign_extend:<VWIDE>
5931 (match_operand:VSD_HSI 2 "register_operand" "w")))
5934 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5935 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Vector _lane form (lane remapped for endianness, as elsewhere).
5940 (define_insn "aarch64_sqdmull_lane<mode>"
5941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5944 (sign_extend:<VWIDE>
5945 (match_operand:VD_HSI 1 "register_operand" "w"))
5946 (vec_duplicate:<VWIDE>
5947 (sign_extend:<VWIDE_S>
5949 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5950 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5955 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5956 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5958 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Vector _laneq form.
5961 (define_insn "aarch64_sqdmull_laneq<mode>"
5962 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5965 (sign_extend:<VWIDE>
5966 (match_operand:VD_HSI 1 "register_operand" "w"))
5967 (vec_duplicate:<VWIDE>
5968 (sign_extend:<VWIDE_S>
5970 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5971 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
5976 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5977 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5979 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form.
5982 (define_insn "aarch64_sqdmull_lane<mode>"
5983 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5986 (sign_extend:<VWIDE>
5987 (match_operand:SD_HSI 1 "register_operand" "w"))
5988 (sign_extend:<VWIDE>
5990 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5991 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
5996 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5997 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5999 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar _laneq form.
6002 (define_insn "aarch64_sqdmull_laneq<mode>"
6003 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6006 (sign_extend:<VWIDE>
6007 (match_operand:SD_HSI 1 "register_operand" "w"))
6008 (sign_extend:<VWIDE>
6010 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6011 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6016 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6017 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6019 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: scalar broadcast, emitted as element [0].
6024 (define_insn "aarch64_sqdmull_n<mode>"
6025 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6028 (sign_extend:<VWIDE>
6029 (match_operand:VD_HSI 1 "register_operand" "w"))
6030 (vec_duplicate:<VWIDE>
6031 (sign_extend:<VWIDE_S>
6032 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6036 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6037 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: as sqdmull but on the high halves of the 128-bit inputs,
;; selected through vect_par_cnst_hi_half parallels.
6042 (define_insn "aarch64_sqdmull2<mode>_internal"
6043 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6046 (sign_extend:<VWIDE>
6048 (match_operand:VQ_HSI 1 "register_operand" "w")
6049 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6050 (sign_extend:<VWIDE>
6052 (match_operand:VQ_HSI 2 "register_operand" "w")
6057 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6058 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and emits the internal insn.
6061 (define_expand "aarch64_sqdmull2<mode>"
6062 [(match_operand:<VWIDE> 0 "register_operand")
6063 (match_operand:VQ_HSI 1 "register_operand")
6064 (match_operand:VQ_HSI 2 "register_operand")]
6067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6068 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; _lane internal: high half of operand 1 times a broadcast lane of
;; operand 2 (lane remapped for endianness).
6075 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6076 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6079 (sign_extend:<VWIDE>
6081 (match_operand:VQ_HSI 1 "register_operand" "w")
6082 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6083 (vec_duplicate:<VWIDE>
6084 (sign_extend:<VWIDE_S>
6086 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6087 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6092 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6093 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6095 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _laneq internal (full-width index vector).
6098 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6099 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6102 (sign_extend:<VWIDE>
6104 (match_operand:VQ_HSI 1 "register_operand" "w")
6105 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6106 (vec_duplicate:<VWIDE>
6107 (sign_extend:<VWIDE_S>
6109 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6110 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6115 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6116 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6118 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _lane expander.
6121 (define_expand "aarch64_sqdmull2_lane<mode>"
6122 [(match_operand:<VWIDE> 0 "register_operand")
6123 (match_operand:VQ_HSI 1 "register_operand")
6124 (match_operand:<VCOND> 2 "register_operand")
6125 (match_operand:SI 3 "immediate_operand")]
6128 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6129 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6130 operands[2], operands[3],
;; _laneq expander.
6135 (define_expand "aarch64_sqdmull2_laneq<mode>"
6136 [(match_operand:<VWIDE> 0 "register_operand")
6137 (match_operand:VQ_HSI 1 "register_operand")
6138 (match_operand:<VCONQ> 2 "register_operand")
6139 (match_operand:SI 3 "immediate_operand")]
6142 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6143 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6144 operands[2], operands[3],
;; _n internal: high half times a broadcast scalar, element [0].
6151 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6152 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6155 (sign_extend:<VWIDE>
6157 (match_operand:VQ_HSI 1 "register_operand" "w")
6158 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6159 (vec_duplicate:<VWIDE>
6160 (sign_extend:<VWIDE_S>
6161 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6165 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n expander.
6169 (define_expand "aarch64_sqdmull2_n<mode>"
6170 [(match_operand:<VWIDE> 0 "register_operand")
6171 (match_operand:VQ_HSI 1 "register_operand")
6172 (match_operand:<VEL> 2 "register_operand")]
6175 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6176 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; <sur>shl: register-controlled shift (signed/unsigned, plain/rounding,
;; per the <sur> iterator).
6183 (define_insn "aarch64_<sur>shl<mode>"
6184 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6186 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6187 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6190 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6191 [(set_attr "type" "neon_shift_reg<q>")]
;; <sur>q<r>shl: saturating (optionally rounding) register-controlled
;; shift.
6197 (define_insn "aarch64_<sur>q<r>shl<mode>"
6198 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6200 [(match_operand:VSDQ_I 1 "register_operand" "w")
6201 (match_operand:VSDQ_I 2 "register_operand" "w")]
6204 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6205 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left expanders used by the vectorizer: select the
;; low/high half of the input with aarch64_simd_vect_par_cnst_half and
;; forward to the shll/shll2 internal insns.
6208 (define_expand "vec_widen_<sur>shiftl_lo_<mode>"
6209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6210 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6212 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6216 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
6217 emit_insn (gen_aarch64_<sur>shll<mode>_internal (operands[0], operands[1],
;; High-half variant (third argument to ..._par_cnst_half is true).
6223 (define_expand "vec_widen_<sur>shiftl_hi_<mode>"
6224 [(set (match_operand:<VWIDE> 0 "register_operand")
6225 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6227 "immediate_operand" "i")]
6231 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6232 emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], operands[1],
;; shll on the low half.  When the shift count equals the element width
;; the architectural SHLL form (no s/u/r prefix) is emitted instead.
6240 (define_insn "aarch64_<sur>shll<mode>_internal"
6241 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6242 (unspec:<VWIDE> [(vec_select:<VHALF>
6243 (match_operand:VQW 1 "register_operand" "w")
6244 (match_operand:VQW 2 "vect_par_cnst_lo_half" ""))
6246 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6250 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6251 return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6253 return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3";
6255 [(set_attr "type" "neon_shift_imm_long")]
;; shll2 on the high half; same SHLL2 special case for a full-width
;; shift count.
6258 (define_insn "aarch64_<sur>shll2<mode>_internal"
6259 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6260 (unspec:<VWIDE> [(vec_select:<VHALF>
6261 (match_operand:VQW 1 "register_operand" "w")
6262 (match_operand:VQW 2 "vect_par_cnst_hi_half" ""))
6264 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6268 if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6269 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6271 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3";
6273 [(set_attr "type" "neon_shift_imm_long")]
;; shll_n: whole 64-bit vector operand, immediate shift.
6276 (define_insn "aarch64_<sur>shll_n<mode>"
6277 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6278 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
6280 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
6284 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6285 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6287 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
6289 [(set_attr "type" "neon_shift_imm_long")]
;; shll2_n: whole 128-bit vector operand, immediate shift.
6294 (define_insn "aarch64_<sur>shll2_n<mode>"
6295 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6296 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
6297 (match_operand:SI 2 "immediate_operand" "i")]
6301 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
6302 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6304 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
6306 [(set_attr "type" "neon_shift_imm_long")]
;; <sur>shr_n: shift right by immediate.
6311 (define_insn "aarch64_<sur>shr_n<mode>"
6312 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6313 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6315 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6318 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6319 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; <sur>sra_n: shift right by immediate and accumulate into operand 1
;; (tied to the output).
6324 (define_insn "aarch64_<sur>sra_n<mode>"
6325 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6326 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6327 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6329 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6332 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6333 [(set_attr "type" "neon_shift_acc<q>")]
;; s<lr>i_n: shift left/right and insert into operand 1 (tied to the
;; output); destination bits outside the shifted field are preserved.
6338 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6339 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6340 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6341 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6343 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6346 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6347 [(set_attr "type" "neon_shift_imm<q>")]
;; <sur>qshl<u>_n: saturating shift left by immediate.
6352 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6353 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6354 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6356 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6359 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6360 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; <sur>q<r>shr<u>n_n: saturating (rounding/unsigned-result variants per
;; iterators) shift right narrow by immediate.  Scalar source form:
6366 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
6367 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6368 (unspec:<VNARROWQ> [(match_operand:SD_HSDI 1 "register_operand" "w")
6370 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
6373 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6374 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Vector form, little-endian: narrowed result is the first half of a
;; vec_concat with a zero vector (operand 3).
6377 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le"
6378 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6379 (vec_concat:<VNARROWQ2>
6381 [(match_operand:VQN 1 "register_operand" "w")
6382 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6384 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")))]
6385 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6386 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6387 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian: the zero half comes first in the vec_concat.
6390 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be"
6391 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6392 (vec_concat:<VNARROWQ2>
6393 (match_operand:<VNARROWQ> 3 "aarch64_simd_or_scalar_imm_zero")
6395 [(match_operand:VQN 1 "register_operand" "w")
6396 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6398 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6399 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6400 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: duplicate the scalar shift count into a vector, emit the
;; endian-appropriate insn into a wide temporary, then return the narrow
;; low part as a subreg.
6403 (define_expand "aarch64_<sur>q<r>shr<u>n_n<mode>"
6404 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6405 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand")
6407 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6411 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6412 INTVAL (operands[2]));
6413 rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
6414 if (BYTES_BIG_ENDIAN)
6415 emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_be (tmp,
6416 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6418 emit_insn (gen_aarch64_<sur>q<r>shr<u>n_n<mode>_insn_le (tmp,
6419 operands[1], operands[2], CONST0_RTX (<VNARROWQ>mode)));
6421 /* The intrinsic expects a narrow result, so emit a subreg that will get
6422 optimized away as appropriate. */
6423 emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
;; High-half narrowing shifts (sqshrn2/uqshrn2/... "2" forms): operand 1 is
;; the existing narrow low half (tied to the output, constraint "0"); the
;; shifted-narrowed value fills the other half.  LE and BE insns differ only
;; in vec_concat operand order, matching the q(r)shr(u)n_n family above.
;; NOTE(review): interior lines are elided in this listing (e.g. 6436, 6448,
;; 6458, 6461-6463) -- unspec names and conditions are partly missing.
6429 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le"
6430 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6431 (vec_concat:<VNARROWQ2>
6432 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6433 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6434 (match_operand:VQN 3
6435 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6437 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
6438 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6439 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
6442 (define_insn "aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be"
6443 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6444 (vec_concat:<VNARROWQ2>
6445 (unspec:<VNARROWQ> [(match_operand:VQN 2 "register_operand" "w")
6446 (match_operand:VQN 3
6447 "aarch64_simd_shift_imm_vec_<vn_mode>")]
6449 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6450 "TARGET_SIMD && BYTES_BIG_ENDIAN"
6451 "<sur>q<r>shr<u>n2\\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6452 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander: dup the immediate shift amount into a vector, then dispatch on
;; endianness to the matching insn pattern above.
6455 (define_expand "aarch64_<sur>q<r>shr<u>n2_n<mode>"
6456 [(match_operand:<VNARROWQ2> 0 "register_operand")
6457 (match_operand:<VNARROWQ> 1 "register_operand")
6459 [(match_operand:VQN 2 "register_operand")
6460 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6464 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6465 INTVAL (operands[3]));
6467 if (BYTES_BIG_ENDIAN)
6468 emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_be (operands[0],
6469 operands[1], operands[2], operands[3]));
6471 emit_insn (gen_aarch64_<sur>q<r>shr<u>n2_n<mode>_insn_le (operands[0],
6472 operands[1], operands[2], operands[3]));
;; Integer vector comparisons producing all-ones/all-zeros masks.
;; Signed forms (COMPARISONS) allow a zero second operand ("ZDz"), which
;; selects the cmXX-against-#0 alternative; unsigned forms (UCOMPARISONS)
;; require two registers.  The DI-mode insn_and_split variants additionally
;; allow GP registers: after reload, if the operands landed in the general
;; register file, they split into a flag-setting compare plus cstoredi_neg
;; (hence the CC_REGNUM clobber); otherwise they resolve to the post-reload
;; "*aarch64_cm<optab>di" SIMD insns, which do not clobber CC.
;; NOTE(review): this listing elides lines (e.g. 6484, 6488-6490, 6498-6499,
;; 6502, 6504-6505, ...), so parts of the RTL bodies and split sequences are
;; not visible -- confirm against the full file.
6478 ;; cm(eq|ge|gt|lt|le)
6479 ;; Note, we have constraints for Dz and Z as different expanders
6480 ;; have different ideas of what should be passed to this pattern.
6482 (define_insn "aarch64_cm<optab><mode>"
6483 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6485 (COMPARISONS:<V_INT_EQUIV>
6486 (match_operand:VDQ_I 1 "register_operand" "w,w")
6487 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6491 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6492 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
6493 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; Signed DI compare: third alternative ("r") covers GP registers and splits
;; to compare + conditional store after reload.
6496 (define_insn_and_split "aarch64_cm<optab>di"
6497 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
6500 (match_operand:DI 1 "register_operand" "w,w,r")
6501 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
6503 (clobber (reg:CC CC_REGNUM))]
6506 "&& reload_completed"
6507 [(set (match_operand:DI 0 "register_operand")
6510 (match_operand:DI 1 "register_operand")
6511 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6514 /* If we are in the general purpose register file,
6515 we split to a sequence of comparison and store. */
6516 if (GP_REGNUM_P (REGNO (operands[0]))
6517 && GP_REGNUM_P (REGNO (operands[1])))
6519 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
6520 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6521 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6522 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6525 /* Otherwise, we expand to a similar pattern which does not
6526 clobber CC_REGNUM. */
6528 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register-only form of the above (no CC clobber).
6531 (define_insn "*aarch64_cm<optab>di"
6532 [(set (match_operand:DI 0 "register_operand" "=w,w")
6535 (match_operand:DI 1 "register_operand" "w,w")
6536 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
6538 "TARGET_SIMD && reload_completed"
6540 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
6541 cm<optab>\t%d0, %d1, #0"
6542 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares: register-register only (no #0 alternative).
6547 (define_insn "aarch64_cm<optab><mode>"
6548 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6550 (UCOMPARISONS:<V_INT_EQUIV>
6551 (match_operand:VDQ_I 1 "register_operand" "w")
6552 (match_operand:VDQ_I 2 "register_operand" "w")
6555 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6556 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DI compare; same GP-register split strategy as the signed form,
;; but uses plain CCmode for the condition.
6559 (define_insn_and_split "aarch64_cm<optab>di"
6560 [(set (match_operand:DI 0 "register_operand" "=w,r")
6563 (match_operand:DI 1 "register_operand" "w,r")
6564 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
6566 (clobber (reg:CC CC_REGNUM))]
6569 "&& reload_completed"
6570 [(set (match_operand:DI 0 "register_operand")
6573 (match_operand:DI 1 "register_operand")
6574 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
6577 /* If we are in the general purpose register file,
6578 we split to a sequence of comparison and store. */
6579 if (GP_REGNUM_P (REGNO (operands[0]))
6580 && GP_REGNUM_P (REGNO (operands[1])))
6582 machine_mode mode = CCmode;
6583 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
6584 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
6585 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6588 /* Otherwise, we expand to a similar pattern which does not
6589 clobber CC_REGNUM. */
6591 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload unsigned DI compare in SIMD registers.
6594 (define_insn "*aarch64_cm<optab>di"
6595 [(set (match_operand:DI 0 "register_operand" "=w")
6598 (match_operand:DI 1 "register_operand" "w")
6599 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
6601 "TARGET_SIMD && reload_completed"
6602 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
6603 [(set_attr "type" "neon_compare")]
;; CMTST (test-bits) patterns.  The canonical RTL matched here is the
;; simplify-rtx rewrite of not (neg (eq (and x y) 0)), i.e.
;; plus (eq (and x y) 0) -1, as the leading comments explain.
;; NOTE(review): interior lines are elided (e.g. 6616-6618, 6635-6636,
;; 6648-6650), so parts of the plus/eq nesting are not visible here.
6608 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
6609 ;; we don't have any insns using ne, and aarch64_vcond outputs
6610 ;; not (neg (eq (and x y) 0))
6611 ;; which is rewritten by simplify_rtx as
6612 ;; plus (eq (and x y) 0) -1.
6614 (define_insn "aarch64_cmtst<mode>"
6615 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6619 (match_operand:VDQ_I 1 "register_operand" "w")
6620 (match_operand:VDQ_I 2 "register_operand" "w"))
6621 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
6622 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
6625 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6626 [(set_attr "type" "neon_tst<q>")]
;; Degenerate case: testing a register against itself (operand 2 is the
;; zero immediate, so the insn emits cmtst %1, %1).
6629 ;; One can also get a cmtsts by having to combine a
6630 ;; not (neq (eq x 0)) in which case you rewrite it to
6631 ;; a comparison against itself
6633 (define_insn "*aarch64_cmtst_same_<mode>"
6634 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6637 (match_operand:VDQ_I 1 "register_operand" "w")
6638 (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
6639 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
6642 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
6643 [(set_attr "type" "neon_tst<q>")]
;; DI-mode cmtst: like the cm<optab>di patterns, the GP-register alternative
;; splits after reload into an AND/NE compare plus cstoredi_neg.
6646 (define_insn_and_split "aarch64_cmtstdi"
6647 [(set (match_operand:DI 0 "register_operand" "=w,r")
6651 (match_operand:DI 1 "register_operand" "w,r")
6652 (match_operand:DI 2 "register_operand" "w,r"))
6654 (clobber (reg:CC CC_REGNUM))]
6657 "&& reload_completed"
6658 [(set (match_operand:DI 0 "register_operand")
6662 (match_operand:DI 1 "register_operand")
6663 (match_operand:DI 2 "register_operand"))
6666 /* If we are in the general purpose register file,
6667 we split to a sequence of comparison and store. */
6668 if (GP_REGNUM_P (REGNO (operands[0]))
6669 && GP_REGNUM_P (REGNO (operands[1])))
6671 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
6672 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
6673 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
6674 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
6675 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
6678 /* Otherwise, we expand to a similar pattern which does not
6679 clobber CC_REGNUM. */
6681 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload SIMD-register-only DI cmtst.
6684 (define_insn "*aarch64_cmtstdi"
6685 [(set (match_operand:DI 0 "register_operand" "=w")
6689 (match_operand:DI 1 "register_operand" "w")
6690 (match_operand:DI 2 "register_operand" "w"))
6693 "cmtst\t%d0, %d1, %d2"
6694 [(set_attr "type" "neon_tst")]
;; Floating-point compares (fcmXX) with a "YDz" zero alternative, and the
;; absolute compares facge/facgt (FAC_COMPARISONS over abs'd operands);
;; fac(le|lt) are handled by emitting fac(ge|gt) with swapped operands via
;; the <cmp_1>/<cmp_2> mapping.
;; NOTE(review): lines 6701, 6705-6707, 6719-6726 are elided in this listing.
6697 ;; fcm(eq|ge|gt|le|lt)
6699 (define_insn "aarch64_cm<optab><mode>"
6700 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
6702 (COMPARISONS:<V_INT_EQUIV>
6703 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
6704 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
6708 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
6709 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
6710 [(set_attr "type" "neon_fp_compare_<stype><q>")]
6714 ;; Note we can also handle what would be fac(le|lt) by
6715 ;; generating fac(ge|gt).
6717 (define_insn "aarch64_fac<optab><mode>"
6718 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
6720 (FAC_COMPARISONS:<V_INT_EQUIV>
6722 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
6724 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
6727 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
6728 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise add (addp) for integer vectors and the V2DI->DI scalar form,
;; followed by the vector sqrt expander (which may emit an approximate
;; inverse-sqrt sequence via aarch64_emit_approx_sqrt) and the plain fsqrt
;; insn it otherwise falls through to.
;; NOTE(review): lines 6735, 6738-6739, 6746, 6748-6750, 6759-6760, 6762-6764
;; and 6768 are elided; the addpdi output template is among the missing text.
6733 (define_insn "aarch64_addp<mode>"
6734 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
6736 [(match_operand:VDQ_I 1 "register_operand" "w")
6737 (match_operand:VDQ_I 2 "register_operand" "w")]
6740 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6741 [(set_attr "type" "neon_reduc_add<q>")]
6744 (define_insn "aarch64_addpdi"
6745 [(set (match_operand:DI 0 "register_operand" "=w")
6747 [(match_operand:V2DI 1 "register_operand" "w")]
6751 [(set_attr "type" "neon_reduc_add")]
;; sqrt expander: try the Newton-Raphson approximation first (DONE if it
;; succeeds); otherwise the named pattern's RTL stands and matches *sqrt.
6756 (define_expand "sqrt<mode>2"
6757 [(set (match_operand:VHSDF 0 "register_operand")
6758 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
6761 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
6765 (define_insn "*sqrt<mode>2"
6766 [(set (match_operand:VHSDF 0 "register_operand" "=w")
6767 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
6769 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
6770 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
;; Two-register structure loads/stores (LD2/ST2): whole-struct, replicating
;; (ld2r), and single-lane forms, plus the generic vec_load_lanes /
;; vec_store_lanes expanders.  On big-endian the expanders go through a
;; temporary and aarch64_rev_reglist to compensate for the lane-order
;; difference between the architectural LDn/STn layout and GCC's vector
;; extension indices (see the "flip only for assembly" comment for lanes).
;; NOTE(review): unspec names and several closing lines are elided in this
;; listing (e.g. 6779-6780, 6801-6803, 6815-6817) -- confirm in the full file.
6773 ;; Patterns for vector struct loads and stores.
6775 (define_insn "aarch64_simd_ld2<vstruct_elt>"
6776 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
6777 (unspec:VSTRUCT_2Q [
6778 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
6781 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6782 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one element pair and replicate across all lanes (BLK memory).
6785 (define_insn "aarch64_simd_ld2r<vstruct_elt>"
6786 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6787 (unspec:VSTRUCT_2QD [
6788 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6791 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6792 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; Single-lane LD2: operand 2 is the pass-through register pair (tied "0");
;; the lane number is endian-adjusted at output time.
6795 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6796 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
6797 (unspec:VSTRUCT_2QD [
6798 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6799 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
6800 (match_operand:SI 3 "immediate_operand" "i")]
6804 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6805 INTVAL (operands[3]));
6806 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
6808 [(set_attr "type" "neon_load2_one_lane")]
;; Generic expander: on big-endian, load into a temp then reverse lanes.
6811 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6812 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
6813 (unspec:VSTRUCT_2Q [
6814 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
6818 if (BYTES_BIG_ENDIAN)
6820 rtx tmp = gen_reg_rtx (<MODE>mode);
6821 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6822 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6823 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
6824 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6827 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
6831 (define_insn "aarch64_simd_st2<vstruct_elt>"
6832 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
6833 (unspec:VSTRUCT_2Q [
6834 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
6837 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
6838 [(set_attr "type" "neon_store2_2reg<q>")]
6841 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6842 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6843 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6844 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
6845 (match_operand:SI 2 "immediate_operand" "i")]
6849 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6850 INTVAL (operands[2]));
6851 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
6853 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Store expander: on big-endian, reverse lanes into a temp before st2.
6856 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6857 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
6858 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
6862 if (BYTES_BIG_ENDIAN)
6864 rtx tmp = gen_reg_rtx (<MODE>mode);
6865 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6866 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6867 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6868 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
6871 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
;; Three-register structure loads/stores (LD3/ST3): mirrors the LD2/ST2
;; family above exactly, with VSTRUCT_3Q/VSTRUCT_3QD modes and register
;; ranges %S..%U.  Big-endian handling again goes through a temp and
;; aarch64_rev_reglist.
;; NOTE(review): unspec names and some closing lines are elided in this
;; listing (e.g. 6879-6880, 6889-6890, 6915-6917).
6875 (define_insn "aarch64_simd_ld3<vstruct_elt>"
6876 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
6877 (unspec:VSTRUCT_3Q [
6878 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
6881 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6882 [(set_attr "type" "neon_load3_3reg<q>")]
6885 (define_insn "aarch64_simd_ld3r<vstruct_elt>"
6886 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6887 (unspec:VSTRUCT_3QD [
6888 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6891 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
6892 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Single-lane LD3 with endian-adjusted lane number.
6895 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6896 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
6897 (unspec:VSTRUCT_3QD [
6898 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6899 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
6900 (match_operand:SI 3 "immediate_operand" "i")]
6904 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6905 INTVAL (operands[3]));
6906 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
6908 [(set_attr "type" "neon_load3_one_lane")]
6911 (define_expand "vec_load_lanes<mode><vstruct_elt>"
6912 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
6913 (unspec:VSTRUCT_3Q [
6914 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
6918 if (BYTES_BIG_ENDIAN)
6920 rtx tmp = gen_reg_rtx (<MODE>mode);
6921 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6922 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6923 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
6924 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
6927 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
6931 (define_insn "aarch64_simd_st3<vstruct_elt>"
6932 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
6933 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
6936 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
6937 [(set_attr "type" "neon_store3_3reg<q>")]
6940 ;; RTL uses GCC vector extension indices, so flip only for assembly.
6941 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
6942 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
6943 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
6944 (match_operand:SI 2 "immediate_operand" "i")]
6948 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
6949 INTVAL (operands[2]));
6950 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
6952 [(set_attr "type" "neon_store3_one_lane<q>")]
6955 (define_expand "vec_store_lanes<mode><vstruct_elt>"
6956 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
6957 (unspec:VSTRUCT_3Q [
6958 (match_operand:VSTRUCT_3Q 1 "register_operand")]
6962 if (BYTES_BIG_ENDIAN)
6964 rtx tmp = gen_reg_rtx (<MODE>mode);
6965 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
6966 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
6967 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
6968 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
6971 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
;; Four-register structure loads/stores (LD4/ST4): same template as the
;; LD2/LD3 families with VSTRUCT_4Q/VSTRUCT_4QD modes and register ranges
;; %S..%V.  Big-endian handling via temp + aarch64_rev_reglist as before.
;; NOTE(review): unspec names and some closing lines are elided in this
;; listing (e.g. 6979-6980, 6989-6990, 7015-7017).
6975 (define_insn "aarch64_simd_ld4<vstruct_elt>"
6976 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
6977 (unspec:VSTRUCT_4Q [
6978 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
6981 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6982 [(set_attr "type" "neon_load4_4reg<q>")]
6985 (define_insn "aarch64_simd_ld4r<vstruct_elt>"
6986 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6987 (unspec:VSTRUCT_4QD [
6988 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
6991 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
6992 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Single-lane LD4 with endian-adjusted lane number.
6995 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
6996 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
6997 (unspec:VSTRUCT_4QD [
6998 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
6999 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7000 (match_operand:SI 3 "immediate_operand" "i")]
7004 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7005 INTVAL (operands[3]));
7006 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7008 [(set_attr "type" "neon_load4_one_lane")]
7011 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7012 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7013 (unspec:VSTRUCT_4Q [
7014 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7018 if (BYTES_BIG_ENDIAN)
7020 rtx tmp = gen_reg_rtx (<MODE>mode);
7021 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7022 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7023 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7024 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7027 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7031 (define_insn "aarch64_simd_st4<vstruct_elt>"
7032 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7033 (unspec:VSTRUCT_4Q [
7034 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7037 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7038 [(set_attr "type" "neon_store4_4reg<q>")]
7041 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7042 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7043 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7044 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7045 (match_operand:SI 2 "immediate_operand" "i")]
7049 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7050 INTVAL (operands[2]));
7051 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7053 [(set_attr "type" "neon_store4_one_lane<q>")]
7056 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7057 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7058 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7062 if (BYTES_BIG_ENDIAN)
7064 rtx tmp = gen_reg_rtx (<MODE>mode);
7065 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7066 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7067 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7068 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7071 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
;; aarch64_rev_reglist: lane-reversal of a register list for big-endian
;; struct loads/stores.  After reload it splits into one TBL (qtbl1) per
;; 128-bit register in the list, using the byte-permute mask in operand 2.
;; The earlyclobber "=&w" keeps the output list from overlapping the input
;; while the per-register TBLs are emitted.
;; Then the mov expanders for struct modes: before register allocation,
;; force the source into a register unless the destination is one.
;; NOTE(review): lines 7077, 7081-7086, 7089, 7093-7095, 7105-7106, etc. are
;; elided; the insn conditions and some C scaffolding are not visible.
7075 (define_insn_and_split "aarch64_rev_reglist<mode>"
7076 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7078 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7079 (match_operand:V16QI 2 "register_operand" "w")]
7080 UNSPEC_REV_REGLIST))]
7083 "&& reload_completed"
7087 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7088 for (i = 0; i < nregs; i++)
7090 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7091 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7092 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7096 [(set_attr "type" "neon_tbl1_q")
7097 (set_attr "length" "<insn_count>")]
7100 ;; Reload patterns for AdvSIMD register list operands.
7102 (define_expand "mov<mode>"
7103 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7104 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7107 if (can_create_pseudo_p ())
7109 if (GET_CODE (operands[0]) != REG)
7110 operands[1] = force_reg (<MODE>mode, operands[1]);
7114 (define_expand "mov<mode>"
7115 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7116 (match_operand:VSTRUCT 1 "general_operand"))]
7119 if (can_create_pseudo_p ())
7121 if (GET_CODE (operands[0]) != REG)
7122 operands[1] = force_reg (<MODE>mode, operands[1]);
;; V8DI move: only mem destinations need the source forced to a register.
7126 (define_expand "movv8di"
7127 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7128 (match_operand:V8DI 1 "general_operand"))]
7131 if (can_create_pseudo_p () && MEM_P (operands[0]))
7132 operands[1] = force_reg (V8DImode, operands[1]);
;; LD1/ST1 multi-register ("x2/x3/x4") intrinsics.  Each expander wraps the
;; pointer operand in a MEM of the struct mode and forwards to the matching
;; internal insn, which emits a single ld1/st1 over the register range
;; (%S..%U for three registers, %S..%V for four, %S..%T for two).
;; NOTE(review): unspec names and insn conditions are elided in this listing
;; (e.g. 7138-7139, 7147, 7149-7150) -- confirm in the full file.
7135 (define_expand "aarch64_ld1x3<vstruct_elt>"
7136 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7137 (match_operand:DI 1 "register_operand")]
7140 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7141 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7145 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
7146 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7148 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
7151 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7152 [(set_attr "type" "neon_load1_3reg<q>")]
7155 (define_expand "aarch64_ld1x4<vstruct_elt>"
7156 [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7157 (match_operand:DI 1 "register_operand" "r")]
7160 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7161 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
7165 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
7166 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7168 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
7171 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7172 [(set_attr "type" "neon_load1_4reg<q>")]
;; Stores: operand 0 is the pointer, operand 1 the register list.
7175 (define_expand "aarch64_st1x2<vstruct_elt>"
7176 [(match_operand:DI 0 "register_operand")
7177 (match_operand:VSTRUCT_2QD 1 "register_operand")]
7180 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7181 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
7185 (define_insn "aarch64_st1_x2_<vstruct_elt>"
7186 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
7188 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
7191 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7192 [(set_attr "type" "neon_store1_2reg<q>")]
7195 (define_expand "aarch64_st1x3<vstruct_elt>"
7196 [(match_operand:DI 0 "register_operand")
7197 (match_operand:VSTRUCT_3QD 1 "register_operand")]
7200 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7201 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
7205 (define_insn "aarch64_st1_x3_<vstruct_elt>"
7206 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
7208 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
7211 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7212 [(set_attr "type" "neon_store1_3reg<q>")]
7215 (define_expand "aarch64_st1x4<vstruct_elt>"
7216 [(match_operand:DI 0 "register_operand" "")
7217 (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
7220 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7221 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
7225 (define_insn "aarch64_st1_x4_<vstruct_elt>"
7226 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
7228 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
7231 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7232 [(set_attr "type" "neon_store1_4reg<q>")]
;; Structure-mode move insns.  Little-endian forms use st1/ld1 over the
;; register list (reg-reg moves are "multiple", split elsewhere); V8DI moves
;; go through general registers.  Big-endian forms use single-register
;; be_ld1/be_st1 helpers and stp/ldp-based moves, since multi-register
;; ld1/st1 lane ordering does not match GCC's in-register layout there.
;; One alternative must be a register on each side (the condition enforces
;; it) so reload cannot create mem-to-mem moves.
;; NOTE(review): several output-template and condition lines are elided in
;; this listing (e.g. 7241-7242, 7268, 7271, 7301-7304, 7315-7318, 7343,
;; 7354, 7365, 7376) -- the asm bodies of some alternatives are missing.
7235 (define_insn "*aarch64_mov<mode>"
7236 [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7237 (match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7238 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7239 && (register_operand (operands[0], <MODE>mode)
7240 || register_operand (operands[1], <MODE>mode))"
7243 st1\\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
7244 ld1\\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1"
7245 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7246 neon_load<nregs>_<nregs>reg_q")
7247 (set_attr "length" "<insn_count>,4,4")]
7250 (define_insn "*aarch64_mov<mode>"
7251 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
7252 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
7253 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7254 && (register_operand (operands[0], <MODE>mode)
7255 || register_operand (operands[1], <MODE>mode))"
7258 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
7259 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
7260 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
7261 neon_load<nregs>_<nregs>reg_q")
7262 (set_attr "length" "<insn_count>,4,4")]
;; V8DI move through general registers/memory (lengths 32/16/16 bytes).
7265 (define_insn "*aarch64_movv8di"
7266 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
7267 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
7269 && (register_operand (operands[0], V8DImode)
7270 || register_operand (operands[1], V8DImode))"
7272 [(set_attr "type" "multiple,multiple,multiple")
7273 (set_attr "length" "32,16,16")]
;; Single-register ld1/st1 used on big-endian to keep lane ordering.
7276 (define_insn "aarch64_be_ld1<mode>"
7277 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
7278 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
7279 "aarch64_simd_struct_operand" "Utv")]
7282 "ld1\\t{%0<Vmtype>}, %1"
7283 [(set_attr "type" "neon_load1_1reg<q>")]
7286 (define_insn "aarch64_be_st1<mode>"
7287 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
7288 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
7291 "st1\\t{%1<Vmtype>}, %0"
7292 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian struct moves: 2-register variants use stp/ldp.
7295 (define_insn "*aarch64_be_mov<mode>"
7296 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
7297 (match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
7298 "TARGET_SIMD && BYTES_BIG_ENDIAN
7299 && (register_operand (operands[0], <MODE>mode)
7300 || register_operand (operands[1], <MODE>mode))"
7305 [(set_attr "type" "multiple,neon_stp,neon_ldp")
7306 (set_attr "length" "8,4,4")]
7309 (define_insn "*aarch64_be_mov<mode>"
7310 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
7311 (match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
7312 "TARGET_SIMD && BYTES_BIG_ENDIAN
7313 && (register_operand (operands[0], <MODE>mode)
7314 || register_operand (operands[1], <MODE>mode))"
7319 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7320 (set_attr "length" "8,4,4")]
7323 (define_insn "*aarch64_be_movoi"
7324 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
7325 (match_operand:OI 1 "general_operand" " w,w,m"))]
7326 "TARGET_SIMD && BYTES_BIG_ENDIAN
7327 && (register_operand (operands[0], OImode)
7328 || register_operand (operands[1], OImode))"
7333 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
7334 (set_attr "length" "8,4,4")]
;; 3- and 4-register big-endian moves allow offsettable memory ("o").
7337 (define_insn "*aarch64_be_mov<mode>"
7338 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
7339 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
7340 "TARGET_SIMD && BYTES_BIG_ENDIAN
7341 && (register_operand (operands[0], <MODE>mode)
7342 || register_operand (operands[1], <MODE>mode))"
7344 [(set_attr "type" "multiple")
7345 (set_attr "length" "12,8,8")]
7348 (define_insn "*aarch64_be_movci"
7349 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
7350 (match_operand:CI 1 "general_operand" " w,w,o"))]
7351 "TARGET_SIMD && BYTES_BIG_ENDIAN
7352 && (register_operand (operands[0], CImode)
7353 || register_operand (operands[1], CImode))"
7355 [(set_attr "type" "multiple")
7356 (set_attr "length" "12,4,4")]
7359 (define_insn "*aarch64_be_mov<mode>"
7360 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
7361 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
7362 "TARGET_SIMD && BYTES_BIG_ENDIAN
7363 && (register_operand (operands[0], <MODE>mode)
7364 || register_operand (operands[1], <MODE>mode))"
7366 [(set_attr "type" "multiple")
7367 (set_attr "length" "16,8,8")]
7370 (define_insn "*aarch64_be_movxi"
7371 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
7372 (match_operand:XI 1 "general_operand" " w,w,o"))]
7373 "TARGET_SIMD && BYTES_BIG_ENDIAN
7374 && (register_operand (operands[0], XImode)
7375 || register_operand (operands[1], XImode))"
7377 [(set_attr "type" "multiple")
7378 (set_attr "length" "16,4,4")]
;; Post-reload split patterns for register-list moves.  Register-to-register
;; moves split into per-element moves via aarch64_simd_emit_reg_reg_move
;; (2, 3, 4 or 8 pieces depending on mode).  On big-endian, mem-involving
;; moves of 3- and 4-register lists split into pair-mode (V2x16QI/V2x8QI or
;; OImode) chunks plus a remaining single-register piece, since no single
;; insn covers them; V8DI moves split into four TImode moves at 16-byte
;; offsets.
;; NOTE(review): the define_split headers themselves are among the elided
;; lines (e.g. 7381, 7391, 7401), as are several subreg-offset arguments
;; (7418, 7420, 7423-7425, ...) -- only the split bodies are visible here.
7382 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
7383 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
7384 "TARGET_SIMD && reload_completed"
7387 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
7392 [(set (match_operand:OI 0 "register_operand")
7393 (match_operand:OI 1 "register_operand"))]
7394 "TARGET_SIMD && reload_completed"
7397 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; 3-register list: reg-reg -> 3 moves; BE mem case -> pair move + tail.
7402 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
7403 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
7404 "TARGET_SIMD && reload_completed"
7407 if (register_operand (operands[0], <MODE>mode)
7408 && register_operand (operands[1], <MODE>mode))
7410 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
7413 else if (BYTES_BIG_ENDIAN)
7415 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7416 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7417 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7419 simplify_gen_subreg (pair_mode, operands[1],
7421 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
7422 simplify_gen_subreg (<VSTRUCT_ELT>mode,
7426 gen_lowpart (<VSTRUCT_ELT>mode,
7427 simplify_gen_subreg (<VSTRUCT_ELT>mode,
;; CImode (3x128-bit) analogue: OImode pair + one TImode tail move.
7438 [(set (match_operand:CI 0 "nonimmediate_operand")
7439 (match_operand:CI 1 "general_operand"))]
7440 "TARGET_SIMD && reload_completed"
7443 if (register_operand (operands[0], CImode)
7444 && register_operand (operands[1], CImode))
7446 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
7449 else if (BYTES_BIG_ENDIAN)
7451 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
7452 simplify_gen_subreg (OImode, operands[1], CImode, 0));
7453 emit_move_insn (gen_lowpart (V16QImode,
7454 simplify_gen_subreg (TImode, operands[0],
7456 gen_lowpart (V16QImode,
7457 simplify_gen_subreg (TImode, operands[1],
;; 4-register list: reg-reg -> 4 moves; BE mem case -> two pair moves.
7466 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
7467 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
7468 "TARGET_SIMD && reload_completed"
7471 if (register_operand (operands[0], <MODE>mode)
7472 && register_operand (operands[1], <MODE>mode))
7474 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
7477 else if (BYTES_BIG_ENDIAN)
7479 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
7480 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
7481 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7483 simplify_gen_subreg (pair_mode, operands[1],
7485 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
7486 <MODE>mode, 2 * elt_size),
7487 simplify_gen_subreg (pair_mode, operands[1],
7488 <MODE>mode, 2 * elt_size));
;; XImode (4x128-bit) analogue: two OImode moves at offsets 0 and 32.
7496 [(set (match_operand:XI 0 "nonimmediate_operand")
7497 (match_operand:XI 1 "general_operand"))]
7498 "TARGET_SIMD && reload_completed"
7501 if (register_operand (operands[0], XImode)
7502 && register_operand (operands[1], XImode))
7504 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
7507 else if (BYTES_BIG_ENDIAN)
7509 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
7510 simplify_gen_subreg (OImode, operands[1], XImode, 0));
7511 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
7512 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; V8DI: reg-reg -> 8 DImode moves; reg<->mem -> four TImode moves.
7520 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7521 (match_operand:V8DI 1 "general_operand"))]
7522 "TARGET_SIMD && reload_completed"
7525 if (register_operand (operands[0], V8DImode)
7526 && register_operand (operands[1], V8DImode))
7528 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
7531 else if ((register_operand (operands[0], V8DImode)
7532 && memory_operand (operands[1], V8DImode))
7533 || (memory_operand (operands[0], V8DImode)
7534 && register_operand (operands[1], V8DImode)))
7536 for (int offset = 0; offset < 64; offset += 16)
7537 emit_move_insn (simplify_gen_subreg (TImode, operands[0],
7539 simplify_gen_subreg (TImode, operands[1],
;; NOTE(review): sampled extraction — UNSPEC codes, insn conditions and
;; closing parens are missing from the patterns below (line-number gaps).
;; Structure-load patterns for the ld2/ld3/ld4/ld1 intrinsics.
;; Load-and-replicate: wrap the address in a BLKmode MEM sized to
;; <nregs> elements, then emit the ld<n>r insn.
7547 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
7548 [(match_operand:VSTRUCT_QD 0 "register_operand")
7549 (match_operand:DI 1 "register_operand")]
7552 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7553 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7555 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
;; D-register ld2: "NX" variants use a real ld2; the "X" (64-bit single
;; lane, e.g. 1-element) variants use ld1 with .1d layout instead.
7559 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7560 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
7561 (unspec:VSTRUCT_2DNX [
7562 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
7565 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7566 [(set_attr "type" "neon_load2_2reg<q>")]
7569 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
7570 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
7571 (unspec:VSTRUCT_2DX [
7572 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
7575 "ld1\\t{%S0.1d - %T0.1d}, %1"
7576 [(set_attr "type" "neon_load1_2reg<q>")]
;; D-register ld3, same NX/X split as ld2.
7579 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7580 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
7581 (unspec:VSTRUCT_3DNX [
7582 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
7585 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7586 [(set_attr "type" "neon_load3_3reg<q>")]
7589 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
7590 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
7591 (unspec:VSTRUCT_3DX [
7592 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
7595 "ld1\\t{%S0.1d - %U0.1d}, %1"
7596 [(set_attr "type" "neon_load1_3reg<q>")]
;; D-register ld4, same NX/X split.
7599 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7600 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
7601 (unspec:VSTRUCT_4DNX [
7602 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
7605 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7606 [(set_attr "type" "neon_load4_4reg<q>")]
7609 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
7610 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
7611 (unspec:VSTRUCT_4DX [
7612 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
7615 "ld1\\t{%S0.1d - %V0.1d}, %1"
7616 [(set_attr "type" "neon_load1_4reg<q>")]
;; Expander for D-register structure loads: forward to the _dreg insn.
7619 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7620 [(match_operand:VSTRUCT_D 0 "register_operand")
7621 (match_operand:DI 1 "register_operand")]
7624 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7625 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
;; ld1 expander: big-endian uses a dedicated insn; little-endian is a
;; plain move from memory.
7629 (define_expand "aarch64_ld1<VALL_F16:mode>"
7630 [(match_operand:VALL_F16 0 "register_operand")
7631 (match_operand:DI 1 "register_operand")]
7634 machine_mode mode = <VALL_F16:MODE>mode;
7635 rtx mem = gen_rtx_MEM (mode, operands[1]);
7637 if (BYTES_BIG_ENDIAN)
7638 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
7640 emit_move_insn (operands[0], mem);
;; Expander for Q-register structure loads.
7644 (define_expand "aarch64_ld<nregs><vstruct_elt>"
7645 [(match_operand:VSTRUCT_Q 0 "register_operand")
7646 (match_operand:DI 1 "register_operand")]
7649 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7650 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
;; ld1x2 expander: two-register ld1.
7654 (define_expand "aarch64_ld1x2<vstruct_elt>"
7655 [(match_operand:VSTRUCT_2QD 0 "register_operand")
7656 (match_operand:DI 1 "register_operand")]
7659 machine_mode mode = <MODE>mode;
7660 rtx mem = gen_rtx_MEM (mode, operands[1]);
7662 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
;; Lane load expander: bounds-check the lane index (operand 3) before
;; emitting the lane-load insn; operand 2 is the merged-in old value.
7666 (define_expand "aarch64_ld<nregs>_lane<vstruct_elt>"
7667 [(match_operand:VSTRUCT_QD 0 "register_operand")
7668 (match_operand:DI 1 "register_operand")
7669 (match_operand:VSTRUCT_QD 2 "register_operand")
7670 (match_operand:SI 3 "immediate_operand")]
7673 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7674 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7676 aarch64_simd_lane_bounds (operands[3], 0,
7677 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7678 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
7679 mem, operands[2], operands[3]));
;; NOTE(review): sampled extraction — UNSPEC codes, conditions and
;; closing parens are missing from the patterns below.
7683 ;; Permuted-store expanders for neon intrinsics.
7685 ;; Permute instructions
;; Standard vec_perm pattern: variable permute on byte vectors,
;; lowered by aarch64_expand_vec_perm.
7689 (define_expand "vec_perm<mode>"
7690 [(match_operand:VB 0 "register_operand")
7691 (match_operand:VB 1 "register_operand")
7692 (match_operand:VB 2 "register_operand")
7693 (match_operand:VB 3 "register_operand")]
7696 aarch64_expand_vec_perm (operands[0], operands[1],
7697 operands[2], operands[3], <nunits>);
;; One-source-register table lookup (TBL) and extension (TBX, which
;; keeps operand 1 for out-of-range indices, hence the "0" tie).
7701 (define_insn "aarch64_qtbl1<mode>"
7702 [(set (match_operand:VB 0 "register_operand" "=w")
7703 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
7704 (match_operand:VB 2 "register_operand" "w")]
7707 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
7708 [(set_attr "type" "neon_tbl1<q>")]
7711 (define_insn "aarch64_qtbx1<mode>"
7712 [(set (match_operand:VB 0 "register_operand" "=w")
7713 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7714 (match_operand:V16QI 2 "register_operand" "w")
7715 (match_operand:VB 3 "register_operand" "w")]
7718 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
7719 [(set_attr "type" "neon_tbl1<q>")]
7722 ;; Two source registers.
7724 (define_insn "aarch64_qtbl2<mode>"
7725 [(set (match_operand:VB 0 "register_operand" "=w")
7726 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
7727 (match_operand:VB 2 "register_operand" "w")]
7730 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
7731 [(set_attr "type" "neon_tbl2")]
7734 (define_insn "aarch64_qtbx2<mode>"
7735 [(set (match_operand:VB 0 "register_operand" "=w")
7736 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7737 (match_operand:V2x16QI 2 "register_operand" "w")
7738 (match_operand:VB 3 "register_operand" "w")]
7741 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
7742 [(set_attr "type" "neon_tbl2")]
7745 ;; Three source registers.
7747 (define_insn "aarch64_qtbl3<mode>"
7748 [(set (match_operand:VB 0 "register_operand" "=w")
7749 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
7750 (match_operand:VB 2 "register_operand" "w")]
7753 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
7754 [(set_attr "type" "neon_tbl3")]
7757 (define_insn "aarch64_qtbx3<mode>"
7758 [(set (match_operand:VB 0 "register_operand" "=w")
7759 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7760 (match_operand:V3x16QI 2 "register_operand" "w")
7761 (match_operand:VB 3 "register_operand" "w")]
7764 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
7765 [(set_attr "type" "neon_tbl3")]
7768 ;; Four source registers.
7770 (define_insn "aarch64_qtbl4<mode>"
7771 [(set (match_operand:VB 0 "register_operand" "=w")
7772 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
7773 (match_operand:VB 2 "register_operand" "w")]
7776 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
7777 [(set_attr "type" "neon_tbl4")]
7780 (define_insn "aarch64_qtbx4<mode>"
7781 [(set (match_operand:VB 0 "register_operand" "=w")
7782 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
7783 (match_operand:V4x16QI 2 "register_operand" "w")
7784 (match_operand:VB 3 "register_operand" "w")]
7787 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
7788 [(set_attr "type" "neon_tbl4")]
;; Build a V2x16QI register pair from two V16QI values; split after
;; reload into the actual register moves.
7791 (define_insn_and_split "aarch64_combinev16qi"
7792 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
7793 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
7794 (match_operand:V16QI 2 "register_operand" "w")]
7798 "&& reload_completed"
7801 aarch64_split_combinev16qi (operands);
7804 [(set_attr "type" "multiple")]
7807 ;; This instruction's pattern is generated directly by
7808 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7809 ;; need corresponding changes there.
7810 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
7811 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7812 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7813 (match_operand:VALL_F16 2 "register_operand" "w")]
7816 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
7817 [(set_attr "type" "neon_permute<q>")]
7820 ;; This instruction's pattern is generated directly by
7821 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7822 ;; need corresponding changes there.  Note that the immediate (third)
7823 ;; operand is a lane index not a byte index.
;; EXT: the C fragment rescales the lane index into a byte offset for
;; the assembly template.
7824 (define_insn "aarch64_ext<mode>"
7825 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7826 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
7827 (match_operand:VALL_F16 2 "register_operand" "w")
7828 (match_operand:SI 3 "immediate_operand" "i")]
7832 operands[3] = GEN_INT (INTVAL (operands[3])
7833 * GET_MODE_UNIT_SIZE (<MODE>mode));
7834 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
7836 [(set_attr "type" "neon_ext<q>")]
7839 ;; This instruction's pattern is generated directly by
7840 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
7841 ;; need corresponding changes there.
7842 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
7843 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7844 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
7847 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
7848 [(set_attr "type" "neon_rev<q>")]
;; NOTE(review): sampled extraction — UNSPEC codes, conditions and
;; closing parens are missing from the patterns below.
;; Structure-store patterns, mirroring the load patterns above: "NX"
;; variants use real st2/st3/st4, "X" variants use st1 with .1d layout.
7851 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7852 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
7853 (unspec:VSTRUCT_2DNX [
7854 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
7857 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7858 [(set_attr "type" "neon_store2_2reg")]
7861 (define_insn "aarch64_st2<vstruct_elt>_dreg"
7862 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
7863 (unspec:VSTRUCT_2DX [
7864 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
7867 "st1\\t{%S1.1d - %T1.1d}, %0"
7868 [(set_attr "type" "neon_store1_2reg")]
7871 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7872 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
7873 (unspec:VSTRUCT_3DNX [
7874 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
7877 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7878 [(set_attr "type" "neon_store3_3reg")]
7881 (define_insn "aarch64_st3<vstruct_elt>_dreg"
7882 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
7883 (unspec:VSTRUCT_3DX [
7884 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
7887 "st1\\t{%S1.1d - %U1.1d}, %0"
7888 [(set_attr "type" "neon_store1_3reg")]
7891 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7892 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
7893 (unspec:VSTRUCT_4DNX [
7894 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
7897 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7898 [(set_attr "type" "neon_store4_4reg")]
7901 (define_insn "aarch64_st4<vstruct_elt>_dreg"
7902 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
7903 (unspec:VSTRUCT_4DX [
7904 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
7907 "st1\\t{%S1.1d - %V1.1d}, %0"
7908 [(set_attr "type" "neon_store1_4reg")]
;; Expander for D-register structure stores (address in operand 0).
7911 (define_expand "aarch64_st<nregs><vstruct_elt>"
7912 [(match_operand:DI 0 "register_operand")
7913 (match_operand:VSTRUCT_D 1 "register_operand")]
7916 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7917 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
;; Expander for Q-register structure stores.
7921 (define_expand "aarch64_st<nregs><vstruct_elt>"
7922 [(match_operand:DI 0 "register_operand")
7923 (match_operand:VSTRUCT_Q 1 "register_operand")]
7926 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
7927 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
;; Lane store expander: bounds-check the lane index (operand 2).
7931 (define_expand "aarch64_st<nregs>_lane<vstruct_elt>"
7932 [(match_operand:DI 0 "register_operand")
7933 (match_operand:VSTRUCT_QD 1 "register_operand")
7934 (match_operand:SI 2 "immediate_operand")]
7937 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7938 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
7940 aarch64_simd_lane_bounds (operands[2], 0,
7941 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
7942 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
7943 operands[1], operands[2]));
;; st1 expander: big-endian uses a dedicated insn; little-endian is a
;; plain move to memory.
7947 (define_expand "aarch64_st1<VALL_F16:mode>"
7948 [(match_operand:DI 0 "register_operand")
7949 (match_operand:VALL_F16 1 "register_operand")]
7952 machine_mode mode = <VALL_F16:MODE>mode;
7953 rtx mem = gen_rtx_MEM (mode, operands[0]);
7955 if (BYTES_BIG_ENDIAN)
7956 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
7958 emit_move_insn (mem, operands[1]);
;; NOTE(review): sampled extraction — conditions, UNSPEC codes and
;; closing parens are missing from the patterns below.
7962 ;; Standard pattern name vec_init<mode><Vel>.
;; Both vec_init forms defer to aarch64_expand_vector_init.
7964 (define_expand "vec_init<mode><Vel>"
7965 [(match_operand:VALL_F16 0 "register_operand")
7966 (match_operand 1 "" "")]
7969 aarch64_expand_vector_init (operands[0], operands[1]);
7973 (define_expand "vec_init<mode><Vhalf>"
7974 [(match_operand:VQ_NO2E 0 "register_operand")
7975 (match_operand 1 "" "")]
7978 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load a scalar from memory and duplicate it to all lanes.
7982 (define_insn "*aarch64_simd_ld1r<mode>"
7983 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
7984 (vec_duplicate:VALL_F16
7985 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
7987 "ld1r\\t{%0.<Vtype>}, %1"
7988 [(set_attr "type" "neon_load1_all_lanes")]
;; Two-register ld1 used by the ld1x2 expander above.
7991 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
7992 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7993 (unspec:VSTRUCT_2QD [
7994 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
7997 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7998 [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal / reciprocal-step estimate instructions.
8002 (define_insn "@aarch64_frecpe<mode>"
8003 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8005 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8008 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8009 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8012 (define_insn "aarch64_frecpx<mode>"
8013 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8014 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8017 "frecpx\t%<s>0, %<s>1"
8018 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8021 (define_insn "@aarch64_frecps<mode>"
8022 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8024 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8025 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8028 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8029 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8032 (define_insn "aarch64_urecpe<mode>"
8033 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8034 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8037 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8038 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8040 ;; Standard pattern name vec_extract<mode><Vel>.
;; Scalar lane extraction: forwarded to aarch64_get_lane.
8042 (define_expand "vec_extract<mode><Vel>"
8043 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8044 (match_operand:VALL_F16 1 "register_operand")
8045 (match_operand:SI 2 "immediate_operand")]
8049 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]))
8053 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8054 (define_expand "vec_extract<mode><Vhalf>"
8055 [(match_operand:<VHALF> 0 "register_operand")
8056 (match_operand:VQMOV_NO2E 1 "register_operand")
8057 (match_operand 2 "immediate_operand")]
8060 int start = INTVAL (operands[2]);
8061 if (start != 0 && start != <nunits> / 2)
8063 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8064 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8068 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8069 (define_expand "vec_extractv2dfv1df"
8070 [(match_operand:V1DF 0 "register_operand")
8071 (match_operand:V2DF 1 "register_operand")
8072 (match_operand 2 "immediate_operand")]
8075 /* V1DF is rarely used by other patterns, so it should be better to hide
8076 it in a subreg destination of a normal DF op. */
8077 rtx scalar0 = gen_lowpart (DFmode, operands[0]);
8078 emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
;; NOTE(review): sampled extraction — unspec codes and closing parens
;; are missing from the patterns below (line-number gaps).
;; AES single-round encrypt/decrypt; operand 1 is tied/commutative with
;; the accumulator ("%0"), only operand 2 appears in the template.
8084 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8085 [(set (match_operand:V16QI 0 "register_operand" "=w")
8088 (match_operand:V16QI 1 "register_operand" "%0")
8089 (match_operand:V16QI 2 "register_operand" "w"))]
8091 "TARGET_SIMD && TARGET_AES"
8092 "aes<aes_op>\\t%0.16b, %2.16b"
8093 [(set_attr "type" "crypto_aese")]
;; AES mix-columns / inverse mix-columns.
8096 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8097 [(set (match_operand:V16QI 0 "register_operand" "=w")
8098 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8100 "TARGET_SIMD && TARGET_AES"
8101 "aes<aesmc_op>\\t%0.16b, %1.16b"
8102 [(set_attr "type" "crypto_aesmc")]
8105 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8106 ;; and enforce the register dependency without scheduling or register
8107 ;; allocation messing up the order or introducing moves inbetween.
8108 ;; Mash the two together during combine.
8110 (define_insn "*aarch64_crypto_aese_fused"
8111 [(set (match_operand:V16QI 0 "register_operand" "=w")
8115 (match_operand:V16QI 1 "register_operand" "%0")
8116 (match_operand:V16QI 2 "register_operand" "w"))]
8119 "TARGET_SIMD && TARGET_AES
8120 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8121 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8122 [(set_attr "type" "crypto_aese")
8123 (set_attr "length" "8")]
8126 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8127 ;; and enforce the register dependency without scheduling or register
8128 ;; allocation messing up the order or introducing moves inbetween.
8129 ;; Mash the two together during combine.
8131 (define_insn "*aarch64_crypto_aesd_fused"
8132 [(set (match_operand:V16QI 0 "register_operand" "=w")
8136 (match_operand:V16QI 1 "register_operand" "%0")
8137 (match_operand:V16QI 2 "register_operand" "w"))]
8140 "TARGET_SIMD && TARGET_AES
8141 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8142 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8143 [(set_attr "type" "crypto_aese")
8144 (set_attr "length" "8")]
;; SHA1 fixed-rotate; the v4si form selects lane 0 (lane 3 on big-endian).
8149 (define_insn "aarch64_crypto_sha1hsi"
8150 [(set (match_operand:SI 0 "register_operand" "=w")
8151 (unspec:SI [(match_operand:SI 1
8152 "register_operand" "w")]
8154 "TARGET_SIMD && TARGET_SHA2"
8156 [(set_attr "type" "crypto_sha1_fast")]
8159 (define_insn "aarch64_crypto_sha1hv4si"
8160 [(set (match_operand:SI 0 "register_operand" "=w")
8161 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8162 (parallel [(const_int 0)]))]
8164 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8166 [(set_attr "type" "crypto_sha1_fast")]
8169 (define_insn "aarch64_be_crypto_sha1hv4si"
8170 [(set (match_operand:SI 0 "register_operand" "=w")
8171 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8172 (parallel [(const_int 3)]))]
8174 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
8176 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update, part 2.
8179 (define_insn "aarch64_crypto_sha1su1v4si"
8180 [(set (match_operand:V4SI 0 "register_operand" "=w")
8181 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8182 (match_operand:V4SI 2 "register_operand" "w")]
8184 "TARGET_SIMD && TARGET_SHA2"
8185 "sha1su1\\t%0.4s, %2.4s"
8186 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash update (choose/parity/majority via <sha1_op>).
8189 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
8190 [(set (match_operand:V4SI 0 "register_operand" "=w")
8191 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8192 (match_operand:SI 2 "register_operand" "w")
8193 (match_operand:V4SI 3 "register_operand" "w")]
8195 "TARGET_SIMD && TARGET_SHA2"
8196 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
8197 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update, part 1.
8200 (define_insn "aarch64_crypto_sha1su0v4si"
8201 [(set (match_operand:V4SI 0 "register_operand" "=w")
8202 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8203 (match_operand:V4SI 2 "register_operand" "w")
8204 (match_operand:V4SI 3 "register_operand" "w")]
8206 "TARGET_SIMD && TARGET_SHA2"
8207 "sha1su0\\t%0.4s, %2.4s, %3.4s"
8208 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash and schedule-update instructions.
8213 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
8214 [(set (match_operand:V4SI 0 "register_operand" "=w")
8215 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8216 (match_operand:V4SI 2 "register_operand" "w")
8217 (match_operand:V4SI 3 "register_operand" "w")]
8219 "TARGET_SIMD && TARGET_SHA2"
8220 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
8221 [(set_attr "type" "crypto_sha256_slow")]
8224 (define_insn "aarch64_crypto_sha256su0v4si"
8225 [(set (match_operand:V4SI 0 "register_operand" "=w")
8226 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8227 (match_operand:V4SI 2 "register_operand" "w")]
8229 "TARGET_SIMD && TARGET_SHA2"
8230 "sha256su0\\t%0.4s, %2.4s"
8231 [(set_attr "type" "crypto_sha256_fast")]
8234 (define_insn "aarch64_crypto_sha256su1v4si"
8235 [(set (match_operand:V4SI 0 "register_operand" "=w")
8236 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8237 (match_operand:V4SI 2 "register_operand" "w")
8238 (match_operand:V4SI 3 "register_operand" "w")]
8240 "TARGET_SIMD && TARGET_SHA2"
8241 "sha256su1\\t%0.4s, %2.4s, %3.4s"
8242 [(set_attr "type" "crypto_sha256_slow")]
;; SHA512 hash and schedule-update instructions (TARGET_SHA3).
8247 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
8248 [(set (match_operand:V2DI 0 "register_operand" "=w")
8249 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8250 (match_operand:V2DI 2 "register_operand" "w")
8251 (match_operand:V2DI 3 "register_operand" "w")]
8253 "TARGET_SIMD && TARGET_SHA3"
8254 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
8255 [(set_attr "type" "crypto_sha512")]
8258 (define_insn "aarch64_crypto_sha512su0qv2di"
8259 [(set (match_operand:V2DI 0 "register_operand" "=w")
8260 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8261 (match_operand:V2DI 2 "register_operand" "w")]
8263 "TARGET_SIMD && TARGET_SHA3"
8264 "sha512su0\\t%0.2d, %2.2d"
8265 [(set_attr "type" "crypto_sha512")]
8268 (define_insn "aarch64_crypto_sha512su1qv2di"
8269 [(set (match_operand:V2DI 0 "register_operand" "=w")
8270 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8271 (match_operand:V2DI 2 "register_operand" "w")
8272 (match_operand:V2DI 3 "register_operand" "w")]
8274 "TARGET_SIMD && TARGET_SHA3"
8275 "sha512su1\\t%0.2d, %2.2d, %3.2d"
8276 [(set_attr "type" "crypto_sha512")]
;; SHA3 helper ops: three-way XOR, rotate-and-XOR, XOR-and-rotate,
;; and bit-clear-and-XOR.
8281 (define_insn "eor3q<mode>4"
8282 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8285 (match_operand:VQ_I 2 "register_operand" "w")
8286 (match_operand:VQ_I 3 "register_operand" "w"))
8287 (match_operand:VQ_I 1 "register_operand" "w")))]
8288 "TARGET_SIMD && TARGET_SHA3"
8289 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
8290 [(set_attr "type" "crypto_sha3")]
8293 (define_insn "aarch64_rax1qv2di"
8294 [(set (match_operand:V2DI 0 "register_operand" "=w")
8297 (match_operand:V2DI 2 "register_operand" "w")
8299 (match_operand:V2DI 1 "register_operand" "w")))]
8300 "TARGET_SIMD && TARGET_SHA3"
8301 "rax1\\t%0.2d, %1.2d, %2.2d"
8302 [(set_attr "type" "crypto_sha3")]
8305 (define_insn "aarch64_xarqv2di"
8306 [(set (match_operand:V2DI 0 "register_operand" "=w")
8309 (match_operand:V2DI 1 "register_operand" "%w")
8310 (match_operand:V2DI 2 "register_operand" "w"))
8311 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
8312 "TARGET_SIMD && TARGET_SHA3"
8313 "xar\\t%0.2d, %1.2d, %2.2d, %3"
8314 [(set_attr "type" "crypto_sha3")]
8317 (define_insn "bcaxq<mode>4"
8318 [(set (match_operand:VQ_I 0 "register_operand" "=w")
8321 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
8322 (match_operand:VQ_I 2 "register_operand" "w"))
8323 (match_operand:VQ_I 1 "register_operand" "w")))]
8324 "TARGET_SIMD && TARGET_SHA3"
8325 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
8326 [(set_attr "type" "crypto_sha3")]
;; SM3 hash instructions (TARGET_SM4 gates both SM3 and SM4 here).
8331 (define_insn "aarch64_sm3ss1qv4si"
8332 [(set (match_operand:V4SI 0 "register_operand" "=w")
8333 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8334 (match_operand:V4SI 2 "register_operand" "w")
8335 (match_operand:V4SI 3 "register_operand" "w")]
8337 "TARGET_SIMD && TARGET_SM4"
8338 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
8339 [(set_attr "type" "crypto_sm3")]
8343 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
8344 [(set (match_operand:V4SI 0 "register_operand" "=w")
8345 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8346 (match_operand:V4SI 2 "register_operand" "w")
8347 (match_operand:V4SI 3 "register_operand" "w")
8348 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
8350 "TARGET_SIMD && TARGET_SM4"
8351 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
8352 [(set_attr "type" "crypto_sm3")]
8355 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
8356 [(set (match_operand:V4SI 0 "register_operand" "=w")
8357 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8358 (match_operand:V4SI 2 "register_operand" "w")
8359 (match_operand:V4SI 3 "register_operand" "w")]
8361 "TARGET_SIMD && TARGET_SM4"
8362 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
8363 [(set_attr "type" "crypto_sm3")]
;; SM4 encryption round and key-schedule instructions.
8368 (define_insn "aarch64_sm4eqv4si"
8369 [(set (match_operand:V4SI 0 "register_operand" "=w")
8370 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
8371 (match_operand:V4SI 2 "register_operand" "w")]
8373 "TARGET_SIMD && TARGET_SM4"
8374 "sm4e\\t%0.4s, %2.4s"
8375 [(set_attr "type" "crypto_sm4")]
8378 (define_insn "aarch64_sm4ekeyqv4si"
8379 [(set (match_operand:V4SI 0 "register_operand" "=w")
8380 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
8381 (match_operand:V4SI 2 "register_operand" "w")]
8383 "TARGET_SIMD && TARGET_SM4"
8384 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
8385 [(set_attr "type" "crypto_sm4")]
;; NOTE(review): sampled extraction — unspec codes, conditions and
;; closing parens are missing from the patterns below.
;; FP16FML widening multiply-accumulate (FMLAL/FMLSL): each expander
;; builds lo/hi half-selectors and forwards to the matching insn.
8390 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
8391 [(set (match_operand:VDQSF 0 "register_operand")
8393 [(match_operand:VDQSF 1 "register_operand")
8394 (match_operand:<VFMLA_W> 2 "register_operand")
8395 (match_operand:<VFMLA_W> 3 "register_operand")]
8399 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8400 <nunits> * 2, false);
8401 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
8402 <nunits> * 2, false);
8404 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
8413 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
8414 [(set (match_operand:VDQSF 0 "register_operand")
8416 [(match_operand:VDQSF 1 "register_operand")
8417 (match_operand:<VFMLA_W> 2 "register_operand")
8418 (match_operand:<VFMLA_W> 3 "register_operand")]
8422 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8423 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
8425 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; Insns: FMLAL/FMLSL on the low halves, FMLAL2/FMLSL2 on the high
;; halves; operand 1 is the tied accumulator ("0").
8433 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
8434 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8437 (vec_select:<VFMLA_SEL_W>
8438 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8439 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
8441 (vec_select:<VFMLA_SEL_W>
8442 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8443 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8444 (match_operand:VDQSF 1 "register_operand" "0")))]
8446 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8447 [(set_attr "type" "neon_fp_mul_s")]
8450 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
8451 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8455 (vec_select:<VFMLA_SEL_W>
8456 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8457 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
8459 (vec_select:<VFMLA_SEL_W>
8460 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8461 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
8462 (match_operand:VDQSF 1 "register_operand" "0")))]
8464 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8465 [(set_attr "type" "neon_fp_mul_s")]
8468 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
8469 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8472 (vec_select:<VFMLA_SEL_W>
8473 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8474 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
8476 (vec_select:<VFMLA_SEL_W>
8477 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8478 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8479 (match_operand:VDQSF 1 "register_operand" "0")))]
8481 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8482 [(set_attr "type" "neon_fp_mul_s")]
8485 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
8486 [(set (match_operand:VDQSF 0 "register_operand" "=w")
8490 (vec_select:<VFMLA_SEL_W>
8491 (match_operand:<VFMLA_W> 2 "register_operand" "w")
8492 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
8494 (vec_select:<VFMLA_SEL_W>
8495 (match_operand:<VFMLA_W> 3 "register_operand" "w")
8496 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
8497 (match_operand:VDQSF 1 "register_operand" "0")))]
8499 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
8500 [(set_attr "type" "neon_fp_mul_s")]
;; Lane forms for V2SF: operand 4 is the lane index, mapped through
;; aarch64_endian_lane_rtx for big-endian lane numbering.
8503 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
8504 [(set (match_operand:V2SF 0 "register_operand")
8505 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8506 (match_operand:V4HF 2 "register_operand")
8507 (match_operand:V4HF 3 "register_operand")
8508 (match_operand:SI 4 "aarch64_imm2")]
8512 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
8513 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8515 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
8524 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
8525 [(set (match_operand:V2SF 0 "register_operand")
8526 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8527 (match_operand:V4HF 2 "register_operand")
8528 (match_operand:V4HF 3 "register_operand")
8529 (match_operand:SI 4 "aarch64_imm2")]
8533 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8534 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8536 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
8544 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
8545 [(set (match_operand:V2SF 0 "register_operand" "=w")
8549 (match_operand:V4HF 2 "register_operand" "w")
8550 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8554 (match_operand:V4HF 3 "register_operand" "x")
8555 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8556 (match_operand:V2SF 1 "register_operand" "0")))]
8558 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8559 [(set_attr "type" "neon_fp_mul_s")]
8562 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
8563 [(set (match_operand:V2SF 0 "register_operand" "=w")
8568 (match_operand:V4HF 2 "register_operand" "w")
8569 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8573 (match_operand:V4HF 3 "register_operand" "x")
8574 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8575 (match_operand:V2SF 1 "register_operand" "0")))]
8577 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8578 [(set_attr "type" "neon_fp_mul_s")]
8581 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
8582 [(set (match_operand:V2SF 0 "register_operand" "=w")
8586 (match_operand:V4HF 2 "register_operand" "w")
8587 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8591 (match_operand:V4HF 3 "register_operand" "x")
8592 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8593 (match_operand:V2SF 1 "register_operand" "0")))]
8595 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8596 [(set_attr "type" "neon_fp_mul_s")]
8599 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
8600 [(set (match_operand:V2SF 0 "register_operand" "=w")
8605 (match_operand:V4HF 2 "register_operand" "w")
8606 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8610 (match_operand:V4HF 3 "register_operand" "x")
8611 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8612 (match_operand:V2SF 1 "register_operand" "0")))]
8614 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8615 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL/FMLSL "q_laneq" patterns: 128-bit (V4SF) accumulator, with the
;; lane taken from a full 128-bit V8HF vector, hence the 3-bit lane
;; immediate (aarch64_lane_imm3, lanes 0..7).
;; Expander for the low half of the V8HF multiplicand.
8618 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
8619 [(set (match_operand:V4SF 0 "register_operand")
8620 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8621 (match_operand:V8HF 2 "register_operand")
8622 (match_operand:V8HF 3 "register_operand")
8623 (match_operand:SI 4 "aarch64_lane_imm3")]
8627 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
;; Endian-correct the lane index within the V8HF index vector.
8628 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8630 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; Expander for the high half of the V8HF multiplicand.
8638 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
8639 [(set (match_operand:V4SF 0 "register_operand")
8640 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8641 (match_operand:V8HF 2 "register_operand")
8642 (match_operand:V8HF 3 "register_operand")
8643 (match_operand:SI 4 "aarch64_lane_imm3")]
8647 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8648 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8650 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (low half, by V8HF lane); accumulator ties to the destination ("0").
8658 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
8659 [(set (match_operand:V4SF 0 "register_operand" "=w")
8663 (match_operand:V8HF 2 "register_operand" "w")
8664 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8668 (match_operand:V8HF 3 "register_operand" "x")
8669 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8670 (match_operand:V4SF 1 "register_operand" "0")))]
8672 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8673 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low half, by V8HF lane).
8676 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
8677 [(set (match_operand:V4SF 0 "register_operand" "=w")
8682 (match_operand:V8HF 2 "register_operand" "w")
8683 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8687 (match_operand:V8HF 3 "register_operand" "x")
8688 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8689 (match_operand:V4SF 1 "register_operand" "0")))]
8691 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8692 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high half, by V8HF lane).
8695 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
8696 [(set (match_operand:V4SF 0 "register_operand" "=w")
8700 (match_operand:V8HF 2 "register_operand" "w")
8701 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8705 (match_operand:V8HF 3 "register_operand" "x")
8706 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8707 (match_operand:V4SF 1 "register_operand" "0")))]
8709 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8710 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high half, by V8HF lane).
8713 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
8714 [(set (match_operand:V4SF 0 "register_operand" "=w")
8719 (match_operand:V8HF 2 "register_operand" "w")
8720 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8724 (match_operand:V8HF 3 "register_operand" "x")
8725 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8726 (match_operand:V4SF 1 "register_operand" "0")))]
8728 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8729 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL/FMLSL "laneq" patterns: 64-bit (V2SF) accumulator and V4HF
;; multiplicand, but the lane comes from a 128-bit V8HF vector (hence the
;; mixed V4HF/V8HF modes and the 3-bit lane immediate).
;; Expander for the low half of the V4HF multiplicand.
8732 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
8733 [(set (match_operand:V2SF 0 "register_operand")
8734 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8735 (match_operand:V4HF 2 "register_operand")
8736 (match_operand:V8HF 3 "register_operand")
8737 (match_operand:SI 4 "aarch64_lane_imm3")]
;; Half selector is in V4HF mode (the multiplicand) ...
8741 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
;; ... but the lane index is in V8HF mode (the indexed operand).
8742 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8744 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the high half of the V4HF multiplicand.
8753 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
8754 [(set (match_operand:V2SF 0 "register_operand")
8755 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
8756 (match_operand:V4HF 2 "register_operand")
8757 (match_operand:V8HF 3 "register_operand")
8758 (match_operand:SI 4 "aarch64_lane_imm3")]
8762 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
8763 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
8765 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (low half, by V8HF lane); accumulator ties to the destination ("0").
8774 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
8775 [(set (match_operand:V2SF 0 "register_operand" "=w")
8779 (match_operand:V4HF 2 "register_operand" "w")
8780 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
8784 (match_operand:V8HF 3 "register_operand" "x")
8785 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8786 (match_operand:V2SF 1 "register_operand" "0")))]
8788 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
8789 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low half, by V8HF lane).
8792 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
8793 [(set (match_operand:V2SF 0 "register_operand" "=w")
8798 (match_operand:V4HF 2 "register_operand" "w")
8799 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
8803 (match_operand:V8HF 3 "register_operand" "x")
8804 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8805 (match_operand:V2SF 1 "register_operand" "0")))]
8807 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
8808 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high half, by V8HF lane).
8811 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
8812 [(set (match_operand:V2SF 0 "register_operand" "=w")
8816 (match_operand:V4HF 2 "register_operand" "w")
8817 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
8821 (match_operand:V8HF 3 "register_operand" "x")
8822 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8823 (match_operand:V2SF 1 "register_operand" "0")))]
8825 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
8826 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high half, by V8HF lane).
8829 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
8830 [(set (match_operand:V2SF 0 "register_operand" "=w")
8835 (match_operand:V4HF 2 "register_operand" "w")
8836 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
8840 (match_operand:V8HF 3 "register_operand" "x")
8841 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
8842 (match_operand:V2SF 1 "register_operand" "0")))]
8844 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
8845 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL/FMLSL "q_lane" patterns: 128-bit (V4SF) accumulator and V8HF
;; multiplicand, with the lane taken from a 64-bit V4HF vector (hence the
;; 2-bit lane immediate, lanes 0..3).
;; Expander for the low half of the V8HF multiplicand.
8848 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
8849 [(set (match_operand:V4SF 0 "register_operand")
8850 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8851 (match_operand:V8HF 2 "register_operand")
8852 (match_operand:V4HF 3 "register_operand")
8853 (match_operand:SI 4 "aarch64_imm2")]
;; Half selector in V8HF mode (multiplicand), lane index in V4HF mode
;; (indexed operand) -- the mirror image of the laneq_v2sf patterns above.
8857 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
8858 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8860 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the high half of the V8HF multiplicand.
8868 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
8869 [(set (match_operand:V4SF 0 "register_operand")
8870 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
8871 (match_operand:V8HF 2 "register_operand")
8872 (match_operand:V4HF 3 "register_operand")
8873 (match_operand:SI 4 "aarch64_imm2")]
8877 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
8878 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
8880 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (low half, by V4HF lane); accumulator ties to the destination ("0").
8888 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
8889 [(set (match_operand:V4SF 0 "register_operand" "=w")
8893 (match_operand:V8HF 2 "register_operand" "w")
8894 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
8898 (match_operand:V4HF 3 "register_operand" "x")
8899 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8900 (match_operand:V4SF 1 "register_operand" "0")))]
8902 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
8903 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (low half, by V4HF lane).
8906 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
8907 [(set (match_operand:V4SF 0 "register_operand" "=w")
8912 (match_operand:V8HF 2 "register_operand" "w")
8913 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
8917 (match_operand:V4HF 3 "register_operand" "x")
8918 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8919 (match_operand:V4SF 1 "register_operand" "0")))]
8921 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
8922 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (high half, by V4HF lane).
8925 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
8926 [(set (match_operand:V4SF 0 "register_operand" "=w")
8930 (match_operand:V8HF 2 "register_operand" "w")
8931 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
8935 (match_operand:V4HF 3 "register_operand" "x")
8936 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8937 (match_operand:V4SF 1 "register_operand" "0")))]
8939 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
8940 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (high half, by V4HF lane).
8943 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
8944 [(set (match_operand:V4SF 0 "register_operand" "=w")
8949 (match_operand:V8HF 2 "register_operand" "w")
8950 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
8954 (match_operand:V4HF 3 "register_operand" "x")
8955 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
8956 (match_operand:V4SF 1 "register_operand" "0")))]
8958 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
8959 [(set_attr "type" "neon_fp_mul_s")]
;; Polynomial multiply long (carry-less multiply), gated on
;; TARGET_SIMD && TARGET_AES.  Both forms produce a 128-bit (TI) result.
;; PMULL: 64x64 -> 128-bit carry-less multiply of two DI scalars.
8964 (define_insn "aarch64_crypto_pmulldi"
8965 [(set (match_operand:TI 0 "register_operand" "=w")
8966 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
8967 (match_operand:DI 2 "register_operand" "w")]
8969 "TARGET_SIMD && TARGET_AES"
8970 "pmull\\t%0.1q, %1.1d, %2.1d"
8971 [(set_attr "type" "crypto_pmull")]
;; PMULL2: same operation on the upper 64-bit elements of two V2DI vectors.
8974 (define_insn "aarch64_crypto_pmullv2di"
8975 [(set (match_operand:TI 0 "register_operand" "=w")
8976 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
8977 (match_operand:V2DI 2 "register_operand" "w")]
8979 "TARGET_SIMD && TARGET_AES"
8980 "pmull2\\t%0.1q, %1.2d, %2.2d"
8981 [(set_attr "type" "crypto_pmull")]
8984 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; ANY_EXTEND covers both sign_extend and zero_extend; <su> selects the
;; sxtl/uxtl mnemonic accordingly.
8985 (define_insn "<optab><Vnarrowq><mode>2"
8986 [(set (match_operand:VQN 0 "register_operand" "=w")
8987 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
8989 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
8990 [(set_attr "type" "neon_shift_imm_long")]
;; Named expander that maps directly onto the extend insn above.
8993 (define_expand "aarch64_<su>xtl<mode>"
8994 [(set (match_operand:VQN 0 "register_operand" "=w")
8995 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9000 ;; Truncate a 128-bit integer vector to a 64-bit vector.
;; XTN: extract-narrow, keeping the low half of each element.
9001 (define_insn "trunc<mode><Vnarrowq>2"
9002 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9003 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9005 "xtn\t%0.<Vntype>, %1.<Vtype>"
9006 [(set_attr "type" "neon_move_narrow_q")]
;; BFDOT: bfloat16 dot product accumulating into SF vectors.
;; Vector-by-vector form; operand 1 is the accumulator, tied to the
;; destination via constraint "0".
9009 (define_insn "aarch64_bfdot<mode>"
9010 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9013 [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9014 (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9016 (match_operand:VDQSF 1 "register_operand" "0")))]
9018 "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9019 [(set_attr "type" "neon_dot<q>")]
;; BFDOT by-lane form.  The instruction indexes 2-element BF16 pairs
;; ("%3.2h[%4]"), so the architectural lane is mapped through
;; ENDIAN_LANE_N with nunits / 2 rather than nunits.
9022 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9023 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9026 [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9027 (match_operand:VBF 3 "register_operand" "w")
9028 (match_operand:SI 4 "const_int_operand" "n")]
9030 (match_operand:VDQSF 1 "register_operand" "0")))]
9033 int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9034 int lane = INTVAL (operands[4]);
9035 operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9036 return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9038 [(set_attr "type" "neon_dot<VDQSF:q>")]
9041 ;; vget_low/high_bf16
;; Extract the low 64-bit half (lanes selected with `false`) of a V8BF
;; vector via the generic get_half pattern.
9042 (define_expand "aarch64_vget_lo_halfv8bf"
9043 [(match_operand:V4BF 0 "register_operand")
9044 (match_operand:V8BF 1 "register_operand")]
9047 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
9048 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
;; Same, but the high half (lanes selected with `true`).
9052 (define_expand "aarch64_vget_hi_halfv8bf"
9053 [(match_operand:V4BF 0 "register_operand")
9054 (match_operand:V8BF 1 "register_operand")]
9057 rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
9058 emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
;; BFMMLA: bfloat16 matrix multiply-accumulate into a V4SF accumulator
;; (operand 1, tied to the destination).
9063 (define_insn "aarch64_bfmmlaqv4sf"
9064 [(set (match_operand:V4SF 0 "register_operand" "=w")
9065 (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9066 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9067 (match_operand:V8BF 3 "register_operand" "w")]
9070 "bfmmla\\t%0.4s, %2.8h, %3.8h"
9071 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFMLALB/BFMLALT (<bt> selects bottom/top): bfloat16 widening
;; multiply-accumulate, vector-by-vector form.
9075 (define_insn "aarch64_bfmlal<bt>v4sf"
9076 [(set (match_operand:V4SF 0 "register_operand" "=w")
9077 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9078 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9079 (match_operand:V8BF 3 "register_operand" "w")]
9082 "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9083 [(set_attr "type" "neon_fp_mla_s_q")]
;; BFMLALB/BFMLALT by-lane form; the lane index is endian-corrected at
;; output time.
9086 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9087 [(set (match_operand:V4SF 0 "register_operand" "=w")
9088 (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9089 (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9090 (match_operand:VBF 3 "register_operand" "w")
9091 (match_operand:SI 4 "const_int_operand" "n")]
9095 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9096 return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9098 [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9101 ;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (<sur> from the MATMUL iterator): V16QI inputs
;; accumulating into a V4SI destination tied to operand 1.
9102 (define_insn "aarch64_simd_<sur>mmlav16qi"
9103 [(set (match_operand:V4SI 0 "register_operand" "=w")
9105 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9106 (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9107 (match_operand:V4SI 1 "register_operand" "0")))]
9109 "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9110 [(set_attr "type" "neon_mla_s_q")]
;; BF16 <-> SF conversions.
;; BFCVTN: narrow V4SF to bfloat16 (V4SF_TO_BF covers the result modes).
9114 (define_insn "aarch64_bfcvtn<q><mode>"
9115 [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9116 (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9119 "bfcvtn\\t%0.4h, %1.4s"
9120 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; BFCVTN2: narrow into the high half of the destination; operand 1 is
;; the existing low half, tied to the destination register.
9123 (define_insn "aarch64_bfcvtn2v8bf"
9124 [(set (match_operand:V8BF 0 "register_operand" "=w")
9125 (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9126 (match_operand:V4SF 2 "register_operand" "w")]
9129 "bfcvtn2\\t%0.8h, %2.4s"
9130 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Scalar SF -> BF conversion.
9133 (define_insn "aarch64_bfcvtbf"
9134 [(set (match_operand:BF 0 "register_operand" "=w")
9135 (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9139 [(set_attr "type" "f_cvt")]
9142 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
;; BF -> SF widening is a left shift by 16: BF16 is the top half of the
;; IEEE single-precision format, as the templates below show.
9143 (define_insn "aarch64_vbfcvt<mode>"
9144 [(set (match_operand:V4SF 0 "register_operand" "=w")
9145 (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
9148 "shll\\t%0.4s, %1.4h, #16"
9149 [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant using shll2 on the upper V8BF elements.
9152 (define_insn "aarch64_vbfcvt_highv8bf"
9153 [(set (match_operand:V4SF 0 "register_operand" "=w")
9154 (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
9157 "shll2\\t%0.4s, %1.8h, #16"
9158 [(set_attr "type" "neon_shift_imm_long")]
;; Scalar BF -> SF via a 64-bit left shift of the D register.
9161 (define_insn "aarch64_bfcvtsf"
9162 [(set (match_operand:SF 0 "register_operand" "=w")
9163 (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
9166 "shl\\t%d0, %d1, #16"
9167 [(set_attr "type" "neon_shift_imm")]