1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for all AdvSIMD vector modes (VALL_F16).
;; The preparation code legalizes vector stores: a memory destination may
;; keep a zero-immediate source only when it can be emitted as
;; "str xzr" (8-byte modes) or "stp xzr, xzr" (16-byte modes).
;; NOTE(review): this chunk of the file is elided — the condition string
;; and brace lines of each pattern are missing from this view.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move: a memory := non-register form (possible from
;; the auto-vectorizer) is legalized by forcing operand 1 into a register;
;; the expander is not allowed to fail.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; DUP of a scalar into every lane of an integer vector: alternative 0
;; duplicates lane 0 of a SIMD register, alternative 1 moves in from a
;; general-purpose register (dispreferred, "?r").
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point DUP: only the SIMD-register source form is provided.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Duplicate one selected lane across the whole vector; the lane index is
;; first converted to the architectural (endian-corrected) lane number.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the swapped width (<VSWAP_WIDTH>),
;; e.g. duplicating a lane of a 64-bit vector into a 128-bit result.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; Move insn for 64-bit (VD) vectors once operands are concrete: covers
;; load, zero store, store, SIMD reg-reg, SIMD->GP, GP->SIMD, GP->GP and
;; immediate materialization, one alternative each.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; 128-bit (VQ) counterpart: storing zero uses "stp xzr, xzr", and the
;; alternatives involving general registers take two instructions
;; (length 8, type "multiple"; they are split elsewhere).
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes: the condition requires the selected lane to be the
;; architectural lane 0 after endian correction.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers: the second address must be exactly
;; the first address plus the mode size (checked with rtx_equal_p against
;; plus_constant).
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
;; STP of two 64-bit vector registers, mirror of the load_pair above.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
;; LDP of two 128-bit vector registers (Q-register pair).
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
;; STP of two 128-bit vector registers (Q-register pair).
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; NOTE(review): the "(define_split" header lines for the next two
;; patterns are elided from this view; only their bodies are visible.
;; Post-reload split of a 128-bit move held entirely in general-purpose
;; registers: emitted as two DImode register moves.
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split of a 128-bit move crossing between the FP/SIMD and
;; general register files, delegated to aarch64_split_simd_move.
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Helper expander used by the split above: moves a 128-bit value between
;; register files as two 64-bit halves.  GP->SIMD goes through
;; move_lo_quad/move_hi_quad; SIMD->GP extracts the low and high halves
;; with endian-corrected lane-half selectors.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Extract the low 64-bit half of a Q register into a general register;
;; only valid after reload (concrete hard registers required).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
;; Extract the high 64-bit half of a Q register into a general register.
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; ORN: bitwise OR with complemented first operand.  Note the operand
;; swap in the output template — the architectural ORN complements its
;; *second* source, so the RTL's negated operand 1 is printed last.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
;; BIC: bitwise AND with complemented operand, same operand swap as ORN.
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (byte/half/single element sizes only — there
;; is no MUL for 64-bit lanes, hence VDQ_BHSI).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each lane via the REV family of instructions.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
;; Bit-reversal within each byte lane (RBIT).
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
;; Count-trailing-zeros synthesized as: byte-swap each lane, bit-reverse
;; each byte (viewing the vector through a QI-element subreg), then count
;; leading zeros — the composition reverses the full lane bit order.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(a, b) = a with the sign of a*b applied: isolate operand 2's
;; sign bits with a sign-bit mask (~0 << (unit bits - 1)), then XOR them
;; into operand 1, all in the equivalent integer vector mode.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: accumulate four-way byte dot products into each 32-bit lane
;; of the accumulator (operand 1, tied to the output).
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
439 ;; for (i=0; i<len; i++) {
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
459 (match_operand:VS 3 "register_operand")))]
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; Indexed dot product against one 32-bit group of a 64-bit (V8QI) index
;; register; the index is endian-corrected before printing.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
483 [(set_attr "type" "neon_dot")]
;; As above, but indexing into a 128-bit (V16QI) register.
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
498 [(set_attr "type" "neon_dot")]
;; copysign(x, y): select the sign bit from operand 2 and the remaining
;; bits from operand 1 using BSL with a sign-bit-only mask.
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
;; Multiply by a duplicated lane: combiner pattern matching
;; (vec_duplicate (vec_select ...)) * vec, emitted as MUL/FMUL by element.
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Same, but the lane comes from a vector of the swapped width.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar duplicated across all lanes (element [0] form).
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE).
562 (define_insn "@aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root Newton-Raphson step (FRSQRTS).
570 (define_insn "@aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; rsqrt optab entry: expands to the iterative approximation sequence.
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a V2DF lane, using the by-element FMUL.
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation.
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value.
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
;; (kept as an unspec for that reason).
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: abs (a - b) maps to SABD.
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves (SABDL2/UABDL2).
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute-difference-and-accumulate (SABAL/UABAL);
;; operand 3 is the accumulator, tied to the output.
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise widening add-and-accumulate (SADALP/UADALP).
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
699 emit_move_insn (operands[0], operands[3]);
;; Absolute-difference-and-accumulate (SABA); operand 3 is the
;; accumulator, tied to the output.
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD).
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
726 ;; For AND (vector, register) and BIC (vector, immediate)
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
733 switch (which_alternative)
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
744 [(set_attr "type" "neon_logic<q>")]
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
754 switch (which_alternative)
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
765 [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive OR (EOR).
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise complement (NOT).
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector.  Operand 2 arrives as a
;; one-hot lane mask; it is converted to an endian-corrected one-hot
;; value, printed with %p (log2).  Alternatives: INS from a SIMD lane,
;; INS from a GP register, or LD1 of a single lane from memory.
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of a vector into a lane of another vector (INS), with
;; both lane numbers endian-corrected.
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
814 (vec_duplicate:VALL_F16
816 (match_operand:VALL_F16 3 "register_operand" "w")
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
829 [(set_attr "type" "neon_ins<q>")]
;; Same, but the source vector has the swapped width (<VSWAP_WIDTH>).
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
851 [(set_attr "type" "neon_ins<q>")]
;; Logical right shift by an immediate duplicated across all lanes (USHR).
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate (SSHR).
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by an immediate (SHL).
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by a per-lane register amount (SSHL).
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
;; Variable shift, unsigned semantics (USHL); negative per-lane amounts
;; shift right, hence the unspec rather than a plain shift rtx.
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
;; Variable shift, signed semantics (SSHL), as above.
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
;; Vector shift-left by a (scalar) SImode amount.  A constant amount in
;; [0, bit_width) becomes an immediate SHL; otherwise the amount is
;; duplicated into a vector and SSHL is used.
910 (define_expand "ashl<mode>3"
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
913 (match_operand:SI 2 "general_operand" "")]
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
919 if (CONST_INT_P (operands[2]))
921 shift_amount = INTVAL (operands[2]);
922 if (shift_amount >= 0 && shift_amount < bit_width)
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
933 operands[2] = force_reg (SImode, operands[2]);
936 else if (MEM_P (operands[2]))
938 operands[2] = force_reg (SImode, operands[2]);
941 if (REG_P (operands[2]))
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Vector logical shift-right by a scalar amount.  Constant amounts in
;; (0, bit_width] use immediate USHR; otherwise the amount is negated,
;; duplicated, and fed to USHL (negative amounts shift right).
957 (define_expand "lshr<mode>3"
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
960 (match_operand:SI 2 "general_operand" "")]
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
966 if (CONST_INT_P (operands[2]))
968 shift_amount = INTVAL (operands[2]);
969 if (shift_amount > 0 && shift_amount <= bit_width)
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
979 operands[2] = force_reg (SImode, operands[2]);
981 else if (MEM_P (operands[2]))
983 operands[2] = force_reg (SImode, operands[2]);
986 if (REG_P (operands[2]))
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]));
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Vector arithmetic shift-right by a scalar amount; as lshr but using
;; immediate SSHR or the signed variable shift SSHL.
1004 (define_expand "ashr<mode>3"
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
1007 (match_operand:SI 2 "general_operand" "")]
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1013 if (CONST_INT_P (operands[2]))
1015 shift_amount = INTVAL (operands[2]);
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1026 operands[2] = force_reg (SImode, operands[2]);
1028 else if (MEM_P (operands[2]))
1030 operands[2] = force_reg (SImode, operands[2]);
1033 if (REG_P (operands[2]))
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Per-lane variable shift-left: maps directly onto SSHL.
1051 (define_expand "vashl<mode>3"
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
;; Per-lane variable arithmetic shift-right: negate the amounts and use
;; the signed variable shift (negative SSHL amounts shift right).
1065 (define_expand "vashr<mode>3"
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic shift right including the amount 64, which the
;; standard pattern cannot express; it is mapped to 63.
1079 (define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1088 if (INTVAL (operands[2]) == 64)
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Per-lane variable logical shift-right via negated USHL amounts.
1095 (define_expand "vlshr<mode>3"
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1101 rtx neg = gen_reg_rtx (<MODE>mode)
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical shift right including the amount 64, whose result is
;; simply zero.
1108 (define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1114 if (INTVAL (operands[2]) == 64)
1115 emit_move_insn (operands[0], const0_rtx);
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right by immediate (element-order aware):
;; big-endian uses SHL, little-endian uses USHR on the D register.
1123 (define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1130 if (BYTES_BIG_ENDIAN)
1131 return "shl %d0, %d1, %2";
1133 return "ushr %d0, %d1, %2";
1135 [(set_attr "type" "neon_shift_imm")]
;; Insert a scalar into lane INTVAL(operands[2]): the lane index is
;; converted to a one-hot mask (1 << lane) for aarch64_simd_vec_set.
1138 (define_expand "vec_set<mode>"
1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]));
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
;; Multiply-accumulate: op0 = op1 + op2 * op3 (op1 is tied to op0, "0").
1152 (define_insn "aarch64_mla<mode>"
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand taken from a lane of a same-width vector;
;; the lane number is endian-adjusted before printing.
1163 (define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1167 (vec_duplicate:VDQHS
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above but the lane comes from the swapped-width vector mode.
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1185 (vec_duplicate:VDQHS
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where the duplicated multiplicand is a scalar register: printed
;; as lane 0 of its vector view.
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202 (mult:VDQHS (vec_duplicate:VDQHS
1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: op0 = op1 - op2 * op3 (op1 tied to op0).
1211 (define_insn "aarch64_mls<mode>"
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Lane-indexed MLS variants, mirroring the MLA lane patterns above.
1221 (define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1224 (match_operand:VDQHS 4 "register_operand" "0")
1226 (vec_duplicate:VDQHS
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1242 (match_operand:VDQHS 4 "register_operand" "0")
1244 (vec_duplicate:VDQHS
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a scalar-register multiplicand, printed as lane 0.
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1269 ;; Max/Min operations.
;; Signed/unsigned element-wise SMAX/SMIN/UMAX/UMIN.
1270 (define_insn "<su><maxmin><mode>3"
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: no direct instruction, so synthesised via a compare
;; plus vcond selection (gen_vcondv2div2di below).
1279 (define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1285 enum rtx_code cmp_operator;
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308 operands[2], cmp_fmt, operands[1], operands[2]));
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
;; Little-endian, non-2-element quad modes (zero half second in concat).
1344 (define_insn "move_lo_quad_internal_<mode>"
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348 (vec_duplicate:<VHALF> (const_int 0))))]
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355 (set_attr "length" "4")
1356 (set_attr "arch" "simd,fp,simd")]
;; Little-endian, 2-element quad modes (V2DI/V2DF).
1359 (define_insn "move_lo_quad_internal_<mode>"
1360 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1362 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1364 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1369 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1370 (set_attr "length" "4")
1371 (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterparts: the zero half is first in the concat.
1374 (define_insn "move_lo_quad_internal_be_<mode>"
1375 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1377 (vec_duplicate:<VHALF> (const_int 0))
1378 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1379 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1384 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1385 (set_attr "length" "4")
1386 (set_attr "arch" "simd,fp,simd")]
1389 (define_insn "move_lo_quad_internal_be_<mode>"
1390 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1393 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1394 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1399 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1400 (set_attr "length" "4")
1401 (set_attr "arch" "simd,fp,simd")]
;; Expander: dispatch to the _be_ or little-endian internal pattern.
1404 (define_expand "move_lo_quad_<mode>"
1405 [(match_operand:VQ 0 "register_operand")
1406 (match_operand:VQ 1 "register_operand")]
1409 if (BYTES_BIG_ENDIAN)
1410 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1412 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1417 ;; Move operand1 to the high architectural bits of the register, keeping
1418 ;; the low architectural bits of operand2.
1419 ;; For little-endian this is { operand2, operand1 }
1420 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: keep the low half of op0 (vec_select with the lo-half
;; parallel), insert op1 as the high half via INS.
1422 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1423 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1427 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1428 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1429 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1431 ins\\t%0.d[1], %1.d[0]
1433 [(set_attr "type" "neon_ins")]
;; Big-endian counterpart with the concat operands swapped.
1436 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1437 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1439 (match_operand:<VHALF> 1 "register_operand" "w,r")
1442 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1443 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1445 ins\\t%0.d[1], %1.d[0]
1447 [(set_attr "type" "neon_ins")]
;; Expander: build the lo-half parallel and pick the endian variant.
1450 (define_expand "move_hi_quad_<mode>"
1451 [(match_operand:VQ 0 "register_operand" "")
1452 (match_operand:<VHALF> 1 "register_operand" "")]
1455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1456 if (BYTES_BIG_ENDIAN)
1457 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1460 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1465 ;; Narrowing operations.
;; Truncate each element of a quad vector to half width (XTN).
1468 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1469 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1470 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1472 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1473 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit vectors: concatenate into a quad temp (endian-aware
;; lo/hi operand choice), then narrow with the XTN pattern above.
1476 (define_expand "vec_pack_trunc_<mode>"
1477 [(match_operand:<VNARROWD> 0 "register_operand" "")
1478 (match_operand:VDN 1 "register_operand" "")
1479 (match_operand:VDN 2 "register_operand" "")]
1482 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1483 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1484 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1486 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1487 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1488 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack two quad vectors with XTN + XTN2; the "&w" earlyclobber keeps
;; the destination distinct from both sources across the two insns.
1494 (define_insn "vec_pack_trunc_<mode>"
1495 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1496 (vec_concat:<VNARROWQ2>
1497 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1498 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1501 if (BYTES_BIG_ENDIAN)
1502 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1504 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1506 [(set_attr "type" "multiple")
1507 (set_attr "length" "8")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1510 ;; Widening operations.
;; Sign/zero-extend the low half of a quad vector (SXTL/UXTL).
1512 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515 (match_operand:VQW 1 "register_operand" "w")
1516 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1519 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1520 [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high half (SXTL2/UXTL2).
1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1524 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1525 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1526 (match_operand:VQW 1 "register_operand" "w")
1527 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1530 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1531 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: construct the half-parallel, then emit the
;; matching insn above.
1534 (define_expand "vec_unpack<su>_hi_<mode>"
1535 [(match_operand:<VWIDE> 0 "register_operand" "")
1536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1540 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1546 (define_expand "vec_unpack<su>_lo_<mode>"
1547 [(match_operand:<VWIDE> 0 "register_operand" "")
1548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1551 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1552 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1558 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves (SMLAL/UMLAL).
1560 (define_insn "*aarch64_<su>mlal_lo<mode>"
1561 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1564 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1565 (match_operand:VQW 2 "register_operand" "w")
1566 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1567 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1568 (match_operand:VQW 4 "register_operand" "w")
1570 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1572 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1573 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half counterpart (SMLAL2/UMLAL2).
1576 (define_insn "*aarch64_<su>mlal_hi<mode>"
1577 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1580 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1581 (match_operand:VQW 2 "register_operand" "w")
1582 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1583 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1584 (match_operand:VQW 4 "register_operand" "w")
1586 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1588 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1589 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, low halves (SMLSL/UMLSL).
1592 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1593 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1595 (match_operand:<VWIDE> 1 "register_operand" "0")
1597 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1598 (match_operand:VQW 2 "register_operand" "w")
1599 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1600 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1601 (match_operand:VQW 4 "register_operand" "w")
1604 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1605 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half counterpart (SMLSL2/UMLSL2).
1608 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1609 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1611 (match_operand:<VWIDE> 1 "register_operand" "0")
1613 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1614 (match_operand:VQW 2 "register_operand" "w")
1615 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1616 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1617 (match_operand:VQW 4 "register_operand" "w")
1620 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1621 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector (non-halved) widening multiply-accumulate.
1624 (define_insn "*aarch64_<su>mlal<mode>"
1625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1629 (match_operand:VD_BHSI 1 "register_operand" "w"))
1631 (match_operand:VD_BHSI 2 "register_operand" "w")))
1632 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1634 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1635 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector widening multiply-subtract.
1638 (define_insn "*aarch64_<su>mlsl<mode>"
1639 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1641 (match_operand:<VWIDE> 1 "register_operand" "0")
1644 (match_operand:VD_BHSI 2 "register_operand" "w"))
1646 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1648 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1649 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply, low halves (SMULL/UMULL), and its expander.
1652 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1657 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 2 "register_operand" "w")
1661 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1662 [(set_attr "type" "neon_mul_<Vetype>_long")]
1665 (define_expand "vec_widen_<su>mult_lo_<mode>"
1666 [(match_operand:<VWIDE> 0 "register_operand" "")
1667 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1668 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1672 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply, high halves (SMULL2/UMULL2), and its expander.
1679 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 1 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 2 "register_operand" "w")
1688 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1689 [(set_attr "type" "neon_mul_<Vetype>_long")]
1692 (define_expand "vec_widen_<su>mult_hi_<mode>"
1693 [(match_operand:<VWIDE> 0 "register_operand" "")
1694 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1695 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1698 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1699 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1707 ;; FP vector operations.
1708 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1709 ;; double-precision (64-bit) floating-point data types and arithmetic as
1710 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1711 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1713 ;; Floating-point operations can raise an exception. Vectorizing such
1714 ;; operations are safe because of reasons explained below.
1716 ;; ARMv8 permits an extension to enable trapped floating-point
1717 ;; exception handling, however this is an optional feature. In the
1718 ;; event of a floating-point exception being raised by vectorised
1720 ;; 1. If trapped floating-point exceptions are available, then a trap
1721 ;; will be taken when any lane raises an enabled exception. A trap
1722 ;; handler may determine which lane raised the exception.
1723 ;; 2. Alternatively a sticky exception flag is set in the
1724 ;; floating-point status register (FPSR). Software may explicitly
1725 ;; test the exception flags, in which case the tests will either
1726 ;; prevent vectorisation, allowing precise identification of the
1727 ;; failing operation, or if tested outside of vectorisable regions
1728 ;; then the specific operation and lane are not of interest.
1730 ;; FP arithmetic operations.
;; Element-wise FADD.
1732 (define_insn "add<mode>3"
1733 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1734 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1735 (match_operand:VHSDF 2 "register_operand" "w")))]
1737 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1738 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FSUB.
1741 (define_insn "sub<mode>3"
1742 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1743 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1744 (match_operand:VHSDF 2 "register_operand" "w")))]
1746 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1747 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FMUL.
1750 (define_insn "mul<mode>3"
1751 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1752 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1753 (match_operand:VHSDF 2 "register_operand" "w")))]
1755 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: try the approximate-reciprocal sequence first
;; (aarch64_emit_approx_div); otherwise fall through to the FDIV insn.
1759 (define_expand "div<mode>3"
1760 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1761 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1762 (match_operand:VHSDF 2 "register_operand" "w")))]
1765 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1768 operands[1] = force_reg (<MODE>mode, operands[1]);
1771 (define_insn "*div<mode>3"
1772 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1773 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1774 (match_operand:VHSDF 2 "register_operand" "w")))]
1776 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1777 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FNEG / FABS.
1780 (define_insn "neg<mode>2"
1781 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1782 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1784 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1785 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1788 (define_insn "abs<mode>2"
1789 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1790 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1792 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1793 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3 (op3 tied to op0).
1796 (define_insn "fma<mode>4"
1797 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1798 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1799 (match_operand:VHSDF 2 "register_operand" "w")
1800 (match_operand:VHSDF 3 "register_operand" "0")))]
1802 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1803 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
;; FMLA with a lane-indexed multiplicand (same-width vector source);
;; lane number endian-adjusted before printing.
1806 (define_insn "*aarch64_fma4_elt<mode>"
1807 [(set (match_operand:VDQF 0 "register_operand" "=w")
1811 (match_operand:VDQF 1 "register_operand" "<h_con>")
1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1813 (match_operand:VDQF 3 "register_operand" "w")
1814 (match_operand:VDQF 4 "register_operand" "0")))]
1817 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane drawn from the swapped-width vector mode.
1823 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1824 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1826 (vec_duplicate:VDQSF
1828 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1829 (parallel [(match_operand:SI 2 "immediate_operand")])))
1830 (match_operand:VDQSF 3 "register_operand" "w")
1831 (match_operand:VDQSF 4 "register_operand" "0")))]
1834 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1835 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1837 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA where the duplicated multiplicand is a scalar register (lane 0).
1840 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1841 [(set (match_operand:VMUL 0 "register_operand" "=w")
1844 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1845 (match_operand:VMUL 2 "register_operand" "w")
1846 (match_operand:VMUL 3 "register_operand" "0")))]
1848 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1849 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF destination taking one V2DF lane as multiplicand.
1852 (define_insn "*aarch64_fma4_elt_to_64v2df"
1853 [(set (match_operand:DF 0 "register_operand" "=w")
1856 (match_operand:V2DF 1 "register_operand" "w")
1857 (parallel [(match_operand:SI 2 "immediate_operand")]))
1858 (match_operand:DF 3 "register_operand" "w")
1859 (match_operand:DF 4 "register_operand" "0")))]
1862 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1863 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1865 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
;; Fused negate-multiply-add: op0 = -op1 * op2 + op3 (FMLS).
1868 (define_insn "fnma<mode>4"
1869 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1871 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1872 (match_operand:VHSDF 2 "register_operand" "w")
1873 (match_operand:VHSDF 3 "register_operand" "0")))]
1875 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS lane-indexed variants, mirroring the FMLA lane patterns.
1879 (define_insn "*aarch64_fnma4_elt<mode>"
1880 [(set (match_operand:VDQF 0 "register_operand" "=w")
1883 (match_operand:VDQF 3 "register_operand" "w"))
1886 (match_operand:VDQF 1 "register_operand" "<h_con>")
1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1888 (match_operand:VDQF 4 "register_operand" "0")))]
1891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1897 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1898 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1901 (match_operand:VDQSF 3 "register_operand" "w"))
1902 (vec_duplicate:VDQSF
1904 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1905 (parallel [(match_operand:SI 2 "immediate_operand")])))
1906 (match_operand:VDQSF 4 "register_operand" "0")))]
1909 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1910 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1912 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a duplicated scalar-register multiplicand (lane 0).
1915 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1916 [(set (match_operand:VMUL 0 "register_operand" "=w")
1919 (match_operand:VMUL 2 "register_operand" "w"))
1921 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1922 (match_operand:VMUL 3 "register_operand" "0")))]
1924 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1925 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF destination taking one V2DF lane as multiplicand.
1928 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1929 [(set (match_operand:DF 0 "register_operand" "=w")
1932 (match_operand:V2DF 1 "register_operand" "w")
1933 (parallel [(match_operand:SI 2 "immediate_operand")]))
1935 (match_operand:DF 3 "register_operand" "w"))
1936 (match_operand:DF 4 "register_operand" "0")))]
1939 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1940 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1942 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; NOTE(review): excerpt with interior lines elided (see discontinuous
;; embedded line numbers); comments added, code untouched.
1945 ;; Vector versions of the floating-point frint patterns.
1946 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1947 (define_insn "<frint_pattern><mode>2"
1948 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1949 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1952 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1953 [(set_attr "type" "neon_fp_round_<stype><q>")]
1956 ;; Vector versions of the fcvt standard patterns.
1957 ;; Expands to lbtrunc, lround, lceil, lfloor
1958 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1959 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1960 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1961 [(match_operand:VHSDF 1 "register_operand" "w")]
1964 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1965 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1968 ;; HF Scalar variants of related SIMD instructions.
1969 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1970 [(set (match_operand:HI 0 "register_operand" "=w")
1971 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1973 "TARGET_SIMD_F16INST"
1974 "fcvt<frint_suffix><su>\t%h0, %h1"
1975 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF <-> HI truncating convert / int-to-float.
1978 (define_insn "<optab>_trunchfhi2"
1979 [(set (match_operand:HI 0 "register_operand" "=w")
1980 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1981 "TARGET_SIMD_F16INST"
1982 "fcvtz<su>\t%h0, %h1"
1983 [(set_attr "type" "neon_fp_to_int_s")]
1986 (define_insn "<optab>hihf2"
1987 [(set (match_operand:HF 0 "register_operand" "=w")
1988 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1989 "TARGET_SIMD_F16INST"
1990 "<su_optab>cvtf\t%h0, %h1"
1991 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine float->int convert with a power-of-two multiply into a
;; single fixed-point FCVTZ with an fbits immediate.
1994 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1995 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1996 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1998 (match_operand:VDQF 1 "register_operand" "w")
1999 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2002 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2003 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2005 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2007 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2008 output_asm_insn (buf, operands);
2011 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders for float->int and the ftrunc pattern.
2014 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2015 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2016 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2017 [(match_operand:VHSDF 1 "register_operand")]
2022 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2023 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2024 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2025 [(match_operand:VHSDF 1 "register_operand")]
2030 (define_expand "ftrunc<VHSDF:mode>2"
2031 [(set (match_operand:VHSDF 0 "register_operand")
2032 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Int->float convert (SCVTF/UCVTF).
2037 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2038 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2040 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2042 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2043 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2046 ;; Conversions between vectors of floats and doubles.
2047 ;; Contains a mix of patterns to match standard pattern names
2048 ;; and those for intrinsics.
2050 ;; Float widening operations.
2052 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2053 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2054 (float_extend:<VWIDE> (vec_select:<VHALF>
2055 (match_operand:VQ_HSF 1 "register_operand" "w")
2056 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2059 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2060 [(set_attr "type" "neon_fp_cvt_widen_s")]
2063 ;; Convert between fixed-point and floating-point (vector modes)
2065 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2066 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2067 (unspec:<VHSDF:FCVT_TARGET>
2068 [(match_operand:VHSDF 1 "register_operand" "w")
2069 (match_operand:SI 2 "immediate_operand" "i")]
2072 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2073 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2076 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2077 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2078 (unspec:<VDQ_HSDI:FCVT_TARGET>
2079 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2080 (match_operand:SI 2 "immediate_operand" "i")]
2083 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2084 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2087 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2088 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2089 ;; the meaning of HI and LO changes depending on the target endianness.
2090 ;; While elsewhere we map the higher numbered elements of a vector to
2091 ;; the lower architectural lanes of the vector, for these patterns we want
2092 ;; to always treat "hi" as referring to the higher architectural lanes.
2093 ;; Consequently, while the patterns below look inconsistent with our
2094 ;; other big-endian patterns their behavior is as required.
2096 (define_expand "vec_unpacks_lo_<mode>"
2097 [(match_operand:<VWIDE> 0 "register_operand" "")
2098 (match_operand:VQ_HSF 1 "register_operand" "")]
2101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2102 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2108 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2109 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2110 (float_extend:<VWIDE> (vec_select:<VHALF>
2111 (match_operand:VQ_HSF 1 "register_operand" "w")
2112 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2115 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2116 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard vec_unpacks_hi pattern: widen the high architectural half of a
;; float vector.  The parallel built with `true' selects the HIGH half, so
;; we must emit the *_hi_ internal pattern; the *_lo_ one matches only a
;; vect_par_cnst_lo_half parallel and recog would reject it here.
2119 (define_expand "vec_unpacks_hi_<mode>"
2120 [(match_operand:<VWIDE> 0 "register_operand" "")
2121 (match_operand:VQ_HSF 1 "register_operand" "")]
2124 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2125 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; FCVTL: widen a 64-bit float vector to its double-width counterpart.
2130 (define_insn "aarch64_float_extend_lo_<Vwide>"
2131 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2132 (float_extend:<VWIDE>
2133 (match_operand:VDF 1 "register_operand" "w")))]
2135 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2136 [(set_attr "type" "neon_fp_cvt_widen_s")]
2139 ;; Float narrowing operations.
;; FCVTN: narrow a wide float vector into a 64-bit result.
2141 (define_insn "aarch64_float_truncate_lo_<mode>"
2142 [(set (match_operand:VDF 0 "register_operand" "=w")
2144 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2146 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2147 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2, little-endian layout: operand 1 (tied to the destination) holds
;; the already-narrowed low half, operand 2 is narrowed into the high half.
2150 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2151 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2153 (match_operand:VDF 1 "register_operand" "0")
2155 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2156 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2157 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2158 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant of the above: the concat order is reversed so the RTL
;; matches big-endian lane numbering, but the emitted insn is identical.
2161 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2162 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2165 (match_operand:<VWIDE> 2 "register_operand" "w"))
2166 (match_operand:VDF 1 "register_operand" "0")))]
2167 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2168 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2169 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Expander that picks the _le/_be form above based on BYTES_BIG_ENDIAN.
2172 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2173 [(match_operand:<VDBL> 0 "register_operand" "=w")
2174 (match_operand:VDF 1 "register_operand" "0")
2175 (match_operand:<VWIDE> 2 "register_operand" "w")]
2178 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2179 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2180 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2181 emit_insn (gen (operands[0], operands[1], operands[2]));
;; vec_pack_trunc_v2df: narrow two V2DF inputs into one V4SF.  lo/hi operand
;; indices are swapped on big-endian so the packed halves land in the lanes
;; the standard pattern expects.
2186 (define_expand "vec_pack_trunc_v2df"
2187 [(set (match_operand:V4SF 0 "register_operand")
2189 (float_truncate:V2SF
2190 (match_operand:V2DF 1 "register_operand"))
2191 (float_truncate:V2SF
2192 (match_operand:V2DF 2 "register_operand"))
2196 rtx tmp = gen_reg_rtx (V2SFmode);
2197 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2198 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2200 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2201 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2202 tmp, operands[hi]));
;; vec_pack_trunc_df: scalar DF variant — assemble a V2DF from the two
;; scalars via move_lo/hi_quad, then narrow it with FCVTN.
2207 (define_expand "vec_pack_trunc_df"
2208 [(set (match_operand:V2SF 0 "register_operand")
2211 (match_operand:DF 1 "register_operand"))
2213 (match_operand:DF 2 "register_operand"))
2217 rtx tmp = gen_reg_rtx (V2SFmode);
2218 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2219 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2221 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2222 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2223 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2231 ;; a = (b < c) ? b : c;
2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2238 ;; operand will be returned when both operands are zero (i.e. they may not
2239 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2240 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard patterns for FP vectors; implemented with the NaN- and
;; signed-zero-insensitive FMAXNM/FMINNM forms (see the note above on when
;; the mid-end is allowed to create MIN_EXPR/MAX_EXPR).
2243 (define_insn "<su><maxmin><mode>3"
2244 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2245 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2246 (match_operand:VHSDF 2 "register_operand" "w")))]
2248 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2249 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2252 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2253 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2254 ;; which implement the IEEE fmax ()/fmin () functions.
2255 (define_insn "<maxmin_uns><mode>3"
2256 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2257 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2258 (match_operand:VHSDF 2 "register_operand" "w")]
2261 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2262 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2265 ;; 'across lanes' add.
;; Integer sum-across-lanes: reduce into a scratch vector, then extract the
;; endian-corrected lane 0 as the scalar result.
2267 (define_expand "reduc_plus_scal_<mode>"
2268 [(match_operand:<VEL> 0 "register_operand" "=w")
2269 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2273 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2274 rtx scratch = gen_reg_rtx (<MODE>mode);
2275 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2276 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; FADDP: pairwise FP add of two vectors.
2281 (define_insn "aarch64_faddp<mode>"
2282 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2283 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2284 (match_operand:VHSDF 2 "register_operand" "w")]
2287 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2288 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV/ADDP (selected by <vp>) sum-across-lanes into element 0.
2291 (define_insn "aarch64_reduc_plus_internal<mode>"
2292 [(set (match_operand:VDQV 0 "register_operand" "=w")
2293 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2296 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2297 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single ADDP of the operand with itself suffices.
2300 (define_insn "aarch64_reduc_plus_internalv2si"
2301 [(set (match_operand:V2SI 0 "register_operand" "=w")
2302 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2305 "addp\\t%0.2s, %1.2s, %1.2s"
2306 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP reduction: scalar FADDP of the pair.
2309 (define_insn "reduc_plus_scal_<mode>"
2310 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2311 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2314 "faddp\\t%<Vetype>0, %1.<Vtype>"
2315 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two pairwise-add steps, then extract lane 0.
2318 (define_expand "reduc_plus_scal_v4sf"
2319 [(set (match_operand:SF 0 "register_operand")
2320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2324 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2325 rtx scratch = gen_reg_rtx (V4SFmode);
2326 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2327 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2328 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; CLS: count leading sign bits (clrsb standard pattern).
2332 (define_insn "clrsb<mode>2"
2333 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2334 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2336 "cls\\t%0.<Vtype>, %1.<Vtype>"
2337 [(set_attr "type" "neon_cls<q>")]
;; CLZ: count leading zeros.
2340 (define_insn "clz<mode>2"
2341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2342 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2344 "clz\\t%0.<Vtype>, %1.<Vtype>"
2345 [(set_attr "type" "neon_cls<q>")]
;; CNT: per-byte population count (byte vectors only).
2348 (define_insn "popcount<mode>2"
2349 [(set (match_operand:VB 0 "register_operand" "=w")
2350 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2352 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2353 [(set_attr "type" "neon_cnt<q>")]
2356 ;; 'across lanes' max and min ops.
2358 ;; Template for outputting a scalar, so we can create __builtins which can be
2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP max/min across lanes: reduce into a scratch vector, then extract the
;; endian-corrected lane 0.
2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2361 [(match_operand:<VEL> 0 "register_operand")
2362 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2366 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2367 rtx scratch = gen_reg_rtx (<MODE>mode);
2368 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2370 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2375 ;; Likewise for integer cases, signed and unsigned.
2376 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2377 [(match_operand:<VEL> 0 "register_operand")
2378 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2382 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2383 rtx scratch = gen_reg_rtx (<MODE>mode);
2384 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2386 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; SMAXV/UMAXV/SMINV/UMINV across-lanes reductions.
2391 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2392 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2393 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2396 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2397 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI: no across-lanes form, so use the pairwise op with a repeated operand.
2400 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2401 [(set (match_operand:V2SI 0 "register_operand" "=w")
2402 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2405 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2406 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min; <vp> selects the scalar/vector reduce variant.
2409 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2410 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2411 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2414 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2415 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2418 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2420 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2423 ;; Thus our BSL is of the form:
2424 ;; op0 = bsl (mask, op2, op3)
2425 ;; We can use any of:
2428 ;; bsl mask, op1, op2
2429 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2430 ;; bit op0, op2, mask
2431 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2432 ;; bif op0, op1, mask
2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2435 ;; Some forms of straight-line code may generate the equivalent form
2436 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bit select as XOR/AND/XOR RTL.  The alternative chosen (BSL/BIT/BIF)
;; depends on which input register is tied to the destination ("0" in the
;; constraints below); see the explanatory comment above.
2438 (define_insn "aarch64_simd_bsl<mode>_internal"
2439 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2443 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2444 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2445 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2446 (match_dup:<V_INT_EQUIV> 3)
2450 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2451 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2452 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2453 [(set_attr "type" "neon_bsl<q>")]
2456 ;; We need this form in addition to the above pattern to match the case
2457 ;; when combine tries merging three insns such that the second operand of
2458 ;; the outer XOR matches the second operand of the inner XOR rather than
2459 ;; the first. The two are equivalent but since recog doesn't try all
2460 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same insn as above with the inner XOR operands commuted (match_dup 2
;; instead of 3); operand roles in the output templates swap accordingly.
2462 (define_insn "*aarch64_simd_bsl<mode>_alt"
2463 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2467 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2468 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2469 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2470 (match_dup:<V_INT_EQUIV> 2)))]
2473 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2474 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2475 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2476 [(set_attr "type" "neon_bsl<q>")]
2479 ;; DImode is special, we want to avoid computing operations which are
2480 ;; more naturally computed in general purpose registers in the vector
2481 ;; registers. If we do that, we need to move all three operands from general
2482 ;; purpose registers to vector registers, then back again. However, we
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484 ;; optimizations based on the component operations of a BSL.
2486 ;; That means we need a splitter back to the individual operations, if they
2487 ;; would be better calculated on the integer side.
;; DImode BSL.  A fourth "&r" alternative keeps the computation on the
;; general-register side; when the destination ends up as a GP register the
;; splitter below rewrites the BSL as xor/and/xor (see the comment above).
2489 (define_insn_and_split "aarch64_simd_bsldi_internal"
2490 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2494 (match_operand:DI 3 "register_operand" "w,0,w,r")
2495 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2501 bsl\\t%0.8b, %2.8b, %3.8b
2502 bit\\t%0.8b, %2.8b, %1.8b
2503 bif\\t%0.8b, %3.8b, %1.8b
2505 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2506 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2508 /* Split back to individual operations. If we're before reload, and
2509 able to create a temporary register, do so. If we're after reload,
2510 we've got an early-clobber destination register, so use that.
2511 Otherwise, we can't create pseudos and we can't yet guarantee that
2512 operands[0] is safe to write, so FAIL to split. */
2515 if (reload_completed)
2516 scratch = operands[0];
2517 else if (can_create_pseudo_p ())
2518 scratch = gen_reg_rtx (DImode);
2522 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2527 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528 (set_attr "length" "4,4,4,12")]
;; As above but matching the commuted inner-XOR form (final xor uses
;; operands[2] rather than operands[3]).
2531 (define_insn_and_split "aarch64_simd_bsldi_alt"
2532 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2536 (match_operand:DI 3 "register_operand" "w,w,0,r")
2537 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2543 bsl\\t%0.8b, %3.8b, %2.8b
2544 bit\\t%0.8b, %3.8b, %1.8b
2545 bif\\t%0.8b, %2.8b, %1.8b
2547 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2548 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2550 /* Split back to individual operations. If we're before reload, and
2551 able to create a temporary register, do so. If we're after reload,
2552 we've got an early-clobber destination register, so use that.
2553 Otherwise, we can't create pseudos and we can't yet guarantee that
2554 operands[0] is safe to write, so FAIL to split. */
2557 if (reload_completed)
2558 scratch = operands[0];
2559 else if (can_create_pseudo_p ())
2560 scratch = gen_reg_rtx (DImode);
2564 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2569 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570 (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  FP modes are punned to their integer equivalent with
;; gen_lowpart (operands can't be tied across different modes), the integer
;; internal pattern does the work, and the result is punned back if needed.
2573 (define_expand "aarch64_simd_bsl<mode>"
2574 [(match_operand:VALLDIF 0 "register_operand")
2575 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2576 (match_operand:VALLDIF 2 "register_operand")
2577 (match_operand:VALLDIF 3 "register_operand")]
2580 /* We can't alias operands together if they have different modes. */
2581 rtx tmp = operands[0];
2582 if (FLOAT_MODE_P (<MODE>mode))
2584 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2585 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2586 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2588 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2589 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2593 if (tmp != operands[0])
2594 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 where mask operand 3 is set, operand 2
;; elsewhere.  The all-ones/all-zeros mask combinations collapse to a move
;; or a bitwise NOT; otherwise force values into registers and emit a BSL.
2599 (define_expand "vcond_mask_<mode><v_int_equiv>"
2600 [(match_operand:VALLDI 0 "register_operand")
2601 (match_operand:VALLDI 1 "nonmemory_operand")
2602 (match_operand:VALLDI 2 "nonmemory_operand")
2603 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2606 /* If we have (a = (P) ? -1 : 0);
2607 Then we can simply move the generated mask (result must be int). */
2608 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2609 && operands[2] == CONST0_RTX (<MODE>mode))
2610 emit_move_insn (operands[0], operands[3]);
2611 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2612 else if (operands[1] == CONST0_RTX (<MODE>mode)
2613 && operands[2] == CONSTM1_RTX (<MODE>mode))
2614 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]))
2617 if (!REG_P (operands[1]))
2618 operands[1] = force_reg (<MODE>mode, operands[1]);
2619 if (!REG_P (operands[2]))
2620 operands[2] = force_reg (<MODE>mode, operands[2]);
2621 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2622 operands[1], operands[2]));
2628 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask directly in
;; operand 0.  Each rtx code maps to a CM* pattern (unsigned forms swap the
;; operands where needed); NE is synthesised as CMEQ followed by NOT.
2630 (define_expand "vec_cmp<mode><mode>"
2631 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2632 (match_operator 1 "comparison_operator"
2633 [(match_operand:VSDQ_I_DI 2 "register_operand")
2634 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2637 rtx mask = operands[0];
2638 enum rtx_code code = GET_CODE (operands[1]);
2648 if (operands[3] == CONST0_RTX (<MODE>mode))
2653 if (!REG_P (operands[3]))
2654 operands[3] = force_reg (<MODE>mode, operands[3]);
2662 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2666 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2670 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2674 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2682 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2686 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2690 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2694 /* Handle NE as !EQ. */
2695 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2696 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2700 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Ordered comparisons map to
;; FCMGE/FCMGT/FCMEQ (with operand swaps for LT/LE); the unordered (UN*)
;; family first zeroes NaN elements so the compare raises no FP exceptions,
;; then ORs in the unordered mask — see the inline comments below.
2710 (define_expand "vec_cmp<mode><v_int_equiv>"
2711 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2712 (match_operator 1 "comparison_operator"
2713 [(match_operand:VDQF 2 "register_operand")
2714 (match_operand:VDQF 3 "nonmemory_operand")]))]
2717 int use_zero_form = 0;
2718 enum rtx_code code = GET_CODE (operands[1]);
2719 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2721 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2730 if (operands[3] == CONST0_RTX (<MODE>mode))
2737 if (!REG_P (operands[3]))
2738 operands[3] = force_reg (<MODE>mode, operands[3]);
2748 comparison = gen_aarch64_cmlt<mode>;
2753 std::swap (operands[2], operands[3]);
2757 comparison = gen_aarch64_cmgt<mode>;
2762 comparison = gen_aarch64_cmle<mode>;
2767 std::swap (operands[2], operands[3]);
2771 comparison = gen_aarch64_cmge<mode>;
2775 comparison = gen_aarch64_cmeq<mode>;
2793 /* All of the above must not raise any FP exceptions. Thus we first
2794 check each operand for NaNs and force any elements containing NaN to
2795 zero before using them in the compare.
2796 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2797 (cm<cc> (isnan (a) ? 0.0 : a,
2798 isnan (b) ? 0.0 : b))
2799 We use the following transformations for doing the comparisons:
2803 a UNLT b -> b GT a. */
2805 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2806 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812 lowpart_subreg (<V_INT_EQUIV>mode,
2815 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2819 gcc_assert (comparison != NULL);
2820 emit_insn (comparison (operands[0],
2821 lowpart_subreg (<MODE>mode,
2822 tmp0, <V_INT_EQUIV>mode),
2823 lowpart_subreg (<MODE>mode,
2824 tmp1, <V_INT_EQUIV>mode)));
2825 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2835 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2836 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2842 a NE b -> ~(a EQ b) */
2843 gcc_assert (comparison != NULL);
2844 emit_insn (comparison (operands[0], operands[2], operands[3]));
2846 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2850 /* LTGT is not guaranteed to not generate a FP exception. So let's
2851 go the faster way : ((a > b) || (b > a)). */
2852 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2853 operands[2], operands[3]));
2854 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2855 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2861 /* cmeq (a, a) & cmeq (b, b). */
2862 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2863 operands[2], operands[2]));
2864 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2865 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2867 if (code == UNORDERED)
2868 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2869 else if (code == UNEQ)
2871 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2872 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned compare: the signed expander already dispatches on the rtx code
;; (GTU, GEU, ...), so simply forward to it.
2883 (define_expand "vec_cmpu<mode><mode>"
2884 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2885 (match_operator 1 "comparison_operator"
2886 [(match_operand:VSDQ_I_DI 2 "register_operand")
2887 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2890 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2891 operands[2], operands[3]));
;; vcond: build the comparison mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped to avoid
;; the extra inversion vec_cmp would emit.
2895 (define_expand "vcond<mode><mode>"
2896 [(set (match_operand:VALLDI 0 "register_operand")
2897 (if_then_else:VALLDI
2898 (match_operator 3 "comparison_operator"
2899 [(match_operand:VALLDI 4 "register_operand")
2900 (match_operand:VALLDI 5 "nonmemory_operand")])
2901 (match_operand:VALLDI 1 "nonmemory_operand")
2902 (match_operand:VALLDI 2 "nonmemory_operand")))]
2905 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2906 enum rtx_code code = GET_CODE (operands[3]);
2908 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2909 it as well as switch operands 1/2 in order to avoid the additional
2913 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2914 operands[4], operands[5]);
2915 std::swap (operands[1], operands[2]);
2917 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2918 operands[4], operands[5]));
2919 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2920 operands[2], mask));
;; Mixed-mode vcond: FP comparison selecting between vectors of the
;; equal-sized integer mode (<V_cmp_mixed>).  Same NE-as-EQ rewrite.
2925 (define_expand "vcond<v_cmp_mixed><mode>"
2926 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2927 (if_then_else:<V_cmp_mixed>
2928 (match_operator 3 "comparison_operator"
2929 [(match_operand:VDQF_COND 4 "register_operand")
2930 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2931 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2932 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2935 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2936 enum rtx_code code = GET_CODE (operands[3]);
2938 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2939 it as well as switch operands 1/2 in order to avoid the additional
2943 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2944 operands[4], operands[5]);
2945 std::swap (operands[1], operands[2]);
2947 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2948 operands[4], operands[5]));
2949 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2950 operands[0], operands[1],
2951 operands[2], mask));
;; Unsigned vcond for integer vectors; mask mode equals the data mode.
2956 (define_expand "vcondu<mode><mode>"
2957 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2958 (if_then_else:VSDQ_I_DI
2959 (match_operator 3 "comparison_operator"
2960 [(match_operand:VSDQ_I_DI 4 "register_operand")
2961 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2962 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2963 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2966 rtx mask = gen_reg_rtx (<MODE>mode);
2967 enum rtx_code code = GET_CODE (operands[3]);
2969 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2970 it as well as switch operands 1/2 in order to avoid the additional
2974 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2975 operands[4], operands[5]);
2976 std::swap (operands[1], operands[2]);
2978 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2979 operands[4], operands[5]));
2980 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2981 operands[2], mask));
;; Unsigned integer comparison selecting between FP vectors of the
;; corresponding size.
2985 (define_expand "vcondu<mode><v_cmp_mixed>"
2986 [(set (match_operand:VDQF 0 "register_operand")
2988 (match_operator 3 "comparison_operator"
2989 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2990 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2991 (match_operand:VDQF 1 "nonmemory_operand")
2992 (match_operand:VDQF 2 "nonmemory_operand")))]
2995 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2996 enum rtx_code code = GET_CODE (operands[3]);
2998 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2999 it as well as switch operands 1/2 in order to avoid the additional
3003 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3004 operands[4], operands[5]);
3005 std::swap (operands[1], operands[2]);
3007 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3009 operands[4], operands[5]));
3010 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3011 operands[2], mask));
3015 ;; Patterns for AArch64 SIMD Intrinsics.
3017 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract a lane and sign-extend it into a general register.  The
;; lane index is flipped for big-endian before printing.
3018 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3019 [(set (match_operand:GPI 0 "register_operand" "=r")
3022 (match_operand:VDQQH 1 "register_operand" "w")
3023 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3026 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3027 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3029 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: as above but zero-extending.
3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3033 [(set (match_operand:GPI 0 "register_operand" "=r")
3036 (match_operand:VDQQH 1 "register_operand" "w")
3037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3040 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041 INTVAL (operands[2]));
3042 return "umov\\t%w0, %1.<Vetype>[%2]";
3044 [(set_attr "type" "neon_to_gp<q>")]
3047 ;; Lane extraction of a value, neither sign nor zero extension
3048 ;; is guaranteed so upper bits should be considered undefined.
3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three destinations: GP register (UMOV), SIMD register (DUP), or a memory
;; store of one lane (ST1).
3050 (define_insn "aarch64_get_lane<mode>"
3051 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3053 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3054 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3058 switch (which_alternative)
3061 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3063 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3065 return "st1\\t{%1.<Vetype>}[%2], %0";
3070 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent 64-bit values as one 128-bit register: the condition
;; requires operand 2's address to be exactly operand 1's plus the mode
;; size, and only fires when unaligned accesses are allowed.
3073 (define_insn "load_pair_lanes<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3076 (match_operand:VDC 1 "memory_operand" "Utq")
3077 (match_operand:VDC 2 "memory_operand" "m")))]
3078 "TARGET_SIMD && !STRICT_ALIGNMENT
3079 && rtx_equal_p (XEXP (operands[2], 0),
3080 plus_constant (Pmode,
3081 XEXP (operands[1], 0),
3082 GET_MODE_SIZE (<MODE>mode)))"
3084 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a concatenated pair; alternatives cover SIMD ("w,w") and GP
;; ("r,r", via STP of the X-views) source registers.
3087 (define_insn "store_pair_lanes<mode>"
3088 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3090 (match_operand:VDC 1 "register_operand" "w, r")
3091 (match_operand:VDC 2 "register_operand" "w, r")))]
3095 stp\\t%x1, %x2, %y0"
3096 [(set_attr "type" "neon_stp, store_16")]
3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concat with a zero high part, little-endian: the zero half is operand 2
;; and the value may come from a SIMD reg, GP reg, or memory.
3102 (define_insn "*aarch64_combinez<mode>"
3103 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3105 (match_operand:VDC 1 "general_operand" "w,?r,m")
3106 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3107 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3112 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3113 (set_attr "arch" "simd,fp,simd")]
;; Big-endian variant: concat order reversed to match BE lane numbering.
3116 (define_insn "*aarch64_combinez_be<mode>"
3117 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3119 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3120 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3121 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3126 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3127 (set_attr "arch" "simd,fp,simd")]
;; Join two 64-bit vectors into one 128-bit vector; the heavy lifting
;; (including endianness handling) is done in aarch64_split_simd_combine.
3130 (define_expand "aarch64_combine<mode>"
3131 [(match_operand:<VDBL> 0 "register_operand")
3132 (match_operand:VDC 1 "register_operand")
3133 (match_operand:VDC 2 "register_operand")]
3136 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Internal combine helper: fill the low then the high quadword of the
;; destination from operands 1 and 2 respectively.
3142 (define_expand "@aarch64_simd_combine<mode>"
3143 [(match_operand:<VDBL> 0 "register_operand")
3144 (match_operand:VDC 1 "register_operand")
3145 (match_operand:VDC 2 "register_operand")]
3148 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3149 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3152 [(set_attr "type" "multiple")]
3155 ;; <su><addsub>l<q>.
;; SADDL2/UADDL2/SSUBL2/USUBL2: widening add/sub of the HIGH halves of two
;; 128-bit vectors (operand 3 is the hi-half lane parallel).
3157 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3158 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3159 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3160 (match_operand:VQW 1 "register_operand" "w")
3161 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3162 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3163 (match_operand:VQW 2 "register_operand" "w")
3166 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3167 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same, but on the LOW halves (SADDL/UADDL/SSUBL/USUBL).
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3173 (match_operand:VQW 1 "register_operand" "w")
3174 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3175 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3176 (match_operand:VQW 2 "register_operand" "w")
3179 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3180 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Builder expanders for the *2 (high-half) intrinsics: each constructs the
;; hi-half parallel and forwards to the corresponding _hi_internal insn.
3184 (define_expand "aarch64_saddl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3196 (define_expand "aarch64_uaddl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3208 (define_expand "aarch64_ssubl2<mode>"
3209 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (match_operand:VQW 1 "register_operand" "w")
3211 (match_operand:VQW 2 "register_operand" "w")]
3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3215 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3220 (define_expand "aarch64_usubl2<mode>"
3221 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3222 (match_operand:VQW 1 "register_operand" "w")
3223 (match_operand:VQW 2 "register_operand" "w")]
3226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3227 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-register widening add/sub on 64-bit inputs (no half selection).
3232 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3234 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3235 (match_operand:VD_BHSI 1 "register_operand" "w"))
3237 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3239 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3240 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3243 ;; <su><addsub>w<q>.

;; widen_ssum<mode>3, Q-register variant: add the sign-extended elements
;; of operand 1 to the wide accumulator in operand 2.  Lowered as a
;; low-half saddw (via *_internal with a half-select parallel; the
;; 'false' argument presumably selects the low half -- confirm) followed
;; by saddw2 for the remaining high half.
3245 (define_expand "widen_ssum<mode>3"
3246 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3247 (plus:<VDBLW> (sign_extend:<VDBLW>
3248 (match_operand:VQW 1 "register_operand" ""))
3249 (match_operand:<VDBLW> 2 "register_operand" "")))]
3252 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3253 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3255 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3257 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));

;; widen_ssum<mode>3, D-register variant: a single saddw covers the
;; whole input vector.
3262 (define_expand "widen_ssum<mode>3"
3263 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3264 (plus:<VWIDE> (sign_extend:<VWIDE>
3265 (match_operand:VD_BHSI 1 "register_operand" ""))
3266 (match_operand:<VWIDE> 2 "register_operand" "")))]
3269 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));

;; widen_usum<mode>3, Q-register variant: unsigned analogue of the
;; widen_ssum expander above (uaddw then uaddw2).
3273 (define_expand "widen_usum<mode>3"
3274 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3275 (plus:<VDBLW> (zero_extend:<VDBLW>
3276 (match_operand:VQW 1 "register_operand" ""))
3277 (match_operand:<VDBLW> 2 "register_operand" "")))]
3280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3281 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3283 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3285 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));

;; widen_usum<mode>3, D-register variant: single uaddw.
3290 (define_expand "widen_usum<mode>3"
3291 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3292 (plus:<VWIDE> (zero_extend:<VWIDE>
3293 (match_operand:VD_BHSI 1 "register_operand" ""))
3294 (match_operand:<VWIDE> 2 "register_operand" "")))]
3297 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide add/subtract: one wide operand plus an extended narrow operand.
;; Plain forms take a D-register narrow input; the *_internal forms take
;; a Q register together with a vect_par_cnst_{lo,hi}_half parallel that
;; selects which half feeds the extension (lo -> "addw"/"subw",
;; hi -> "addw2"/"subw2").

;; <su>subw: wide minus extended narrow vector.
3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3302 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3303 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3305 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3307 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_sub_widen")]

;; <su>subw low-half form (Q input, lo-half parallel).
3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3312 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3313 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3316 (match_operand:VQW 2 "register_operand" "w")
3317 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3319 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3320 [(set_attr "type" "neon_sub_widen")]

;; <su>subw2: high-half form (hi-half parallel).
3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3325 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3328 (match_operand:VQW 2 "register_operand" "w")
3329 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3331 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3332 [(set_attr "type" "neon_sub_widen")]

;; <su>addw: wide plus extended narrow vector.  Note the extended
;; operand is operand 2 but appears first in the RTL plus.
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3338 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3341 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342 [(set_attr "type" "neon_add_widen")]

;; <su>addw low-half form.
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3350 (match_operand:VQW 2 "register_operand" "w")
3351 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3354 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355 [(set_attr "type" "neon_add_widen")]

;; <su>addw2: high-half form.
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3363 (match_operand:VQW 2 "register_operand" "w")
3364 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3367 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368 [(set_attr "type" "neon_add_widen")]
;; Expanders for the "2" (high-half) wide add/subtract intrinsics.
;; Each builds a high-half-selecting parallel ('true' argument --
;; presumably high half, confirm against aarch64_simd_vect_par_cnst_half)
;; and defers to the matching *w2<mode>_internal insn above.

3371 (define_expand "aarch64_saddw2<mode>"
3372 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3373 (match_operand:<VWIDE> 1 "register_operand" "w")
3374 (match_operand:VQW 2 "register_operand" "w")]
3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3378 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],

3383 (define_expand "aarch64_uaddw2<mode>"
3384 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3385 (match_operand:<VWIDE> 1 "register_operand" "w")
3386 (match_operand:VQW 2 "register_operand" "w")]
3389 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3390 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],

3396 (define_expand "aarch64_ssubw2<mode>"
3397 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3398 (match_operand:<VWIDE> 1 "register_operand" "w")
3399 (match_operand:VQW 2 "register_operand" "w")]
3402 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3403 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],

3408 (define_expand "aarch64_usubw2<mode>"
3409 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3410 (match_operand:<VWIDE> 1 "register_operand" "w")
3411 (match_operand:VQW 2 "register_operand" "w")]
3414 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3415 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3420 ;; <su><r>h<addsub>.

;; Standard-named average patterns.  The unspec forms map the optab
;; names <u>avg*_floor/_ceil onto the AdvSIMD halving-add instructions
;; (floor -> truncating halve, ceil -> rounding halve -- the unspec code
;; selecting this is on lines not visible here; confirm).
3422 (define_expand "<u>avg<mode>3_floor"
3423 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425 (match_operand:VDQ_BHSI 2 "register_operand")]

3430 (define_expand "<u>avg<mode>3_ceil"
3431 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433 (match_operand:VDQ_BHSI 2 "register_operand")]

;; Halving add/subtract insn: emits shadd/uhadd/srhadd/urhadd/shsub/...
;; depending on the <sur> and <addsub> iterator values.
3438 (define_insn "aarch64_<sur>h<addsub><mode>"
3439 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3440 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3441 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3444 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3445 [(set_attr "type" "neon_<addsub>_halve<q>")]
3448 ;; <r><addsub>hn<q>.

;; Narrowing high-half add/subtract: (r)addhn/(r)subhn take two
;; Q-register inputs and produce a half-width result.
3450 (define_insn "aarch64_<sur><addsub>hn<mode>"
3451 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3452 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3453 (match_operand:VQN 2 "register_operand" "w")]
3456 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3457 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]

;; "hn2" form: narrows into the high half of the destination; operand 1
;; (tied to the output via the "0" constraint) supplies the low half.
3460 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3461 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3462 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3463 (match_operand:VQN 2 "register_operand" "w")
3464 (match_operand:VQN 3 "register_operand" "w")]
3467 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3468 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply on byte vectors (PMUL).
3473 (define_insn "aarch64_pmul<mode>"
3474 [(set (match_operand:VB 0 "register_operand" "=w")
3475 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3476 (match_operand:VB 2 "register_operand" "w")]
3479 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3480 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; FMULX: floating-point multiply extended, vector and scalar FP modes.
3485 (define_insn "aarch64_fmulx<mode>"
3486 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3488 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3489 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3492 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3493 [(set_attr "type" "neon_fp_mul_<stype>")]
;; FMULX by-lane combiner patterns.  These match an fmulx whose second
;; input is a vec_duplicate of one selected lane; operand 3 is the lane
;; index, canonicalised for endianness via aarch64_endian_lane_rtx
;; before printing.

3496 ;; vmulxq_lane_f32, and vmulx_laneq_f32

;; Lane taken from a vector of the *opposite* width (VSWAP_WIDTH).
3498 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3499 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3501 [(match_operand:VDQSF 1 "register_operand" "w")
3502 (vec_duplicate:VDQSF
3504 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3505 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3509 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3510 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3512 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]

3515 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; Lane taken from a vector of the *same* mode.
3517 (define_insn "*aarch64_mulx_elt<mode>"
3518 [(set (match_operand:VDQF 0 "register_operand" "=w")
3520 [(match_operand:VDQF 1 "register_operand" "w")
3523 (match_operand:VDQF 2 "register_operand" "w")
3524 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3528 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3529 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3531 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]

;; Second input is a duplicated scalar register: printed as lane [0].
3536 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3537 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3539 [(match_operand:VHSDF 1 "register_operand" "w")
3540 (vec_duplicate:VHSDF
3541 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3544 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3545 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

3548 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3549 ;; vmulxd_lane_f64 == vmulx_lane_f64
3550 ;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar fmulx of a scalar register with one extracted vector lane.
3552 (define_insn "*aarch64_vgetfmulx<mode>"
3553 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3555 [(match_operand:<VEL> 1 "register_operand" "w")
3557 (match_operand:VDQF 2 "register_operand" "w")
3558 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3562 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3563 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3565 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/subtract: sqadd/uqadd/sqsub/uqsub via the BINQOPS
;; code iterator.
3569 (define_insn "aarch64_<su_optab><optab><mode>"
3570 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3571 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3572 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3574 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3575 [(set_attr "type" "neon_<optab><q>")]

3578 ;; suqadd and usqadd

;; Signed/unsigned saturating accumulate: destination is read-modify-
;; write (operand 1 tied to output with constraint "0"); only the
;; addend (operand 2) appears in the printed template.
3580 (define_insn "aarch64_<sur>qadd<mode>"
3581 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3582 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3583 (match_operand:VSDQ_I 2 "register_operand" "w")]
3586 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3587 [(set_attr "type" "neon_qadd<q>")]

;; SQXTUN: signed saturating extract unsigned narrow.
3592 (define_insn "aarch64_sqmovun<mode>"
3593 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3594 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3597 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3598 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]

3601 ;; sqmovn and uqmovn

;; SQXTN/UQXTN: saturating narrow.
3603 (define_insn "aarch64_<sur>qmovn<mode>"
3604 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3605 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3608 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3609 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]

;; Unary saturating ops (e.g. sqabs/sqneg -- the code iterator line is
;; not visible here).
3614 (define_insn "aarch64_s<optab><mode>"
3615 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3617 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3619 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3620 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH / SQRDMULH: saturating doubling multiply returning high half,
;; optionally rounding (<r>).  Plain form plus by-lane variants; lane
;; vector comes from <VCOND> ("lane") or <VCONQ> ("laneq"), and the
;; lane number is endian-adjusted before printing.

3625 (define_insn "aarch64_sq<r>dmulh<mode>"
3626 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3628 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3632 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3633 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]

;; Vector by-lane form.
3638 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3641 [(match_operand:VDQHS 1 "register_operand" "w")
3643 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3644 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3648 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3649 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3650 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Vector by-laneq form (lane from a full Q vector).
3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3654 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3656 [(match_operand:VDQHS 1 "register_operand" "w")
3658 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3659 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3663 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Scalar (SD_HSI) by-lane form.
3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3669 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3671 [(match_operand:SD_HSI 1 "register_operand" "w")
3673 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3678 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3679 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3680 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Scalar by-laneq form.
3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3684 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3686 [(match_operand:SD_HSI 1 "register_operand" "w")
3688 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3689 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3693 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3694 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3695 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH / SQRDMLSH (v8.1-A RDMA): rounding saturating doubling
;; multiply-accumulate high.  Operand 1 is the accumulator, tied to the
;; destination ("0" constraint).  SQRDMLH_AS:rdma_as selects 'a' or 's'.

3700 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3701 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3703 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3704 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3705 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3708 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

3712 ;; sqrdml[as]h_lane.

;; Vector by-lane form; lane index endian-adjusted before printing.
3714 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3715 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3717 [(match_operand:VDQHS 1 "register_operand" "0")
3718 (match_operand:VDQHS 2 "register_operand" "w")
3720 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3721 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3725 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3727 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar by-lane form.
3732 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3733 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3735 [(match_operand:SD_HSI 1 "register_operand" "0")
3736 (match_operand:SD_HSI 2 "register_operand" "w")
3738 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3739 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3743 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3745 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3747 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

3750 ;; sqrdml[as]h_laneq.

;; Vector by-laneq form (lane from a full Q vector, <VCONQ>).
3752 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3753 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3755 [(match_operand:VDQHS 1 "register_operand" "0")
3756 (match_operand:VDQHS 2 "register_operand" "w")
3758 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3759 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3763 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3765 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3767 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar by-laneq form.
3770 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3771 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3773 [(match_operand:SD_HSI 1 "register_operand" "0")
3774 (match_operand:SD_HSI 2 "register_operand" "w")
3776 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3777 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3781 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3783 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3785 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL / SQDMLSL: signed saturating doubling multiply-accumulate
;; long.  SBINQOPS:as selects accumulate ('a') vs subtract ('s');
;; operand 1 is the wide accumulator tied to the destination.

3790 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3791 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3793 (match_operand:<VWIDE> 1 "register_operand" "0")
3796 (sign_extend:<VWIDE>
3797 (match_operand:VSD_HSI 2 "register_operand" "w"))
3798 (sign_extend:<VWIDE>
3799 (match_operand:VSD_HSI 3 "register_operand" "w")))
3802 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3803 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; Vector by-lane form: multiplier is a duplicated lane of a <VCOND>
;; vector; lane index endian-adjusted before printing.
3808 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3811 (match_operand:<VWIDE> 1 "register_operand" "0")
3814 (sign_extend:<VWIDE>
3815 (match_operand:VD_HSI 2 "register_operand" "w"))
3816 (sign_extend:<VWIDE>
3817 (vec_duplicate:VD_HSI
3819 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3825 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Vector by-laneq form (lane from a <VCONQ> vector).
3832 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3835 (match_operand:<VWIDE> 1 "register_operand" "0")
3838 (sign_extend:<VWIDE>
3839 (match_operand:VD_HSI 2 "register_operand" "w"))
3840 (sign_extend:<VWIDE>
3841 (vec_duplicate:VD_HSI
3843 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3844 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3849 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3851 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3853 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) by-lane form -- no vec_duplicate, the extracted
;; lane feeds the multiply directly.
3856 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3857 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3859 (match_operand:<VWIDE> 1 "register_operand" "0")
3862 (sign_extend:<VWIDE>
3863 (match_operand:SD_HSI 2 "register_operand" "w"))
3864 (sign_extend:<VWIDE>
3866 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3867 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3872 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3874 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3876 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar by-laneq form.
3879 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3880 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3882 (match_operand:<VWIDE> 1 "register_operand" "0")
3885 (sign_extend:<VWIDE>
3886 (match_operand:SD_HSI 2 "register_operand" "w"))
3887 (sign_extend:<VWIDE>
3889 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3890 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3895 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3897 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3899 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; "_n" form: multiplier is a duplicated scalar register, printed as
;; lane [0].
3904 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3905 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3907 (match_operand:<VWIDE> 1 "register_operand" "0")
3910 (sign_extend:<VWIDE>
3911 (match_operand:VD_HSI 2 "register_operand" "w"))
3912 (sign_extend:<VWIDE>
3913 (vec_duplicate:VD_HSI
3914 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3917 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2 / SQDMLSL2: high-half saturating doubling multiply-
;; accumulate long.  The internal insn selects matching halves of both
;; Q-register multiplicands with vect_par_cnst_hi_half parallels.

3923 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3926 (match_operand:<VWIDE> 1 "register_operand" "0")
3929 (sign_extend:<VWIDE>
3931 (match_operand:VQ_HSI 2 "register_operand" "w")
3932 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933 (sign_extend:<VWIDE>
3935 (match_operand:VQ_HSI 3 "register_operand" "w")
3939 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3940 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expander: builds the hi-half parallel and defers to the internal insn.
3943 (define_expand "aarch64_sqdmlal2<mode>"
3944 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3945 (match_operand:<VWIDE> 1 "register_operand" "w")
3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3947 (match_operand:VQ_HSI 3 "register_operand" "w")]
3950 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3951 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3952 operands[2], operands[3], p));

;; As above, subtracting variant.
3956 (define_expand "aarch64_sqdmlsl2<mode>"
3957 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3958 (match_operand:<VWIDE> 1 "register_operand" "w")
3959 (match_operand:VQ_HSI 2 "register_operand" "w")
3960 (match_operand:VQ_HSI 3 "register_operand" "w")]
3963 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3964 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3965 operands[2], operands[3], p));
;; SQDML[AS]L2 by-lane/by-laneq: first multiplicand is the high half of
;; a Q register, second is a duplicated lane (operand 3 vector, operand
;; 4 lane index, endian-adjusted); operand 5 is the hi-half parallel.

3971 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3972 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3974 (match_operand:<VWIDE> 1 "register_operand" "0")
3977 (sign_extend:<VWIDE>
3979 (match_operand:VQ_HSI 2 "register_operand" "w")
3980 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3981 (sign_extend:<VWIDE>
3982 (vec_duplicate:<VHALF>
3984 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3992 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; laneq internal: lane taken from a <VCONQ> vector.
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3998 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4000 (match_operand:<VWIDE> 1 "register_operand" "0")
4003 (sign_extend:<VWIDE>
4005 (match_operand:VQ_HSI 2 "register_operand" "w")
4006 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4007 (sign_extend:<VWIDE>
4008 (vec_duplicate:<VHALF>
4010 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4011 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4016 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4018 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4020 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders for the four lane/laneq x add/sub combinations: each
;; builds a hi-half parallel and calls the matching *_internal pattern.
4023 (define_expand "aarch64_sqdmlal2_lane<mode>"
4024 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4025 (match_operand:<VWIDE> 1 "register_operand" "w")
4026 (match_operand:VQ_HSI 2 "register_operand" "w")
4027 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4028 (match_operand:SI 4 "immediate_operand" "i")]
4031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4032 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4033 operands[2], operands[3],

4038 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4039 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4040 (match_operand:<VWIDE> 1 "register_operand" "w")
4041 (match_operand:VQ_HSI 2 "register_operand" "w")
4042 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4043 (match_operand:SI 4 "immediate_operand" "i")]
4046 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4047 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4048 operands[2], operands[3],

4053 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4054 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (match_operand:<VWIDE> 1 "register_operand" "w")
4056 (match_operand:VQ_HSI 2 "register_operand" "w")
4057 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4058 (match_operand:SI 4 "immediate_operand" "i")]
4061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4062 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4063 operands[2], operands[3],

4068 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4069 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4070 (match_operand:<VWIDE> 1 "register_operand" "w")
4071 (match_operand:VQ_HSI 2 "register_operand" "w")
4072 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4073 (match_operand:SI 4 "immediate_operand" "i")]
4076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4077 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4078 operands[2], operands[3],
;; SQDML[AS]L2 "_n" form: second multiplicand is a duplicated scalar
;; register, printed as lane [0]; operand 4 is the hi-half parallel.

4083 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4084 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4086 (match_operand:<VWIDE> 1 "register_operand" "0")
4089 (sign_extend:<VWIDE>
4091 (match_operand:VQ_HSI 2 "register_operand" "w")
4092 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4093 (sign_extend:<VWIDE>
4094 (vec_duplicate:<VHALF>
4095 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4098 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4099 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders: build the hi-half parallel, forward to *_n_internal.
4102 (define_expand "aarch64_sqdmlal2_n<mode>"
4103 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4104 (match_operand:<VWIDE> 1 "register_operand" "w")
4105 (match_operand:VQ_HSI 2 "register_operand" "w")
4106 (match_operand:<VEL> 3 "register_operand" "w")]
4109 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4110 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4111 operands[2], operands[3],

4116 (define_expand "aarch64_sqdmlsl2_n<mode>"
4117 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (match_operand:<VWIDE> 1 "register_operand" "w")
4119 (match_operand:VQ_HSI 2 "register_operand" "w")
4120 (match_operand:<VEL> 3 "register_operand" "w")]
4123 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4124 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4125 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long (no accumulator).
;; Base form plus lane/laneq variants (vector VD_HSI forms duplicate
;; the selected lane; scalar SD_HSI forms use it directly) and the
;; "_n" duplicated-scalar form.

4132 (define_insn "aarch64_sqdmull<mode>"
4133 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136 (sign_extend:<VWIDE>
4137 (match_operand:VSD_HSI 1 "register_operand" "w"))
4138 (sign_extend:<VWIDE>
4139 (match_operand:VSD_HSI 2 "register_operand" "w")))
4142 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4143 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; Vector by-lane (lane from <VCOND>); index endian-adjusted.
4148 (define_insn "aarch64_sqdmull_lane<mode>"
4149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4152 (sign_extend:<VWIDE>
4153 (match_operand:VD_HSI 1 "register_operand" "w"))
4154 (sign_extend:<VWIDE>
4155 (vec_duplicate:VD_HSI
4157 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4158 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4163 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4164 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Vector by-laneq (lane from <VCONQ>).
4169 (define_insn "aarch64_sqdmull_laneq<mode>"
4170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4173 (sign_extend:<VWIDE>
4174 (match_operand:VD_HSI 1 "register_operand" "w"))
4175 (sign_extend:<VWIDE>
4176 (vec_duplicate:VD_HSI
4178 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4179 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4184 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4185 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4187 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar by-lane.
4190 (define_insn "aarch64_sqdmull_lane<mode>"
4191 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4194 (sign_extend:<VWIDE>
4195 (match_operand:SD_HSI 1 "register_operand" "w"))
4196 (sign_extend:<VWIDE>
4198 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4199 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4204 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4205 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4207 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar by-laneq.
4210 (define_insn "aarch64_sqdmull_laneq<mode>"
4211 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4214 (sign_extend:<VWIDE>
4215 (match_operand:SD_HSI 1 "register_operand" "w"))
4216 (sign_extend:<VWIDE>
4218 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4219 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4224 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4225 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; "_n" form: duplicated scalar multiplier, printed as lane [0].
4232 (define_insn "aarch64_sqdmull_n<mode>"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4236 (sign_extend:<VWIDE>
4237 (match_operand:VD_HSI 1 "register_operand" "w"))
4238 (sign_extend:<VWIDE>
4239 (vec_duplicate:VD_HSI
4240 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4244 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4245 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: high-half saturating doubling multiply long.  Internal
;; insns take a vect_par_cnst_hi_half parallel selecting the half of
;; the Q-register input(s); expanders build that parallel and forward.

4252 (define_insn "aarch64_sqdmull2<mode>_internal"
4253 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4256 (sign_extend:<VWIDE>
4258 (match_operand:VQ_HSI 1 "register_operand" "w")
4259 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4260 (sign_extend:<VWIDE>
4262 (match_operand:VQ_HSI 2 "register_operand" "w")
4267 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4268 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

4271 (define_expand "aarch64_sqdmull2<mode>"
4272 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4273 (match_operand:VQ_HSI 1 "register_operand" "w")
4274 (match_operand:VQ_HSI 2 "register_operand" "w")]
4277 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4278 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; By-lane internal: second multiplicand is a duplicated lane of a
;; <VCOND> vector; lane index endian-adjusted before printing.
4285 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4289 (sign_extend:<VWIDE>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4296 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4302 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4303 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; By-laneq internal (lane from <VCONQ>).
4308 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4312 (sign_extend:<VWIDE>
4314 (match_operand:VQ_HSI 1 "register_operand" "w")
4315 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4316 (sign_extend:<VWIDE>
4317 (vec_duplicate:<VHALF>
4319 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4325 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4326 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4328 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

4331 (define_expand "aarch64_sqdmull2_lane<mode>"
4332 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4333 (match_operand:VQ_HSI 1 "register_operand" "w")
4334 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4335 (match_operand:SI 3 "immediate_operand" "i")]
4338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4339 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4340 operands[2], operands[3],

4345 (define_expand "aarch64_sqdmull2_laneq<mode>"
4346 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4347 (match_operand:VQ_HSI 1 "register_operand" "w")
4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349 (match_operand:SI 3 "immediate_operand" "i")]
4352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4353 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4354 operands[2], operands[3],

;; "_n" internal: duplicated scalar multiplier, printed as lane [0].
4361 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4362 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4365 (sign_extend:<VWIDE>
4367 (match_operand:VQ_HSI 1 "register_operand" "w")
4368 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4369 (sign_extend:<VWIDE>
4370 (vec_duplicate:<VHALF>
4371 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4375 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4376 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

4379 (define_expand "aarch64_sqdmull2_n<mode>"
4380 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4381 (match_operand:VQ_HSI 1 "register_operand" "w")
4382 (match_operand:<VEL> 2 "register_operand" "w")]
4385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4386 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; -------------------------------------------------------------------
;; Vector shift patterns (register-count and immediate-count forms).
;; The <sur> iterator attribute selects the signed/unsigned/rounding
;; mnemonic prefix.  NOTE(review): the original numbering below is
;; non-contiguous, so condition strings and unspec codes are missing
;; from this extract.
;; -------------------------------------------------------------------

;; Shift by a per-lane register amount: [us][r]shl.
4393 (define_insn "aarch64_<sur>shl<mode>"
4394 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4396 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4397 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
;; The trailing ';' after the template is inert: ';' starts a comment
;; in .md syntax.
4400 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4401 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) shift by register: [us]q[r]shl.
4407 (define_insn "aarch64_<sur>q<r>shl<mode>"
4408 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4410 [(match_operand:VSDQ_I 1 "register_operand" "w")
4411 (match_operand:VSDQ_I 2 "register_operand" "w")]
4414 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4415 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift-left by immediate (low half).  When the shift amount
;; equals the element bit width, the plain SHLL encoding must be used
;; (s/ushll do not accept a full-width shift).
4420 (define_insn "aarch64_<sur>shll_n<mode>"
4421 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4422 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4424 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4428 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4429 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4431 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4433 [(set_attr "type" "neon_shift_imm_long")]

;; Widening shift-left by immediate, high-half ("2") variant; same
;; full-width special case as above.
4438 (define_insn "aarch64_<sur>shll2_n<mode>"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4440 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4441 (match_operand:SI 2 "immediate_operand" "i")]
4445 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4446 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4448 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4450 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (rounding variants selected by <sur>).
4455 (define_insn "aarch64_<sur>shr_n<mode>"
4456 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4457 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4459 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4462 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4463 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift-right-and-accumulate: operand 1 is tied to the output ("0"
;; constraint) because [us][r]sra accumulates into its destination.
4468 (define_insn "aarch64_<sur>sra_n<mode>"
4469 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4470 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4471 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4473 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4476 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4477 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (sli/sri): destination is partially preserved, so
;; operand 1 is tied to the output here as well.
4482 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4483 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4484 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4485 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4487 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4490 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4491 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift-left by immediate ([us]qshl / sqshlu).
4496 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4497 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4498 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4500 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4503 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4504 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift-right-narrow by immediate; result is the
;; narrowed mode <VNARROWQ>.
4510 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4511 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4512 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4514 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4517 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4518 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4522 ;; cm(eq|ge|gt|lt|le)
4523 ;; Note, we have constraints for Dz and Z as different expanders
4524 ;; have different ideas of what should be passed to this pattern.

;; Signed vector compare producing an all-ones/all-zeros mask in the
;; integer-equivalent mode.  Alternative 2 matches a zero operand (ZDz)
;; and emits the compare-against-#0 form.
;; NOTE(review): original numbering is non-contiguous here; condition
;; strings and some RTL lines are missing from this extract.
4526 (define_insn "aarch64_cm<optab><mode>"
4527 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4529 (COMPARISONS:<V_INT_EQUIV>
4530 (match_operand:VDQ_I 1 "register_operand" "w,w")
4531 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4535 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4536 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4537 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DI-mode scalar compare.  Kept as one pattern with a CC clobber until
;; after reload, then split depending on which register file the
;; operands landed in.
4540 (define_insn_and_split "aarch64_cm<optab>di"
4541 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4544 (match_operand:DI 1 "register_operand" "w,w,r")
4545 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4547 (clobber (reg:CC CC_REGNUM))]
4550 "&& reload_completed"
4551 [(set (match_operand:DI 0 "register_operand")
4554 (match_operand:DI 1 "register_operand")
4555 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4558 /* If we are in the general purpose register file,
4559 we split to a sequence of comparison and store.  */
4560 if (GP_REGNUM_P (REGNO (operands[0]))
4561 && GP_REGNUM_P (REGNO (operands[1])))
;; GP path: real flag-setting compare + negated cstore.
4563 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4564 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4565 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4566 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4569 /* Otherwise, we expand to a similar pattern which does not
4570 clobber CC_REGNUM.  */
4572 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; FP-register-file version of the DI compare produced by the split
;; above; no CC clobber, valid only post-reload.
4575 (define_insn "*aarch64_cm<optab>di"
4576 [(set (match_operand:DI 0 "register_operand" "=w,w")
4579 (match_operand:DI 1 "register_operand" "w,w")
4580 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4582 "TARGET_SIMD && reload_completed"
4584 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4585 cm<optab>\t%d0, %d1, #0"
4586 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares (cmhi/cmhs family via UCOMPARISONS).  Unlike
;; the signed variants there is no compare-against-zero alternative.
;; NOTE(review): original numbering is non-contiguous; condition strings
;; and some RTL lines are missing from this extract.
4591 (define_insn "aarch64_cm<optab><mode>"
4592 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4594 (UCOMPARISONS:<V_INT_EQUIV>
4595 (match_operand:VDQ_I 1 "register_operand" "w")
4596 (match_operand:VDQ_I 2 "register_operand" "w")
4599 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4600 [(set_attr "type" "neon_compare<q>")]

;; DI-mode unsigned compare; CC-clobbering until reload, then split by
;; register file exactly like the signed version.
4603 (define_insn_and_split "aarch64_cm<optab>di"
4604 [(set (match_operand:DI 0 "register_operand" "=w,r")
4607 (match_operand:DI 1 "register_operand" "w,r")
4608 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4610 (clobber (reg:CC CC_REGNUM))]
4613 "&& reload_completed"
4614 [(set (match_operand:DI 0 "register_operand")
4617 (match_operand:DI 1 "register_operand")
4618 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4621 /* If we are in the general purpose register file,
4622 we split to a sequence of comparison and store.  */
4623 if (GP_REGNUM_P (REGNO (operands[0]))
4624 && GP_REGNUM_P (REGNO (operands[1])))
;; Plain CCmode suffices for the unsigned conditions (contrast with the
;; signed split, which consults SELECT_CC_MODE).
4626 machine_mode mode = CCmode;
4627 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4628 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4629 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4632 /* Otherwise, we expand to a similar pattern which does not
4633 clobber CC_REGNUM.  */
4635 [(set_attr "type" "neon_compare,multiple")]

;; FP-register-file DI unsigned compare produced by the split above.
4638 (define_insn "*aarch64_cm<optab>di"
4639 [(set (match_operand:DI 0 "register_operand" "=w")
4642 (match_operand:DI 1 "register_operand" "w")
4643 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4645 "TARGET_SIMD && reload_completed"
4646 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4647 [(set_attr "type" "neon_compare")]
4652 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4653 ;; we don't have any insns using ne, and aarch64_vcond outputs
4654 ;; not (neg (eq (and x y) 0))
4655 ;; which is rewritten by simplify_rtx as
4656 ;; plus (eq (and x y) 0) -1.

;; CMTST: test-bits compare, matched in the canonical
;; plus (eq (and x y) 0) -1 form described in the comment above.
;; NOTE(review): original numbering is non-contiguous; some RTL lines
;; are missing from this extract.
4658 (define_insn "aarch64_cmtst<mode>"
4659 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4663 (match_operand:VDQ_I 1 "register_operand" "w")
4664 (match_operand:VDQ_I 2 "register_operand" "w"))
4665 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4666 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4669 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4670 [(set_attr "type" "neon_tst<q>")]

;; DI-mode cmtst; CC-clobbering until reload, then split by register
;; file like the other DI compare patterns in this file.
4673 (define_insn_and_split "aarch64_cmtstdi"
4674 [(set (match_operand:DI 0 "register_operand" "=w,r")
4678 (match_operand:DI 1 "register_operand" "w,r")
4679 (match_operand:DI 2 "register_operand" "w,r"))
4681 (clobber (reg:CC CC_REGNUM))]
4684 "&& reload_completed"
4685 [(set (match_operand:DI 0 "register_operand")
4689 (match_operand:DI 1 "register_operand")
4690 (match_operand:DI 2 "register_operand"))
4693 /* If we are in the general purpose register file,
4694 we split to a sequence of comparison and store.  */
4695 if (GP_REGNUM_P (REGNO (operands[0]))
4696 && GP_REGNUM_P (REGNO (operands[1])))
;; GP path: (x & y) != 0 computed via tst + negated cstore.
4698 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4699 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4700 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4701 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4702 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4705 /* Otherwise, we expand to a similar pattern which does not
4706 clobber CC_REGNUM.  */
4708 [(set_attr "type" "neon_tst,multiple")]

;; FP-register-file DI cmtst produced by the split above.
4711 (define_insn "*aarch64_cmtstdi"
4712 [(set (match_operand:DI 0 "register_operand" "=w")
4716 (match_operand:DI 1 "register_operand" "w")
4717 (match_operand:DI 2 "register_operand" "w"))
4720 "cmtst\t%d0, %d1, %d2"
4721 [(set_attr "type" "neon_tst")]
4724 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector/scalar compare producing an integer mask.
;; Alternative 2 matches +0.0 (YDz) and uses the compare-against-0 form.
;; NOTE(review): original numbering is non-contiguous here; condition
;; strings are missing from this extract.
4726 (define_insn "aarch64_cm<optab><mode>"
4727 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4729 (COMPARISONS:<V_INT_EQUIV>
4730 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4731 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4735 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4736 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4737 [(set_attr "type" "neon_fp_compare_<stype><q>")]

4741 ;; Note we can also handle what would be fac(le|lt) by
4742 ;; generating fac(ge|gt).

;; Absolute compare (facge/facgt): compares the absolute values of the
;; two inputs, per the FAC_COMPARISONS iterator.
4744 (define_insn "aarch64_fac<optab><mode>"
4745 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4747 (FAC_COMPARISONS:<V_INT_EQUIV>
4749 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4751 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4754 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4755 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise add across adjacent lanes (D-register integer modes).
;; NOTE(review): original numbering is non-contiguous; unspec codes and
;; condition strings are missing from this extract.
4760 (define_insn "aarch64_addp<mode>"
4761 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4763 [(match_operand:VD_BHSI 1 "register_operand" "w")
4764 (match_operand:VD_BHSI 2 "register_operand" "w")]
4767 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4768 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar pairwise add of the two halves of a V2DI input.
4771 (define_insn "aarch64_addpdi"
4772 [(set (match_operand:DI 0 "register_operand" "=w")
4774 [(match_operand:V2DI 1 "register_operand" "w")]
4778 [(set_attr "type" "neon_reduc_add")]

;; sqrt expander: gives aarch64_emit_approx_sqrt a chance to emit the
;; Newton-Raphson approximation sequence; otherwise falls through to
;; the fsqrt insn below.
4783 (define_expand "sqrt<mode>2"
4784 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4785 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4788 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

;; Direct hardware vector square root.
4792 (define_insn "*sqrt<mode>2"
4793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4794 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4796 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4797 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4800 ;; Patterns for vector struct loads and stores.

;; Two-register structure load/store family.  OImode is the opaque
;; 2x128-bit register-tuple mode; the inner VQ/VALLDIF unspec only
;; records the element mode for the iterator.
;; NOTE(review): original numbering is non-contiguous throughout; unspec
;; codes and condition strings are missing from this extract.
4802 (define_insn "aarch64_simd_ld2<mode>"
4803 [(set (match_operand:OI 0 "register_operand" "=w")
4804 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4805 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4808 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4809 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one element pair and replicate it to all lanes of both
;; destination registers.  Source is a BLKmode MEM of element size.
4812 (define_insn "aarch64_simd_ld2r<mode>"
4813 [(set (match_operand:OI 0 "register_operand" "=w")
4814 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4815 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4818 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4819 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 single-lane: merge one loaded element pair into lane %3 of the
;; existing tuple (operand 2 tied to the output via "0").
4822 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4823 [(set (match_operand:OI 0 "register_operand" "=w")
4824 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4825 (match_operand:OI 2 "register_operand" "0")
4826 (match_operand:SI 3 "immediate_operand" "i")
4827 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
;; Convert GCC lane numbering to the architectural index.
4831 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4832 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4834 [(set_attr "type" "neon_load2_one_lane")]

;; Standard-name expander: on big-endian a register-list reversal is
;; inserted after the load so RTL lane numbering stays consistent.
4837 (define_expand "vec_load_lanesoi<mode>"
4838 [(set (match_operand:OI 0 "register_operand" "=w")
4839 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4840 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4844 if (BYTES_BIG_ENDIAN)
4846 rtx tmp = gen_reg_rtx (OImode);
4847 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4848 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4849 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4852 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store a two-register structure.
4856 (define_insn "aarch64_simd_st2<mode>"
4857 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4858 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4859 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4862 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4863 [(set_attr "type" "neon_store2_2reg<q>")]

4866 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4867 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4868 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4869 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4870 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4871 (match_operand:SI 2 "immediate_operand" "i")]
4875 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4876 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4878 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Standard-name store expander; mirror of vec_load_lanesoi, reversing
;; the register list before the store on big-endian.
4881 (define_expand "vec_store_lanesoi<mode>"
4882 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4883 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4884 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4888 if (BYTES_BIG_ENDIAN)
4890 rtx tmp = gen_reg_rtx (OImode);
4891 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4892 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4893 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4896 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register structure load/store family; CImode is the opaque
;; 3x128-bit tuple mode.  Structure mirrors the two-register family.
;; NOTE(review): original numbering is non-contiguous; unspec codes and
;; condition strings are missing from this extract.
4900 (define_insn "aarch64_simd_ld3<mode>"
4901 [(set (match_operand:CI 0 "register_operand" "=w")
4902 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4903 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4906 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4907 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD3R: load one element triple and replicate across all lanes.
4910 (define_insn "aarch64_simd_ld3r<mode>"
4911 [(set (match_operand:CI 0 "register_operand" "=w")
4912 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4913 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4916 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4917 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; LD3 single-lane: operand 2 (tied to output) carries the lanes that
;; are not overwritten.
4920 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4921 [(set (match_operand:CI 0 "register_operand" "=w")
4922 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4923 (match_operand:CI 2 "register_operand" "0")
4924 (match_operand:SI 3 "immediate_operand" "i")
4925 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4930 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4932 [(set_attr "type" "neon_load3_one_lane")]

;; Standard-name expander; big-endian inserts a register-list reversal
;; after the load.
4935 (define_expand "vec_load_lanesci<mode>"
4936 [(set (match_operand:CI 0 "register_operand" "=w")
4937 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4938 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4942 if (BYTES_BIG_ENDIAN)
4944 rtx tmp = gen_reg_rtx (CImode);
4945 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4946 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4947 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4950 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; ST3: store a three-register structure.
4954 (define_insn "aarch64_simd_st3<mode>"
4955 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4956 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4957 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4960 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4961 [(set_attr "type" "neon_store3_3reg<q>")]

4964 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4965 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4966 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4967 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4969 (match_operand:SI 2 "immediate_operand" "i")]
4973 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4974 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4976 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-name store expander; reversal happens before the store on
;; big-endian.
4979 (define_expand "vec_store_lanesci<mode>"
4980 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4981 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4982 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4986 if (BYTES_BIG_ENDIAN)
4988 rtx tmp = gen_reg_rtx (CImode);
4989 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4990 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4991 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4994 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register structure load/store family; XImode is the opaque
;; 4x128-bit tuple mode.  Structure mirrors the ld2/ld3 families.
;; NOTE(review): original numbering is non-contiguous; unspec codes and
;; condition strings are missing from this extract.
4998 (define_insn "aarch64_simd_ld4<mode>"
4999 [(set (match_operand:XI 0 "register_operand" "=w")
5000 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5001 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5004 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5005 [(set_attr "type" "neon_load4_4reg<q>")]

;; LD4R: load one element quadruple and replicate across all lanes.
5008 (define_insn "aarch64_simd_ld4r<mode>"
5009 [(set (match_operand:XI 0 "register_operand" "=w")
5010 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5011 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5014 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5015 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; LD4 single-lane: operand 2 (tied to output) supplies the untouched
;; lanes.
5018 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5019 [(set (match_operand:XI 0 "register_operand" "=w")
5020 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5021 (match_operand:XI 2 "register_operand" "0")
5022 (match_operand:SI 3 "immediate_operand" "i")
5023 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5027 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5028 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5030 [(set_attr "type" "neon_load4_one_lane")]

;; Standard-name expander; big-endian inserts a register-list reversal
;; after the load.
5033 (define_expand "vec_load_lanesxi<mode>"
5034 [(set (match_operand:XI 0 "register_operand" "=w")
5035 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5036 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5040 if (BYTES_BIG_ENDIAN)
5042 rtx tmp = gen_reg_rtx (XImode);
5043 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5044 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5045 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5048 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

;; ST4: store a four-register structure.
5052 (define_insn "aarch64_simd_st4<mode>"
5053 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5054 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5058 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5059 [(set_attr "type" "neon_store4_4reg<q>")]

5062 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5063 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5064 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5065 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5066 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5067 (match_operand:SI 2 "immediate_operand" "i")]
5071 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5072 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5074 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Standard-name store expander; reversal happens before the store on
;; big-endian.
5077 (define_expand "vec_store_lanesxi<mode>"
5078 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5079 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5080 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5084 if (BYTES_BIG_ENDIAN)
5086 rtx tmp = gen_reg_rtx (XImode);
5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5088 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5089 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5092 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Register-list reversal used by the big-endian lane-load/store
;; expanders: after reload, splits into one TBL per 128-bit register in
;; the tuple, using the byte-permute mask in operand 2.  Output is
;; earlyclobber ("=&w") since it is written while inputs are still live.
;; NOTE(review): original numbering is non-contiguous; some lines of the
;; split body are missing from this extract.
5096 (define_insn_and_split "aarch64_rev_reglist<mode>"
5097 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5099 [(match_operand:VSTRUCT 1 "register_operand" "w")
5100 (match_operand:V16QI 2 "register_operand" "w")]
5101 UNSPEC_REV_REGLIST))]
5104 "&& reload_completed"
5108 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5109 for (i = 0; i < nregs; i++)
5111 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5112 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5113 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5117 [(set_attr "type" "neon_tbl1_q")
5118 (set_attr "length" "<insn_count>")]

5121 ;; Reload patterns for AdvSIMD register list operands.

;; Move expander for the opaque register-tuple modes: when pseudos can
;; still be created and the destination is not a register, force the
;; source into one so the move insn's constraints can be satisfied.
5123 (define_expand "mov<mode>"
5124 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5125 (match_operand:VSTRUCT 1 "general_operand" ""))]
5128 if (can_create_pseudo_p ())
5130 if (GET_CODE (operands[0]) != REG)
5131 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Multi-register LD1/ST1 builtins (x2/x3 register lists).  Each
;; expander wraps the pointer in a tuple-mode MEM and defers to the
;; matching insn.  NOTE(review): original numbering is non-contiguous;
;; condition strings are missing from this extract.

;; LD1 x3: load three registers' worth of elements from operand 1.
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137 [(match_operand:CI 0 "register_operand" "=w")
5138 (match_operand:DI 1 "register_operand" "r")
5139 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5142 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));

5147 (define_insn "aarch64_ld1_x3_<mode>"
5148 [(set (match_operand:CI 0 "register_operand" "=w")
5150 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5153 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154 [(set_attr "type" "neon_load1_3reg<q>")]

;; ST1 x2: store a two-register list to the address in operand 0.
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5158 [(match_operand:DI 0 "register_operand" "")
5159 (match_operand:OI 1 "register_operand" "")
5160 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5163 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));

5168 (define_insn "aarch64_st1_x2_<mode>"
5169 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5171 [(match_operand:OI 1 "register_operand" "w")
5172 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5174 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175 [(set_attr "type" "neon_store1_2reg<q>")]

;; ST1 x3: store a three-register list.
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5179 [(match_operand:DI 0 "register_operand" "")
5180 (match_operand:CI 1 "register_operand" "")
5181 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5184 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));

5189 (define_insn "aarch64_st1_x3_<mode>"
5190 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5192 [(match_operand:CI 1 "register_operand" "w")
5193 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5195 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196 [(set_attr "type" "neon_store1_3reg<q>")]
;; Little-endian tuple moves: reg-reg (split later, per the "multiple"
;; type), or single ST1/LD1 of the whole register list to/from memory.
;; NOTE(review): original numbering is non-contiguous; the reg-reg
;; output alternative line is missing from this extract.
5199 (define_insn "*aarch64_mov<mode>"
5200 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5201 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5202 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5203 && (register_operand (operands[0], <MODE>mode)
5204 || register_operand (operands[1], <MODE>mode))"
5207 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5208 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5209 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5210 neon_load<nregs>_<nregs>reg_q")
5211 (set_attr "length" "<insn_count>,4,4")]

;; Big-endian element-ordered load: LD1 keeps the in-register lane
;; order matching GCC's vector-extension numbering.
5214 (define_insn "aarch64_be_ld1<mode>"
5215 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5216 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5217 "aarch64_simd_struct_operand" "Utv")]
5220 "ld1\\t{%0<Vmtype>}, %1"
5221 [(set_attr "type" "neon_load1_1reg<q>")]

;; Big-endian element-ordered store, mirror of the above.
5224 (define_insn "aarch64_be_st1<mode>"
5225 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5226 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5229 "st1\\t{%1<Vmtype>}, %0"
5230 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of the opaque tuple modes (OI = 2, CI = 3, XI = 4
;; quad registers).  Memory alternatives use plain LDP/STP-style
;; accesses (templates missing from this extract); reg-reg cases are
;; later decomposed by the post-reload splits below.
;; NOTE(review): original numbering is non-contiguous; output templates
;; and the (define_split ...) opener lines are missing here.
5233 (define_insn "*aarch64_be_movoi"
5234 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5235 (match_operand:OI 1 "general_operand" " w,w,m"))]
5236 "TARGET_SIMD && BYTES_BIG_ENDIAN
5237 && (register_operand (operands[0], OImode)
5238 || register_operand (operands[1], OImode))"
5243 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5244 (set_attr "length" "8,4,4")]

5247 (define_insn "*aarch64_be_movci"
5248 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5249 (match_operand:CI 1 "general_operand" " w,w,o"))]
5250 "TARGET_SIMD && BYTES_BIG_ENDIAN
5251 && (register_operand (operands[0], CImode)
5252 || register_operand (operands[1], CImode))"
5254 [(set_attr "type" "multiple")
5255 (set_attr "length" "12,4,4")]

5258 (define_insn "*aarch64_be_movxi"
5259 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5260 (match_operand:XI 1 "general_operand" " w,w,o"))]
5261 "TARGET_SIMD && BYTES_BIG_ENDIAN
5262 && (register_operand (operands[0], XImode)
5263 || register_operand (operands[1], XImode))"
5265 [(set_attr "type" "multiple")
5266 (set_attr "length" "16,4,4")]

;; Post-reload split: OI reg-reg move becomes two TImode moves with
;; overlap-safe ordering handled by aarch64_simd_emit_reg_reg_move.
5270 [(set (match_operand:OI 0 "register_operand")
5271 (match_operand:OI 1 "register_operand"))]
5272 "TARGET_SIMD && reload_completed"
5275 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; Post-reload split for CI: three TImode moves for reg-reg, or on
;; big-endian an OImode move for the first 32 bytes plus a V16QI move
;; for the trailing quad.
5280 [(set (match_operand:CI 0 "nonimmediate_operand")
5281 (match_operand:CI 1 "general_operand"))]
5282 "TARGET_SIMD && reload_completed"
5285 if (register_operand (operands[0], CImode)
5286 && register_operand (operands[1], CImode))
5288 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5291 else if (BYTES_BIG_ENDIAN)
5293 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5294 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5295 emit_move_insn (gen_lowpart (V16QImode,
5296 simplify_gen_subreg (TImode, operands[0],
5298 gen_lowpart (V16QImode,
5299 simplify_gen_subreg (TImode, operands[1],

;; Post-reload split for XI: four TImode moves for reg-reg, or two
;; OImode halves (offsets 0 and 32) on big-endian.
5308 [(set (match_operand:XI 0 "nonimmediate_operand")
5309 (match_operand:XI 1 "general_operand"))]
5310 "TARGET_SIMD && reload_completed"
5313 if (register_operand (operands[0], XImode)
5314 && register_operand (operands[1], XImode))
5316 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5319 else if (BYTES_BIG_ENDIAN)
5321 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5322 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5323 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5324 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; LDnR builtin expander: builds a BLKmode MEM sized from the element
;; mode, then defers to the matching aarch64_simd_ld<n>r insn.
;; NOTE(review): original numbering is non-contiguous; condition
;; strings and unspec codes are missing from this extract.
5331 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5332 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5333 (match_operand:DI 1 "register_operand" "w")
5334 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5337 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5338 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5341 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],

;; D-register LD2: a real de-interleaving ld2 for the VD modes...
5346 (define_insn "aarch64_ld2<mode>_dreg"
5347 [(set (match_operand:OI 0 "register_operand" "=w")
5348 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5349 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5352 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5353 [(set_attr "type" "neon_load2_2reg<q>")]

;; ...but for the single-element DX modes a plain two-register LD1 of
;; .1d elements is equivalent (nothing to de-interleave).
5356 (define_insn "aarch64_ld2<mode>_dreg"
5357 [(set (match_operand:OI 0 "register_operand" "=w")
5358 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5359 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5362 "ld1\\t{%S0.1d - %T0.1d}, %1"
5363 [(set_attr "type" "neon_load1_2reg<q>")]

;; Same VD/DX pairing for three registers...
5366 (define_insn "aarch64_ld3<mode>_dreg"
5367 [(set (match_operand:CI 0 "register_operand" "=w")
5368 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5369 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5372 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5373 [(set_attr "type" "neon_load3_3reg<q>")]

5376 (define_insn "aarch64_ld3<mode>_dreg"
5377 [(set (match_operand:CI 0 "register_operand" "=w")
5378 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5379 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5382 "ld1\\t{%S0.1d - %U0.1d}, %1"
5383 [(set_attr "type" "neon_load1_3reg<q>")]

;; ...and for four registers.
5386 (define_insn "aarch64_ld4<mode>_dreg"
5387 [(set (match_operand:XI 0 "register_operand" "=w")
5388 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5389 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5392 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5393 [(set_attr "type" "neon_load4_4reg<q>")]

5396 (define_insn "aarch64_ld4<mode>_dreg"
5397 [(set (match_operand:XI 0 "register_operand" "=w")
5398 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5399 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5402 "ld1\\t{%S0.1d - %V0.1d}, %1"
5403 [(set_attr "type" "neon_load1_4reg<q>")]

;; Builtin expander for the D-register structure loads: the BLKmode MEM
;; is sized as nregs * 8 bytes.
5406 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5407 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5408 (match_operand:DI 1 "register_operand" "r")
5409 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5412 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5413 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5415 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5419 (define_expand "aarch64_ld1<VALL_F16:mode>"
5420 [(match_operand:VALL_F16 0 "register_operand")
5421 (match_operand:DI 1 "register_operand")]
5424 machine_mode mode = <VALL_F16:MODE>mode;
5425 rtx mem = gen_rtx_MEM (mode, operands[1]);
5427 if (BYTES_BIG_ENDIAN)
5428 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5430 emit_move_insn (operands[0], mem);
5434 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5435 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5436 (match_operand:DI 1 "register_operand" "r")
5437 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5440 machine_mode mode = <VSTRUCT:MODE>mode;
5441 rtx mem = gen_rtx_MEM (mode, operands[1]);
5443 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5447 (define_expand "aarch64_ld1x2<VQ:mode>"
5448 [(match_operand:OI 0 "register_operand" "=w")
5449 (match_operand:DI 1 "register_operand" "r")
5450 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5453 machine_mode mode = OImode;
5454 rtx mem = gen_rtx_MEM (mode, operands[1]);
5456 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5460 (define_expand "aarch64_ld1x2<VDC:mode>"
5461 [(match_operand:OI 0 "register_operand" "=w")
5462 (match_operand:DI 1 "register_operand" "r")
5463 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5466 machine_mode mode = OImode;
5467 rtx mem = gen_rtx_MEM (mode, operands[1]);
5469 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; Lane-wise structure load: operand 2 supplies the existing register
;; contents (tied "0"), operand 3 is the lane index.  The mem size set
;; here looks truncated by the extraction — TODO confirm the multiplier
;; (presumably element size * nregs) against the full file.
5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5475 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5476 (match_operand:DI 1 "register_operand" "w")
5477 (match_operand:VSTRUCT 2 "register_operand" "0")
5478 (match_operand:SI 3 "immediate_operand" "i")
5479 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5482 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5483 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
;; Bounds-check the lane index before emitting the load-lanes insn.
5486 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5487 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5488 operands[0], mem, operands[2], operands[3]));

5492 ;; Expanders for builtins to extract vector registers from large
5493 ;; opaque integer modes.

;; Extract D-reg "part" from a struct register: read the containing
;; 128-bit subreg at byte offset part*16, then take its low half.
5497 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5498 [(match_operand:VDC 0 "register_operand" "=w")
5499 (match_operand:VSTRUCT 1 "register_operand" "w")
5500 (match_operand:SI 2 "immediate_operand" "i")]
5503 int part = INTVAL (operands[2]);
5504 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5505 int offset = part * 16;
5507 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5508 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; Extract Q-reg "part": a direct 16-byte-offset subreg move.
5514 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5515 [(match_operand:VQ 0 "register_operand" "=w")
5516 (match_operand:VSTRUCT 1 "register_operand" "w")
5517 (match_operand:SI 2 "immediate_operand" "i")]
5520 int part = INTVAL (operands[2]);
5521 int offset = part * 16;
5523 emit_move_insn (operands[0],
5524 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5528 ;; Permuted-store expanders for neon intrinsics.

5530 ;; Permute instructions

;; Standard-name permute expander: delegates all index analysis to
;; aarch64_expand_vec_perm (byte-vector modes only, iterator VB).
5534 (define_expand "vec_perm<mode>"
5535 [(match_operand:VB 0 "register_operand")
5536 (match_operand:VB 1 "register_operand")
5537 (match_operand:VB 2 "register_operand")
5538 (match_operand:VB 3 "register_operand")]
5541 aarch64_expand_vec_perm (operands[0], operands[1],
5542 operands[2], operands[3], <nunits>);

;; One-table TBL: operand 1 is the 16-byte table, operand 2 the indices.
5546 (define_insn "aarch64_tbl1<mode>"
5547 [(set (match_operand:VB 0 "register_operand" "=w")
5548 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5549 (match_operand:VB 2 "register_operand" "w")]
5552 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5553 [(set_attr "type" "neon_tbl1<q>")]

5556 ;; Two source registers.

;; Two-table TBL: operand 1 is an OI pair printed as a %S..%T reg range.
5558 (define_insn "aarch64_tbl2v16qi"
5559 [(set (match_operand:V16QI 0 "register_operand" "=w")
5560 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5561 (match_operand:V16QI 2 "register_operand" "w")]
5564 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5565 [(set_attr "type" "neon_tbl2_q")]

;; Two-table TBL over any VB mode (despite the "tbl3" name the template
;; uses a two-register %S..%T table range).
5568 (define_insn "aarch64_tbl3<mode>"
5569 [(set (match_operand:VB 0 "register_operand" "=w")
5570 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5571 (match_operand:VB 2 "register_operand" "w")]
5574 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5575 [(set_attr "type" "neon_tbl3")]

;; TBX variant: operand 1 is tied to the destination ("0") because tbx
;; preserves destination bytes for out-of-range indices.
5578 (define_insn "aarch64_tbx4<mode>"
5579 [(set (match_operand:VB 0 "register_operand" "=w")
5580 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5581 (match_operand:OI 2 "register_operand" "w")
5582 (match_operand:VB 3 "register_operand" "w")]
5585 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5586 [(set_attr "type" "neon_tbl4")]
5589 ;; Three source registers.

;; Three-table TBL: CI operand holds three Q-regs, printed %S..%U.
5591 (define_insn "aarch64_qtbl3<mode>"
5592 [(set (match_operand:VB 0 "register_operand" "=w")
5593 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5594 (match_operand:VB 2 "register_operand" "w")]
5597 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5598 [(set_attr "type" "neon_tbl3")]

;; Three-table TBX: destination tied ("0") so unmatched bytes survive.
5601 (define_insn "aarch64_qtbx3<mode>"
5602 [(set (match_operand:VB 0 "register_operand" "=w")
5603 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5604 (match_operand:CI 2 "register_operand" "w")
5605 (match_operand:VB 3 "register_operand" "w")]
5608 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5609 [(set_attr "type" "neon_tbl3")]

5612 ;; Four source registers.

;; Four-table TBL: XI operand holds four Q-regs, printed %S..%V.
5614 (define_insn "aarch64_qtbl4<mode>"
5615 [(set (match_operand:VB 0 "register_operand" "=w")
5616 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5617 (match_operand:VB 2 "register_operand" "w")]
5620 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5621 [(set_attr "type" "neon_tbl4")]

;; Four-table TBX counterpart.
5624 (define_insn "aarch64_qtbx4<mode>"
5625 [(set (match_operand:VB 0 "register_operand" "=w")
5626 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5627 (match_operand:XI 2 "register_operand" "w")
5628 (match_operand:VB 3 "register_operand" "w")]
5631 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5632 [(set_attr "type" "neon_tbl4")]

;; Combine two V16QI regs into an OI pair; kept as a single pattern until
;; after reload, then split by aarch64_split_combinev16qi.
5635 (define_insn_and_split "aarch64_combinev16qi"
5636 [(set (match_operand:OI 0 "register_operand" "=w")
5637 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5638 (match_operand:V16QI 2 "register_operand" "w")]
5642 "&& reload_completed"
5645 aarch64_split_combinev16qi (operands);
5648 [(set_attr "type" "multiple")]
5651 ;; This instruction's pattern is generated directly by
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653 ;; need corresponding changes there.
;; ZIP/UZP/TRN family, selected via the PERMUTE iterator's insn/hilo
;; attributes.
5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5655 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5656 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5657 (match_operand:VALL_F16 2 "register_operand" "w")]
5660 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5661 [(set_attr "type" "neon_permute<q>")]

5664 ;; This instruction's pattern is generated directly by
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666 ;; need corresponding changes there.  Note that the immediate (third)
5667 ;; operand is a lane index not a byte index.
;; EXT: the C fragment rescales the lane index to the byte offset the
;; hardware instruction expects.
5668 (define_insn "aarch64_ext<mode>"
5669 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5670 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5671 (match_operand:VALL_F16 2 "register_operand" "w")
5672 (match_operand:SI 3 "immediate_operand" "i")]
5676 operands[3] = GEN_INT (INTVAL (operands[3])
5677 * GET_MODE_UNIT_SIZE (<MODE>mode));
5678 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5680 [(set_attr "type" "neon_ext<q>")]

5683 ;; This instruction's pattern is generated directly by
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685 ;; need corresponding changes there.
;; REV16/REV32/REV64, selected by the REVERSE iterator.
5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5687 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5688 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5691 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5692 [(set_attr "type" "neon_rev<q>")]
;; ---- Structure-store insns (D-register forms) ----
;; Mirror of the ld*_dreg patterns above: VD element modes use the real
;; interleaving st2/st3/st4, DX modes degrade to multi-register st1 of
;; .1d lanes.  NOTE(review): insn conditions/UNSPEC names were dropped by
;; the extraction; code left untouched.
5695 (define_insn "aarch64_st2<mode>_dreg"
5696 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5697 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5698 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5701 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5702 [(set_attr "type" "neon_store2_2reg")]

5705 (define_insn "aarch64_st2<mode>_dreg"
5706 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5707 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5708 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5711 "st1\\t{%S1.1d - %T1.1d}, %0"
5712 [(set_attr "type" "neon_store1_2reg")]

5715 (define_insn "aarch64_st3<mode>_dreg"
5716 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5717 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5718 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5721 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5722 [(set_attr "type" "neon_store3_3reg")]

5725 (define_insn "aarch64_st3<mode>_dreg"
5726 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5727 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5728 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5731 "st1\\t{%S1.1d - %U1.1d}, %0"
5732 [(set_attr "type" "neon_store1_3reg")]

5735 (define_insn "aarch64_st4<mode>_dreg"
5736 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5737 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5738 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5741 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5742 [(set_attr "type" "neon_store4_4reg")]

5745 (define_insn "aarch64_st4<mode>_dreg"
5746 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5747 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5748 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5751 "st1\\t{%S1.1d - %V1.1d}, %0"
5752 [(set_attr "type" "neon_store1_4reg")]
;; D-register structure-store expander: BLK mem of nregs*8 bytes at the
;; address in operand 0, stored from the struct register in operand 1.
5755 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5756 [(match_operand:DI 0 "register_operand" "r")
5757 (match_operand:VSTRUCT 1 "register_operand" "w")
5758 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5761 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5762 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5764 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));

;; Q-register structure-store expander: mem is in the struct mode itself.
5768 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5769 [(match_operand:DI 0 "register_operand" "r")
5770 (match_operand:VSTRUCT 1 "register_operand" "w")
5771 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5774 machine_mode mode = <VSTRUCT:MODE>mode;
5775 rtx mem = gen_rtx_MEM (mode, operands[0]);
5777 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; Lane-wise structure store; operand 2 is the lane index.  The mem-size
;; expression looks truncated by the extraction — TODO confirm.
5781 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5782 [(match_operand:DI 0 "register_operand" "r")
5783 (match_operand:VSTRUCT 1 "register_operand" "w")
5784 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5785 (match_operand:SI 2 "immediate_operand")]
5788 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5789 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5792 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5793 mem, operands[1], operands[2]));

;; st1 builtin: big-endian via dedicated pattern, else a plain move.
5797 (define_expand "aarch64_st1<VALL_F16:mode>"
5798 [(match_operand:DI 0 "register_operand")
5799 (match_operand:VALL_F16 1 "register_operand")]
5802 machine_mode mode = <VALL_F16:MODE>mode;
5803 rtx mem = gen_rtx_MEM (mode, operands[0]);
5805 if (BYTES_BIG_ENDIAN)
5806 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5808 emit_move_insn (mem, operands[1]);

5812 ;; Expander for builtins to insert vector registers into large
5813 ;; opaque integer modes.

5815 ;; Q-register list.  We don't need a D-reg inserter as we zero
5816 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole struct (operand 1) then overwrite Q-reg "part" via a
;; 16-byte-offset subreg store.
5818 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5819 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5820 (match_operand:VSTRUCT 1 "register_operand" "0")
5821 (match_operand:VQ 2 "register_operand" "w")
5822 (match_operand:SI 3 "immediate_operand" "i")]
5825 int part = INTVAL (operands[3]);
5826 int offset = part * 16;
5828 emit_move_insn (operands[0], operands[1]);
5829 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5834 ;; Standard pattern name vec_init<mode><Vel>.

;; Delegates all element-initialization strategy to the C helper.
5836 (define_expand "vec_init<mode><Vel>"
5837 [(match_operand:VALL_F16 0 "register_operand" "")
5838 (match_operand 1 "" "")]
5841 aarch64_expand_vector_init (operands[0], operands[1]);

;; Load one scalar from memory and replicate it across all lanes (ld1r).
5845 (define_insn "*aarch64_simd_ld1r<mode>"
5846 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5847 (vec_duplicate:VALL_F16
5848 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5850 "ld1r\\t{%0.<Vtype>}, %1"
5851 [(set_attr "type" "neon_load1_all_lanes")]

;; Non-interleaving two-register load (ld1 x2), Q-element variant.
5854 (define_insn "aarch64_simd_ld1<mode>_x2"
5855 [(set (match_operand:OI 0 "register_operand" "=w")
5856 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5860 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861 [(set_attr "type" "neon_load1_2reg<q>")]

;; Same, D-element (VDC) variant.
5864 (define_insn "aarch64_simd_ld1<mode>_x2"
5865 [(set (match_operand:OI 0 "register_operand" "=w")
5866 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5870 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871 [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal estimate (frecpe), vector and scalar modes.
5875 (define_insn "@aarch64_frecpe<mode>"
5876 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5878 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5881 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5882 [(set_attr "type" "neon_fp_recpe_<stype><q>")]

;; Reciprocal exponent (frecpx), scalar FP modes only.
5885 (define_insn "aarch64_frecpx<mode>"
5886 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5887 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5890 "frecpx\t%<s>0, %<s>1"
5891 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]

;; Reciprocal step (frecps), used with frecpe for Newton iterations.
5894 (define_insn "@aarch64_frecps<mode>"
5895 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5897 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5898 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5901 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5902 [(set_attr "type" "neon_fp_recps_<stype><q>")]

;; Unsigned integer reciprocal estimate on 32-bit element vectors.
5905 (define_insn "aarch64_urecpe<mode>"
5906 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5907 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5910 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5911 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

5913 ;; Standard pattern name vec_extract<mode><Vel>.

;; Extract one lane; forwards to the get_lane pattern.
5915 (define_expand "vec_extract<mode><Vel>"
5916 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5917 (match_operand:VALL_F16 1 "register_operand" "")
5918 (match_operand:SI 2 "immediate_operand" "")]
5922 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]))
;; ---- AES crypto patterns (require TARGET_AES) ----
;; AESE/AESD: operand 1 ("%0") is the commutative tied state input; the
;; instruction only names the state reg and the round-key reg.
5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5929 [(set (match_operand:V16QI 0 "register_operand" "=w")
5930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5931 (match_operand:V16QI 2 "register_operand" "w")]
5933 "TARGET_SIMD && TARGET_AES"
5934 "aes<aes_op>\\t%0.16b, %2.16b"
5935 [(set_attr "type" "crypto_aese")]

;; Combiner patterns: AESE/AESD of (x ^ y) with a zero key is just
;; aes<op> on the xor-ed pair, letting the xor fold into the aes insn.
;; (Two orderings of the unspec arguments are matched.)
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939 [(set (match_operand:V16QI 0 "register_operand" "=w")
5940 (unspec:V16QI [(xor:V16QI
5941 (match_operand:V16QI 1 "register_operand" "%0")
5942 (match_operand:V16QI 2 "register_operand" "w"))
5943 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5945 "TARGET_SIMD && TARGET_AES"
5946 "aes<aes_op>\\t%0.16b, %2.16b"
5947 [(set_attr "type" "crypto_aese")]

5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5951 [(set (match_operand:V16QI 0 "register_operand" "=w")
5952 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5953 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5954 (match_operand:V16QI 2 "register_operand" "w"))]
5956 "TARGET_SIMD && TARGET_AES"
5957 "aes<aes_op>\\t%0.16b, %2.16b"
5958 [(set_attr "type" "crypto_aese")]

5961 ;; When AES/AESMC fusion is enabled we want the register allocation to
5965 ;; So prefer to tie operand 1 to operand 0 when fusing.

;; AESMC/AESIMC: two alternatives; the tied "0" alternative is enabled
;; only when AESE/AESMC fusion is on (see set_attr_alternative below).
5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5968 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5969 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5971 "TARGET_SIMD && TARGET_AES"
5972 "aes<aesmc_op>\\t%0.16b, %1.16b"
5973 [(set_attr "type" "crypto_aesmc")
5974 (set_attr_alternative "enabled"
5975 [(if_then_else (match_test
5976 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5977 (const_string "yes" )
5978 (const_string "no"))
5979 (const_string "yes")])]

5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5983 ;; and enforce the register dependency without scheduling or register
5984 ;; allocation messing up the order or introducing moves inbetween.
5985 ;; Mash the two together during combine.

;; Fused AESE+AESMC pair, emitted back-to-back ("length" 8 = two insns);
;; early-clobber "=&w" keeps the inputs out of the destination.
5987 (define_insn "*aarch64_crypto_aese_fused"
5988 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5991 [(match_operand:V16QI 1 "register_operand" "0")
5992 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5994 "TARGET_SIMD && TARGET_AES
5995 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5996 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5997 [(set_attr "type" "crypto_aese")
5998 (set_attr "length" "8")]

6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6002 ;; and enforce the register dependency without scheduling or register
6003 ;; allocation messing up the order or introducing moves inbetween.
6004 ;; Mash the two together during combine.

;; Fused AESD+AESIMC pair, same structure as the AESE version above.
6006 (define_insn "*aarch64_crypto_aesd_fused"
6007 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6010 [(match_operand:V16QI 1 "register_operand" "0")
6011 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6013 "TARGET_SIMD && TARGET_AES
6014 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6015 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6016 [(set_attr "type" "crypto_aese")
6017 (set_attr "length" "8")]
;; ---- SHA1 crypto patterns (require TARGET_SHA2) ----
;; NOTE(review): the output templates of the sha1h patterns were dropped
;; by the extraction; code left untouched.
6022 (define_insn "aarch64_crypto_sha1hsi"
6023 [(set (match_operand:SI 0 "register_operand" "=w")
6024 (unspec:SI [(match_operand:SI 1
6025 "register_operand" "w")]
6027 "TARGET_SIMD && TARGET_SHA2"
6029 [(set_attr "type" "crypto_sha1_fast")]

;; sha1h on lane 0 of a V4SI — little-endian lane numbering.
6032 (define_insn "aarch64_crypto_sha1hv4si"
6033 [(set (match_operand:SI 0 "register_operand" "=w")
6034 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6035 (parallel [(const_int 0)]))]
6037 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6039 [(set_attr "type" "crypto_sha1_fast")]

;; Big-endian twin: the same logical lane is element 3.
6042 (define_insn "aarch64_be_crypto_sha1hv4si"
6043 [(set (match_operand:SI 0 "register_operand" "=w")
6044 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6045 (parallel [(const_int 3)]))]
6047 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6049 [(set_attr "type" "crypto_sha1_fast")]

;; sha1su1: schedule-update step 1; operand 1 tied to destination.
6052 (define_insn "aarch64_crypto_sha1su1v4si"
6053 [(set (match_operand:V4SI 0 "register_operand" "=w")
6054 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055 (match_operand:V4SI 2 "register_operand" "w")]
6057 "TARGET_SIMD && TARGET_SHA2"
6058 "sha1su1\\t%0.4s, %2.4s"
6059 [(set_attr "type" "crypto_sha1_fast")]

;; sha1c/sha1m/sha1p hash-update, selected by the sha1_op attribute.
6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6063 [(set (match_operand:V4SI 0 "register_operand" "=w")
6064 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6065 (match_operand:SI 2 "register_operand" "w")
6066 (match_operand:V4SI 3 "register_operand" "w")]
6068 "TARGET_SIMD && TARGET_SHA2"
6069 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6070 [(set_attr "type" "crypto_sha1_slow")]

;; sha1su0: schedule-update step 0 (three inputs, dest tied to op 1).
6073 (define_insn "aarch64_crypto_sha1su0v4si"
6074 [(set (match_operand:V4SI 0 "register_operand" "=w")
6075 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6076 (match_operand:V4SI 2 "register_operand" "w")
6077 (match_operand:V4SI 3 "register_operand" "w")]
6079 "TARGET_SIMD && TARGET_SHA2"
6080 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6081 [(set_attr "type" "crypto_sha1_xor")]
;; ---- SHA256 crypto patterns (require TARGET_SHA2) ----
;; sha256h/sha256h2 hash update; destination tied to operand 1.
6086 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6087 [(set (match_operand:V4SI 0 "register_operand" "=w")
6088 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6089 (match_operand:V4SI 2 "register_operand" "w")
6090 (match_operand:V4SI 3 "register_operand" "w")]
6092 "TARGET_SIMD && TARGET_SHA2"
6093 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6094 [(set_attr "type" "crypto_sha256_slow")]

;; sha256su0: message-schedule update, step 0.
6097 (define_insn "aarch64_crypto_sha256su0v4si"
6098 [(set (match_operand:V4SI 0 "register_operand" "=w")
6099 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6100 (match_operand:V4SI 2 "register_operand" "w")]
6102 "TARGET_SIMD && TARGET_SHA2"
6103 "sha256su0\\t%0.4s, %2.4s"
6104 [(set_attr "type" "crypto_sha256_fast")]

;; sha256su1: message-schedule update, step 1.
6107 (define_insn "aarch64_crypto_sha256su1v4si"
6108 [(set (match_operand:V4SI 0 "register_operand" "=w")
6109 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6110 (match_operand:V4SI 2 "register_operand" "w")
6111 (match_operand:V4SI 3 "register_operand" "w")]
6113 "TARGET_SIMD && TARGET_SHA2"
6114 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6115 [(set_attr "type" "crypto_sha256_slow")]
;; ---- SHA512 crypto patterns (require TARGET_SHA3) ----
;; sha512h/sha512h2 hash update over V2DI; destination tied to operand 1.
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6121 [(set (match_operand:V2DI 0 "register_operand" "=w")
6122 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6123 (match_operand:V2DI 2 "register_operand" "w")
6124 (match_operand:V2DI 3 "register_operand" "w")]
6126 "TARGET_SIMD && TARGET_SHA3"
6127 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6128 [(set_attr "type" "crypto_sha512")]

;; sha512su0: message-schedule update, step 0.
6131 (define_insn "aarch64_crypto_sha512su0qv2di"
6132 [(set (match_operand:V2DI 0 "register_operand" "=w")
6133 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6134 (match_operand:V2DI 2 "register_operand" "w")]
6136 "TARGET_SIMD && TARGET_SHA3"
6137 "sha512su0\\t%0.2d, %2.2d"
6138 [(set_attr "type" "crypto_sha512")]

;; sha512su1: message-schedule update, step 1.
6141 (define_insn "aarch64_crypto_sha512su1qv2di"
6142 [(set (match_operand:V2DI 0 "register_operand" "=w")
6143 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6144 (match_operand:V2DI 2 "register_operand" "w")
6145 (match_operand:V2DI 3 "register_operand" "w")]
6147 "TARGET_SIMD && TARGET_SHA3"
6148 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6149 [(set_attr "type" "crypto_sha512")]
;; ---- SHA3 bit-manipulation patterns (require TARGET_SHA3) ----
;; eor3: three-way xor, expressed with real xor RTL so combine can match.
6154 (define_insn "eor3q<mode>4"
6155 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6158 (match_operand:VQ_I 2 "register_operand" "w")
6159 (match_operand:VQ_I 3 "register_operand" "w"))
6160 (match_operand:VQ_I 1 "register_operand" "w")))]
6161 "TARGET_SIMD && TARGET_SHA3"
6162 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6163 [(set_attr "type" "crypto_sha3")]

;; rax1: rotate-and-xor (xor of op1 with op2 rotated left by 1).
6166 (define_insn "aarch64_rax1qv2di"
6167 [(set (match_operand:V2DI 0 "register_operand" "=w")
6170 (match_operand:V2DI 2 "register_operand" "w")
6172 (match_operand:V2DI 1 "register_operand" "w")))]
6173 "TARGET_SIMD && TARGET_SHA3"
6174 "rax1\\t%0.2d, %1.2d, %2.2d"
6175 [(set_attr "type" "crypto_sha3")]

;; xar: xor then rotate by the immediate in operand 3 ("%w" marks the
;; xor inputs as commutative).
6178 (define_insn "aarch64_xarqv2di"
6179 [(set (match_operand:V2DI 0 "register_operand" "=w")
6182 (match_operand:V2DI 1 "register_operand" "%w")
6183 (match_operand:V2DI 2 "register_operand" "w"))
6184 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6185 "TARGET_SIMD && TARGET_SHA3"
6186 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6187 [(set_attr "type" "crypto_sha3")]

;; bcax: bit-clear and xor — op1 ^ (op2 & ~op3), written in RTL.
6190 (define_insn "bcaxq<mode>4"
6191 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6194 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6195 (match_operand:VQ_I 2 "register_operand" "w"))
6196 (match_operand:VQ_I 1 "register_operand" "w")))]
6197 "TARGET_SIMD && TARGET_SHA3"
6198 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6199 [(set_attr "type" "crypto_sha3")]
;; ---- SM3/SM4 crypto patterns (require TARGET_SM4) ----
;; sm3ss1: three-input keyed step, no tied operand.
6204 (define_insn "aarch64_sm3ss1qv4si"
6205 [(set (match_operand:V4SI 0 "register_operand" "=w")
6206 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6207 (match_operand:V4SI 2 "register_operand" "w")
6208 (match_operand:V4SI 3 "register_operand" "w")]
6210 "TARGET_SIMD && TARGET_SM4"
6211 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6212 [(set_attr "type" "crypto_sm3")]

;; sm3tt1a/1b/2a/2b, selected by sm3tt_op; operand 4 is a 2-bit lane
;; immediate (Ui2) printed as the [%4] index.
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6217 [(set (match_operand:V4SI 0 "register_operand" "=w")
6218 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219 (match_operand:V4SI 2 "register_operand" "w")
6220 (match_operand:V4SI 3 "register_operand" "w")
6221 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6223 "TARGET_SIMD && TARGET_SM4"
6224 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6225 [(set_attr "type" "crypto_sm3")]

;; sm3partw1/partw2 message-expansion helpers; dest tied to operand 1.
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6229 [(set (match_operand:V4SI 0 "register_operand" "=w")
6230 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6231 (match_operand:V4SI 2 "register_operand" "w")
6232 (match_operand:V4SI 3 "register_operand" "w")]
6234 "TARGET_SIMD && TARGET_SM4"
6235 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6236 [(set_attr "type" "crypto_sm3")]

;; sm4e: SM4 round function; dest tied to operand 1.
6241 (define_insn "aarch64_sm4eqv4si"
6242 [(set (match_operand:V4SI 0 "register_operand" "=w")
6243 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6244 (match_operand:V4SI 2 "register_operand" "w")]
6246 "TARGET_SIMD && TARGET_SM4"
6247 "sm4e\\t%0.4s, %2.4s"
6248 [(set_attr "type" "crypto_sm4")]

;; sm4ekey: key-schedule step; no tied operand.
6251 (define_insn "aarch64_sm4ekeyqv4si"
6252 [(set (match_operand:V4SI 0 "register_operand" "=w")
6253 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6254 (match_operand:V4SI 2 "register_operand" "w")]
6256 "TARGET_SIMD && TARGET_SM4"
6257 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6258 [(set_attr "type" "crypto_sm4")]
;; ---- FP16 widening multiply-accumulate (FMLAL/FMLSL) ----
;; Expanders build lo/hi half selectors and forward to the matching
;; aarch64_simd_fml*l insns below.  NOTE(review): extraction dropped the
;; trailing call arguments of each emit_insn; code left untouched.

;; Low-half expander: both selectors pick the low <nunits> lanes.
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6264 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6266 [(match_operand:VDQSF 1 "register_operand" "0")
6267 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6272 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6273 <nunits> * 2, false);
6274 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6275 <nunits> * 2, false);
6277 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],

;; High-half expander: selectors pick the high lanes (true).
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6287 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6289 [(match_operand:VDQSF 1 "register_operand" "0")
6290 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6291 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6295 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6296 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6298 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],

;; fmlal (low half): fma of the two low-half selections into operand 1.
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6307 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6310 (vec_select:<VFMLA_SEL_W>
6311 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6312 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6314 (vec_select:<VFMLA_SEL_W>
6315 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6316 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6317 (match_operand:VDQSF 1 "register_operand" "0")))]
6319 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6320 [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl (low half): as above but the first factor is negated.
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6324 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6328 (vec_select:<VFMLA_SEL_W>
6329 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6330 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6332 (vec_select:<VFMLA_SEL_W>
6333 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6334 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6335 (match_operand:VDQSF 1 "register_operand" "0")))]
6337 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6338 [(set_attr "type" "neon_fp_mul_s")]

;; fmlal2 (high half).
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6342 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6345 (vec_select:<VFMLA_SEL_W>
6346 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6347 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6349 (vec_select:<VFMLA_SEL_W>
6350 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6351 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6352 (match_operand:VDQSF 1 "register_operand" "0")))]
6354 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6355 [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl2 (high half, negated first factor).
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6359 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6363 (vec_select:<VFMLA_SEL_W>
6364 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6365 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6367 (vec_select:<VFMLA_SEL_W>
6368 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6369 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6370 (match_operand:VDQSF 1 "register_operand" "0")))]
6372 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6373 [(set_attr "type" "neon_fp_mul_s")]
;; ---- FMLAL/FMLSL by-lane forms, V2SF result / V4HF inputs ----
;; Expanders: operand 4 is a 2-bit lane immediate; the endian-corrected
;; lane rtx and a lo/hi half selector are built here.

;; Low-half by-lane expander.
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6377 [(set (match_operand:V2SF 0 "register_operand" "")
6378 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6379 (match_operand:V4HF 2 "register_operand" "")
6380 (match_operand:V4HF 3 "register_operand" "")
6381 (match_operand:SI 4 "aarch64_imm2" "")]
6385 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6386 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6388 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],

;; High-half by-lane expander.
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6398 [(set (match_operand:V2SF 0 "register_operand" "")
6399 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6400 (match_operand:V4HF 2 "register_operand" "")
6401 (match_operand:V4HF 3 "register_operand" "")
6402 (match_operand:SI 4 "aarch64_imm2" "")]
6406 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6407 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6409 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],

;; fmlal by-lane, low half.  Operand 3 uses constraint "x" (restricted
;; register range required by the by-element encoding).
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6418 [(set (match_operand:V2SF 0 "register_operand" "=w")
6422 (match_operand:V4HF 2 "register_operand" "w")
6423 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6427 (match_operand:V4HF 3 "register_operand" "x")
6428 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6429 (match_operand:V2SF 1 "register_operand" "0")))]
6431 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6432 [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl by-lane, low half.
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6436 [(set (match_operand:V2SF 0 "register_operand" "=w")
6441 (match_operand:V4HF 2 "register_operand" "w")
6442 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6446 (match_operand:V4HF 3 "register_operand" "x")
6447 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6448 (match_operand:V2SF 1 "register_operand" "0")))]
6450 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6451 [(set_attr "type" "neon_fp_mul_s")]

;; fmlal2 by-lane, high half.
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6455 [(set (match_operand:V2SF 0 "register_operand" "=w")
6459 (match_operand:V4HF 2 "register_operand" "w")
6460 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6464 (match_operand:V4HF 3 "register_operand" "x")
6465 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6466 (match_operand:V2SF 1 "register_operand" "0")))]
6468 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6469 [(set_attr "type" "neon_fp_mul_s")]

;; fmlsl2 by-lane, high half.
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6473 [(set (match_operand:V2SF 0 "register_operand" "=w")
6478 (match_operand:V4HF 2 "register_operand" "w")
6479 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6483 (match_operand:V4HF 3 "register_operand" "x")
6484 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6485 (match_operand:V2SF 1 "register_operand" "0")))]
6487 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6488 [(set_attr "type" "neon_fp_mul_s")]
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6492 [(set (match_operand:V4SF 0 "register_operand" "")
6493 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6494 (match_operand:V8HF 2 "register_operand" "")
6495 (match_operand:V8HF 3 "register_operand" "")
6496 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6500 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6501 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6503 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; Expander for the quad-width fmlal2/fmlsl2 "laneq high" intrinsics.
;; Builds p1, the high-half selector for V8HF (elements 4..7; note the
;; 'true' argument), endian-corrects the lane index from operand 4, then
;; emits the matching aarch64_simd_fml*lq_laneq_highv4sf insn.
;; NOTE(review): the unspec name, condition, and trailing emit arguments
;; appear missing from this copy — verify against the complete expander.
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6512 [(set (match_operand:V4SF 0 "register_operand" "")
6513 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6514 (match_operand:V8HF 2 "register_operand" "")
6515 (match_operand:V8HF 3 "register_operand" "")
6516 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6520 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6521 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6523 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (vector, by element, quad): half-precision widening multiply-add-long
;; into a V4SF accumulator (operand 1, tied to the output).  Uses the low half
;; of V8HF operand 2 (operand 4 is the low-half selector) and lane %5 of the
;; full V8HF operand 3; emits "fmlal".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6532 [(set (match_operand:V4SF 0 "register_operand" "=w")
6536 (match_operand:V8HF 2 "register_operand" "w")
6537 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6541 (match_operand:V8HF 3 "register_operand" "x")
6542 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6543 (match_operand:V4SF 1 "register_operand" "0")))]
6545 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6546 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element, quad): half-precision widening
;; multiply-subtract-long into a V4SF accumulator (operand 1, tied to the
;; output).  Uses the low half of V8HF operand 2 and lane %5 of the full
;; V8HF operand 3; emits "fmlsl".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6550 [(set (match_operand:V4SF 0 "register_operand" "=w")
6555 (match_operand:V8HF 2 "register_operand" "w")
6556 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6560 (match_operand:V8HF 3 "register_operand" "x")
6561 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6562 (match_operand:V4SF 1 "register_operand" "0")))]
6564 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6565 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element, quad): half-precision widening
;; multiply-add-long into a V4SF accumulator (operand 1, tied to the output).
;; Uses the high half of V8HF operand 2 (operand 4 is the high-half selector)
;; and lane %5 of the full V8HF operand 3; emits "fmlal2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6569 [(set (match_operand:V4SF 0 "register_operand" "=w")
6573 (match_operand:V8HF 2 "register_operand" "w")
6574 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6578 (match_operand:V8HF 3 "register_operand" "x")
6579 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6580 (match_operand:V4SF 1 "register_operand" "0")))]
6582 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6583 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element, quad): half-precision widening
;; multiply-subtract-long into a V4SF accumulator (operand 1, tied to the
;; output).  Uses the high half of V8HF operand 2 and lane %5 of the full
;; V8HF operand 3; emits "fmlsl2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6587 [(set (match_operand:V4SF 0 "register_operand" "=w")
6592 (match_operand:V8HF 2 "register_operand" "w")
6593 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6597 (match_operand:V8HF 3 "register_operand" "x")
6598 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6599 (match_operand:V4SF 1 "register_operand" "0")))]
6601 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6602 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 64-bit fmlal/fmlsl "laneq low" intrinsics: V4HF
;; multiplicand (operand 2) with a lane from a full V8HF operand 3.
;; Builds p1, the low-half selector for V4HF (elements 0..1), endian-corrects
;; the lane index in V8HFmode (the lane comes from the 128-bit register),
;; then emits the matching aarch64_simd_fml*l_laneq_lowv2sf insn.
;; NOTE(review): the unspec name, condition, and trailing emit arguments
;; appear missing from this copy — verify against the complete expander.
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6606 [(set (match_operand:V2SF 0 "register_operand" "")
6607 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6608 (match_operand:V4HF 2 "register_operand" "")
6609 (match_operand:V8HF 3 "register_operand" "")
6610 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6614 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6615 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6617 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the 64-bit fmlal2/fmlsl2 "laneq high" intrinsics.  Builds p1,
;; the high-half selector for V4HF (elements 2..3; note the 'true' argument),
;; endian-corrects the lane index in V8HFmode, then emits the matching
;; aarch64_simd_fml*l_laneq_highv2sf insn.
;; NOTE(review): the unspec name, condition, and trailing emit arguments
;; appear missing from this copy — verify against the complete expander.
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6627 [(set (match_operand:V2SF 0 "register_operand" "")
6628 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6629 (match_operand:V4HF 2 "register_operand" "")
6630 (match_operand:V8HF 3 "register_operand" "")
6631 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6635 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6636 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6638 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (vector, by element): half-precision widening multiply-add-long
;; into a V2SF accumulator (operand 1, tied to the output).  Uses the low
;; half of V4HF operand 2 and lane %5 of the full V8HF operand 3 (laneq
;; form, lane range 0..7 via "aarch64_lane_imm3"); emits "fmlal".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6648 [(set (match_operand:V2SF 0 "register_operand" "=w")
6652 (match_operand:V4HF 2 "register_operand" "w")
6653 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6657 (match_operand:V8HF 3 "register_operand" "x")
6658 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6659 (match_operand:V2SF 1 "register_operand" "0")))]
6661 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6662 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element): half-precision widening multiply-subtract-long
;; into a V2SF accumulator (operand 1, tied to the output).  Uses the low
;; half of V4HF operand 2 and lane %5 of the full V8HF operand 3 (laneq
;; form); emits "fmlsl".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6666 [(set (match_operand:V2SF 0 "register_operand" "=w")
6671 (match_operand:V4HF 2 "register_operand" "w")
6672 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6676 (match_operand:V8HF 3 "register_operand" "x")
6677 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678 (match_operand:V2SF 1 "register_operand" "0")))]
6680 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6681 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element): half-precision widening multiply-add-long
;; into a V2SF accumulator (operand 1, tied to the output).  Uses the high
;; half of V4HF operand 2 and lane %5 of the full V8HF operand 3 (laneq
;; form); emits "fmlal2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6685 [(set (match_operand:V2SF 0 "register_operand" "=w")
6689 (match_operand:V4HF 2 "register_operand" "w")
6690 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6694 (match_operand:V8HF 3 "register_operand" "x")
6695 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6696 (match_operand:V2SF 1 "register_operand" "0")))]
6698 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6699 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element): half-precision widening multiply-subtract-long
;; into a V2SF accumulator (operand 1, tied to the output).  Uses the high
;; half of V4HF operand 2 and lane %5 of the full V8HF operand 3 (laneq
;; form); emits "fmlsl2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6703 [(set (match_operand:V2SF 0 "register_operand" "=w")
6708 (match_operand:V4HF 2 "register_operand" "w")
6709 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6713 (match_operand:V8HF 3 "register_operand" "x")
6714 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6715 (match_operand:V2SF 1 "register_operand" "0")))]
6717 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6718 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad-width fmlal/fmlsl "lane low" intrinsics: V8HF
;; multiplicand (operand 2) with a lane from a 64-bit V4HF operand 3
;; (lane range 0..3 via "aarch64_imm2").  Builds p1, the low-half selector
;; for V8HF, endian-corrects the lane index in V4HFmode, then emits the
;; matching aarch64_simd_fml*lq_lane_lowv4sf insn.
;; NOTE(review): the unspec name, condition, and trailing emit arguments
;; appear missing from this copy — verify against the complete expander.
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6722 [(set (match_operand:V4SF 0 "register_operand" "")
6723 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6724 (match_operand:V8HF 2 "register_operand" "")
6725 (match_operand:V4HF 3 "register_operand" "")
6726 (match_operand:SI 4 "aarch64_imm2" "")]
6730 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6731 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6733 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the quad-width fmlal2/fmlsl2 "lane high" intrinsics.
;; Builds p1, the high-half selector for V8HF (note the 'true' argument),
;; endian-corrects the lane index in V4HFmode, then emits the matching
;; aarch64_simd_fml*lq_lane_highv4sf insn.
;; NOTE(review): the unspec name, condition, and trailing emit arguments
;; appear missing from this copy — verify against the complete expander.
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6742 [(set (match_operand:V4SF 0 "register_operand" "")
6743 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6744 (match_operand:V8HF 2 "register_operand" "")
6745 (match_operand:V4HF 3 "register_operand" "")
6746 (match_operand:SI 4 "aarch64_imm2" "")]
6750 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6751 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6753 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (vector, by element, quad): half-precision widening multiply-add-long
;; into a V4SF accumulator (operand 1, tied to the output).  Uses the low half
;; of V8HF operand 2 and lane %5 of the 64-bit V4HF operand 3 (lane range
;; 0..3 via "aarch64_imm2"); emits "fmlal".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6762 [(set (match_operand:V4SF 0 "register_operand" "=w")
6766 (match_operand:V8HF 2 "register_operand" "w")
6767 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6771 (match_operand:V4HF 3 "register_operand" "x")
6772 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6773 (match_operand:V4SF 1 "register_operand" "0")))]
6775 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6776 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element, quad): half-precision widening
;; multiply-subtract-long into a V4SF accumulator (operand 1, tied to the
;; output).  Uses the low half of V8HF operand 2 and lane %5 of the 64-bit
;; V4HF operand 3; emits "fmlsl".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6780 [(set (match_operand:V4SF 0 "register_operand" "=w")
6785 (match_operand:V8HF 2 "register_operand" "w")
6786 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6790 (match_operand:V4HF 3 "register_operand" "x")
6791 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6792 (match_operand:V4SF 1 "register_operand" "0")))]
6794 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6795 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element, quad): half-precision widening
;; multiply-add-long into a V4SF accumulator (operand 1, tied to the output).
;; Uses the high half of V8HF operand 2 and lane %5 of the 64-bit V4HF
;; operand 3; emits "fmlal2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6799 [(set (match_operand:V4SF 0 "register_operand" "=w")
6803 (match_operand:V8HF 2 "register_operand" "w")
6804 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6808 (match_operand:V4HF 3 "register_operand" "x")
6809 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6810 (match_operand:V4SF 1 "register_operand" "0")))]
6812 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6813 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element, quad): half-precision widening
;; multiply-subtract-long into a V4SF accumulator (operand 1, tied to the
;; output).  Uses the high half of V8HF operand 2 and lane %5 of the 64-bit
;; V4HF operand 3; emits "fmlsl2".
;; NOTE(review): interior RTL lines appear missing from this copy — verify
;; against the complete pattern before editing.
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6817 [(set (match_operand:V4SF 0 "register_operand" "=w")
6822 (match_operand:V8HF 2 "register_operand" "w")
6823 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6827 (match_operand:V4HF 3 "register_operand" "x")
6828 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6829 (match_operand:V4SF 1 "register_operand" "0")))]
6831 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6832 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL: 64x64 -> 128-bit polynomial (carry-less) multiply of two DI
;; registers, producing a TI result.  Gated on both TARGET_SIMD and
;; TARGET_AES, since PMULL on the 1Q/1D arrangement is part of the
;; Crypto/AES extension.
;; NOTE(review): the unspec name and closing bracket of the RTL template
;; appear missing from this copy — verify against the complete pattern.
6837 (define_insn "aarch64_crypto_pmulldi"
6838 [(set (match_operand:TI 0 "register_operand" "=w")
6839 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6840 (match_operand:DI 2 "register_operand" "w")]
6842 "TARGET_SIMD && TARGET_AES"
6843 "pmull\\t%0.1q, %1.1d, %2.1d"
6844 [(set_attr "type" "crypto_pmull")]
;; PMULL2: polynomial (carry-less) multiply of the upper 64-bit elements of
;; two V2DI registers, producing a 128-bit TI result.  Gated on both
;; TARGET_SIMD and TARGET_AES (Crypto/AES extension).
;; NOTE(review): the unspec name and closing bracket of the RTL template
;; appear missing from this copy — verify against the complete pattern.
6847 (define_insn "aarch64_crypto_pmullv2di"
6848 [(set (match_operand:TI 0 "register_operand" "=w")
6849 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6850 (match_operand:V2DI 2 "register_operand" "w")]
6852 "TARGET_SIMD && TARGET_AES"
6853 "pmull2\\t%0.1q, %1.2d, %2.2d"
6854 [(set_attr "type" "crypto_pmull")]