1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umq, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
209 [(set (match_operand:VQ 0 "register_operand" "")
210 (match_operand:VQ 1 "register_operand" ""))]
211 "TARGET_SIMD && reload_completed
212 && GP_REGNUM_P (REGNO (operands[0]))
213 && GP_REGNUM_P (REGNO (operands[1]))"
216 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
221 [(set (match_operand:VQ 0 "register_operand" "")
222 (match_operand:VQ 1 "register_operand" ""))]
223 "TARGET_SIMD && reload_completed
224 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
225 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
228 aarch64_split_simd_move (operands[0], operands[1]);
232 (define_expand "aarch64_split_simd_mov<mode>"
233 [(set (match_operand:VQ 0)
234 (match_operand:VQ 1))]
237 rtx dst = operands[0];
238 rtx src = operands[1];
240 if (GP_REGNUM_P (REGNO (src)))
242 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
243 rtx src_high_part = gen_highpart (<VHALF>mode, src);
246 (gen_move_lo_quad_<mode> (dst, src_low_part));
248 (gen_move_hi_quad_<mode> (dst, src_high_part));
253 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
254 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
255 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
256 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
259 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
261 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
267 (define_insn "aarch64_simd_mov_from_<mode>low"
268 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
270 (match_operand:VQ 1 "register_operand" "w")
271 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
272 "TARGET_SIMD && reload_completed"
274 [(set_attr "type" "neon_to_gp<q>")
275 (set_attr "length" "4")
278 (define_insn "aarch64_simd_mov_from_<mode>high"
279 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
281 (match_operand:VQ 1 "register_operand" "w")
282 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
283 "TARGET_SIMD && reload_completed"
285 [(set_attr "type" "neon_to_gp<q>")
286 (set_attr "length" "4")
289 (define_insn "orn<mode>3"
290 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
291 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
292 (match_operand:VDQ_I 2 "register_operand" "w")))]
294 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
295 [(set_attr "type" "neon_logic<q>")]
298 (define_insn "bic<mode>3"
299 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
300 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
301 (match_operand:VDQ_I 2 "register_operand" "w")))]
303 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
304 [(set_attr "type" "neon_logic<q>")]
307 (define_insn "add<mode>3"
308 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
309 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
310 (match_operand:VDQ_I 2 "register_operand" "w")))]
312 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
313 [(set_attr "type" "neon_add<q>")]
316 (define_insn "sub<mode>3"
317 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
318 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
319 (match_operand:VDQ_I 2 "register_operand" "w")))]
321 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
322 [(set_attr "type" "neon_sub<q>")]
325 (define_insn "mul<mode>3"
326 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
327 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
328 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
330 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
331 [(set_attr "type" "neon_mul_<Vetype><q>")]
334 (define_insn "bswap<mode>2"
335 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
336 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
338 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
339 [(set_attr "type" "neon_rev<q>")]
342 (define_insn "aarch64_rbit<mode>"
343 [(set (match_operand:VB 0 "register_operand" "=w")
344 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
347 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
348 [(set_attr "type" "neon_rbit")]
351 (define_expand "ctz<mode>2"
352 [(set (match_operand:VS 0 "register_operand")
353 (ctz:VS (match_operand:VS 1 "register_operand")))]
356 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
357 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
359 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
360 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
365 (define_expand "xorsign<mode>3"
366 [(match_operand:VHSDF 0 "register_operand")
367 (match_operand:VHSDF 1 "register_operand")
368 (match_operand:VHSDF 2 "register_operand")]
372 machine_mode imode = <V_INT_EQUIV>mode;
373 rtx v_bitmask = gen_reg_rtx (imode);
374 rtx op1x = gen_reg_rtx (imode);
375 rtx op2x = gen_reg_rtx (imode);
377 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
378 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
380 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
382 emit_move_insn (v_bitmask,
383 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
384 HOST_WIDE_INT_M1U << bits));
386 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
387 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
388 emit_move_insn (operands[0],
389 lowpart_subreg (<MODE>mode, op1x, imode));
394 ;; These instructions map to the __builtins for the Dot Product operations.
395 (define_insn "aarch64_<sur>dot<vsi2qi>"
396 [(set (match_operand:VS 0 "register_operand" "=w")
397 (plus:VS (match_operand:VS 1 "register_operand" "0")
398 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
399 (match_operand:<VSI2QI> 3 "register_operand" "w")]
402 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
403 [(set_attr "type" "neon_dot")]
406 ;; These expands map to the Dot Product optab the vectorizer checks for.
407 ;; The auto-vectorizer expects a dot product builtin that also does an
408 ;; accumulation into the provided register.
409 ;; Given the following pattern
411 ;; for (i=0; i<len; i++) {
417 ;; This can be auto-vectorized to
418 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
420 ;; given enough iterations. However the vectorizer can keep unrolling the loop
421 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
422 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
425 ;; and so the vectorizer provides r, in which the result has to be accumulated.
426 (define_expand "<sur>dot_prod<vsi2qi>"
427 [(set (match_operand:VS 0 "register_operand")
428 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
429 (match_operand:<VSI2QI> 2 "register_operand")]
431 (match_operand:VS 3 "register_operand")))]
435 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
437 emit_insn (gen_rtx_SET (operands[0], operands[3]));
441 ;; These instructions map to the __builtins for the Dot Product
442 ;; indexed operations.
443 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
444 [(set (match_operand:VS 0 "register_operand" "=w")
445 (plus:VS (match_operand:VS 1 "register_operand" "0")
446 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
447 (match_operand:V8QI 3 "register_operand" "<h_con>")
448 (match_operand:SI 4 "immediate_operand" "i")]
452 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
453 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
455 [(set_attr "type" "neon_dot")]
458 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
459 [(set (match_operand:VS 0 "register_operand" "=w")
460 (plus:VS (match_operand:VS 1 "register_operand" "0")
461 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
462 (match_operand:V16QI 3 "register_operand" "<h_con>")
463 (match_operand:SI 4 "immediate_operand" "i")]
467 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
468 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
470 [(set_attr "type" "neon_dot")]
473 (define_expand "copysign<mode>3"
474 [(match_operand:VHSDF 0 "register_operand")
475 (match_operand:VHSDF 1 "register_operand")
476 (match_operand:VHSDF 2 "register_operand")]
477 "TARGET_FLOAT && TARGET_SIMD"
479 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
480 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
482 emit_move_insn (v_bitmask,
483 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
484 HOST_WIDE_INT_M1U << bits));
485 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
486 operands[2], operands[1]));
491 (define_insn "*aarch64_mul3_elt<mode>"
492 [(set (match_operand:VMUL 0 "register_operand" "=w")
496 (match_operand:VMUL 1 "register_operand" "<h_con>")
497 (parallel [(match_operand:SI 2 "immediate_operand")])))
498 (match_operand:VMUL 3 "register_operand" "w")))]
501 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
502 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
504 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
507 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
508 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
509 (mult:VMUL_CHANGE_NLANES
510 (vec_duplicate:VMUL_CHANGE_NLANES
512 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
513 (parallel [(match_operand:SI 2 "immediate_operand")])))
514 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
517 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
518 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
520 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
523 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
524 [(set (match_operand:VMUL 0 "register_operand" "=w")
527 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
528 (match_operand:VMUL 2 "register_operand" "w")))]
530 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
531 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
534 (define_insn "aarch64_rsqrte<mode>"
535 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
536 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
539 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
540 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
542 (define_insn "aarch64_rsqrts<mode>"
543 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
544 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
545 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
548 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
549 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
551 (define_expand "rsqrt<mode>2"
552 [(set (match_operand:VALLF 0 "register_operand" "=w")
553 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
557 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
561 (define_insn "*aarch64_mul3_elt_to_64v2df"
562 [(set (match_operand:DF 0 "register_operand" "=w")
565 (match_operand:V2DF 1 "register_operand" "w")
566 (parallel [(match_operand:SI 2 "immediate_operand")]))
567 (match_operand:DF 3 "register_operand" "w")))]
570 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
571 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
573 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
576 (define_insn "neg<mode>2"
577 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
578 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
580 "neg\t%0.<Vtype>, %1.<Vtype>"
581 [(set_attr "type" "neon_neg<q>")]
584 (define_insn "abs<mode>2"
585 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
586 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
588 "abs\t%0.<Vtype>, %1.<Vtype>"
589 [(set_attr "type" "neon_abs<q>")]
592 ;; The intrinsic version of integer ABS must not be allowed to
593 ;; combine with any operation with an integerated ABS step, such
595 (define_insn "aarch64_abs<mode>"
596 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
598 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
601 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
602 [(set_attr "type" "neon_abs<q>")]
605 (define_insn "abd<mode>_3"
606 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
607 (abs:VDQ_BHSI (minus:VDQ_BHSI
608 (match_operand:VDQ_BHSI 1 "register_operand" "w")
609 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
611 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
612 [(set_attr "type" "neon_abd<q>")]
615 (define_insn "aarch64_<sur>abdl2<mode>_3"
616 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
617 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
618 (match_operand:VDQV_S 2 "register_operand" "w")]
621 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
622 [(set_attr "type" "neon_abd<q>")]
625 (define_insn "aarch64_<sur>abal<mode>_4"
626 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
627 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
628 (match_operand:VDQV_S 2 "register_operand" "w")
629 (match_operand:<VDBLW> 3 "register_operand" "0")]
632 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
633 [(set_attr "type" "neon_arith_acc<q>")]
636 (define_insn "aarch64_<sur>adalp<mode>_3"
637 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
638 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
639 (match_operand:<VDBLW> 2 "register_operand" "0")]
642 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
643 [(set_attr "type" "neon_reduc_add<q>")]
646 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
647 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
648 ;; reduction of the difference into a V4SI vector and accumulate that into
649 ;; operand 3 before copying that into the result operand 0.
650 ;; Perform that with a sequence of:
651 ;; UABDL2 tmp.8h, op1.16b, op2.16b
652 ;; UABAL tmp.8h, op1.16b, op2.16b
653 ;; UADALP op3.4s, tmp.8h
654 ;; MOV op0, op3 // should be eliminated in later passes.
655 ;; The signed version just uses the signed variants of the above instructions.
657 (define_expand "<sur>sadv16qi"
658 [(use (match_operand:V4SI 0 "register_operand"))
659 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
660 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
661 (use (match_operand:V4SI 3 "register_operand"))]
664 rtx reduc = gen_reg_rtx (V8HImode);
665 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
667 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
668 operands[2], reduc));
669 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
671 emit_move_insn (operands[0], operands[3]);
676 (define_insn "aba<mode>_3"
677 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
678 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
679 (match_operand:VDQ_BHSI 1 "register_operand" "w")
680 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
681 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
683 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
684 [(set_attr "type" "neon_arith_acc<q>")]
687 (define_insn "fabd<mode>3"
688 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
691 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
692 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
694 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
695 [(set_attr "type" "neon_fp_abd_<stype><q>")]
698 ;; For AND (vector, register) and BIC (vector, immediate)
699 (define_insn "and<mode>3"
700 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
701 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
702 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
705 switch (which_alternative)
708 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
710 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
716 [(set_attr "type" "neon_logic<q>")]
719 ;; For ORR (vector, register) and ORR (vector, immediate)
720 (define_insn "ior<mode>3"
721 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
722 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
723 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
726 switch (which_alternative)
729 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
731 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
737 [(set_attr "type" "neon_logic<q>")]
740 (define_insn "xor<mode>3"
741 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
742 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
743 (match_operand:VDQ_I 2 "register_operand" "w")))]
745 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
746 [(set_attr "type" "neon_logic<q>")]
749 (define_insn "one_cmpl<mode>2"
750 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
751 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
753 "not\t%0.<Vbtype>, %1.<Vbtype>"
754 [(set_attr "type" "neon_logic<q>")]
757 (define_insn "aarch64_simd_vec_set<mode>"
758 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
760 (vec_duplicate:VALL_F16
761 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
762 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
763 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
766 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
767 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
768 switch (which_alternative)
771 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
773 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
775 return "ld1\\t{%0.<Vetype>}[%p2], %1";
780 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
783 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
784 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
786 (vec_duplicate:VALL_F16
788 (match_operand:VALL_F16 3 "register_operand" "w")
790 [(match_operand:SI 4 "immediate_operand" "i")])))
791 (match_operand:VALL_F16 1 "register_operand" "0")
792 (match_operand:SI 2 "immediate_operand" "i")))]
795 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
796 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
797 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
799 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
801 [(set_attr "type" "neon_ins<q>")]
804 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
805 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
806 (vec_merge:VALL_F16_NO_V2Q
807 (vec_duplicate:VALL_F16_NO_V2Q
809 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
811 [(match_operand:SI 4 "immediate_operand" "i")])))
812 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
813 (match_operand:SI 2 "immediate_operand" "i")))]
816 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
817 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
818 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
819 INTVAL (operands[4]));
821 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
823 [(set_attr "type" "neon_ins<q>")]
826 (define_insn "aarch64_simd_lshr<mode>"
827 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
828 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
829 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
831 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
832 [(set_attr "type" "neon_shift_imm<q>")]
835 (define_insn "aarch64_simd_ashr<mode>"
836 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
837 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
838 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
840 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
841 [(set_attr "type" "neon_shift_imm<q>")]
844 (define_insn "aarch64_simd_imm_shl<mode>"
845 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
846 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
847 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
849 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
850 [(set_attr "type" "neon_shift_imm<q>")]
853 (define_insn "aarch64_simd_reg_sshl<mode>"
854 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
855 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
856 (match_operand:VDQ_I 2 "register_operand" "w")))]
858 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
859 [(set_attr "type" "neon_shift_reg<q>")]
862 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
863 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
864 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
865 (match_operand:VDQ_I 2 "register_operand" "w")]
866 UNSPEC_ASHIFT_UNSIGNED))]
868 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
869 [(set_attr "type" "neon_shift_reg<q>")]
872 (define_insn "aarch64_simd_reg_shl<mode>_signed"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "register_operand" "w")]
876 UNSPEC_ASHIFT_SIGNED))]
878 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
879 [(set_attr "type" "neon_shift_reg<q>")]
882 (define_expand "ashl<mode>3"
883 [(match_operand:VDQ_I 0 "register_operand" "")
884 (match_operand:VDQ_I 1 "register_operand" "")
885 (match_operand:SI 2 "general_operand" "")]
888 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
891 if (CONST_INT_P (operands[2]))
893 shift_amount = INTVAL (operands[2]);
894 if (shift_amount >= 0 && shift_amount < bit_width)
896 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
898 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
905 operands[2] = force_reg (SImode, operands[2]);
908 else if (MEM_P (operands[2]))
910 operands[2] = force_reg (SImode, operands[2]);
913 if (REG_P (operands[2]))
915 rtx tmp = gen_reg_rtx (<MODE>mode);
916 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
917 convert_to_mode (<VEL>mode,
920 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
929 (define_expand "lshr<mode>3"
930 [(match_operand:VDQ_I 0 "register_operand" "")
931 (match_operand:VDQ_I 1 "register_operand" "")
932 (match_operand:SI 2 "general_operand" "")]
935 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
938 if (CONST_INT_P (operands[2]))
940 shift_amount = INTVAL (operands[2]);
941 if (shift_amount > 0 && shift_amount <= bit_width)
943 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
945 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
951 operands[2] = force_reg (SImode, operands[2]);
953 else if (MEM_P (operands[2]))
955 operands[2] = force_reg (SImode, operands[2]);
958 if (REG_P (operands[2]))
960 rtx tmp = gen_reg_rtx (SImode);
961 rtx tmp1 = gen_reg_rtx (<MODE>mode);
962 emit_insn (gen_negsi2 (tmp, operands[2]));
963 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
964 convert_to_mode (<VEL>mode,
966 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
976 (define_expand "ashr<mode>3"
977 [(match_operand:VDQ_I 0 "register_operand" "")
978 (match_operand:VDQ_I 1 "register_operand" "")
979 (match_operand:SI 2 "general_operand" "")]
982 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
985 if (CONST_INT_P (operands[2]))
987 shift_amount = INTVAL (operands[2]);
988 if (shift_amount > 0 && shift_amount <= bit_width)
990 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
992 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
998 operands[2] = force_reg (SImode, operands[2]);
1000 else if (MEM_P (operands[2]))
1002 operands[2] = force_reg (SImode, operands[2]);
1005 if (REG_P (operands[2]))
1007 rtx tmp = gen_reg_rtx (SImode);
1008 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1009 emit_insn (gen_negsi2 (tmp, operands[2]));
1010 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1011 convert_to_mode (<VEL>mode,
1013 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1023 (define_expand "vashl<mode>3"
1024 [(match_operand:VDQ_I 0 "register_operand" "")
1025 (match_operand:VDQ_I 1 "register_operand" "")
1026 (match_operand:VDQ_I 2 "register_operand" "")]
1029 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1034 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1035 ;; Negating individual lanes most certainly offsets the
1036 ;; gain from vectorization.
1037 (define_expand "vashr<mode>3"
1038 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1039 (match_operand:VDQ_BHSI 1 "register_operand" "")
1040 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1043 rtx neg = gen_reg_rtx (<MODE>mode);
1044 emit (gen_neg<mode>2 (neg, operands[2]));
1045 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1051 (define_expand "aarch64_ashr_simddi"
1052 [(match_operand:DI 0 "register_operand" "=w")
1053 (match_operand:DI 1 "register_operand" "w")
1054 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1057 /* An arithmetic shift right by 64 fills the result with copies of the sign
1058 bit, just like asr by 63 - however the standard pattern does not handle
1060 if (INTVAL (operands[2]) == 64)
1061 operands[2] = GEN_INT (63);
1062 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1067 (define_expand "vlshr<mode>3"
1068 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1069 (match_operand:VDQ_BHSI 1 "register_operand" "")
1070 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1073 rtx neg = gen_reg_rtx (<MODE>mode);
1074 emit (gen_neg<mode>2 (neg, operands[2]));
1075 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1080 (define_expand "aarch64_lshr_simddi"
1081 [(match_operand:DI 0 "register_operand" "=w")
1082 (match_operand:DI 1 "register_operand" "w")
1083 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1086 if (INTVAL (operands[2]) == 64)
1087 emit_move_insn (operands[0], const0_rtx);
1089 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1094 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1095 (define_insn "vec_shr_<mode>"
1096 [(set (match_operand:VD 0 "register_operand" "=w")
1097 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1098 (match_operand:SI 2 "immediate_operand" "i")]
1102 if (BYTES_BIG_ENDIAN)
1103 return "shl %d0, %d1, %2";
1105 return "ushr %d0, %d1, %2";
1107 [(set_attr "type" "neon_shift_imm")]
1110 (define_expand "vec_set<mode>"
1111 [(match_operand:VALL_F16 0 "register_operand" "+w")
1112 (match_operand:<VEL> 1 "register_operand" "w")
1113 (match_operand:SI 2 "immediate_operand" "")]
1116 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1117 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1118 GEN_INT (elem), operands[0]));
1124 (define_insn "aarch64_mla<mode>"
1125 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1126 (plus:VDQ_BHSI (mult:VDQ_BHSI
1127 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1128 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1129 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1131 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1132 [(set_attr "type" "neon_mla_<Vetype><q>")]
1135 (define_insn "*aarch64_mla_elt<mode>"
1136 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1139 (vec_duplicate:VDQHS
1141 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1142 (parallel [(match_operand:SI 2 "immediate_operand")])))
1143 (match_operand:VDQHS 3 "register_operand" "w"))
1144 (match_operand:VDQHS 4 "register_operand" "0")))]
1147 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1148 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1150 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1153 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1154 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1157 (vec_duplicate:VDQHS
1159 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1160 (parallel [(match_operand:SI 2 "immediate_operand")])))
1161 (match_operand:VDQHS 3 "register_operand" "w"))
1162 (match_operand:VDQHS 4 "register_operand" "0")))]
1165 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1166 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1168 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1171 (define_insn "*aarch64_mla_elt_merge<mode>"
1172 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1174 (mult:VDQHS (vec_duplicate:VDQHS
1175 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1176 (match_operand:VDQHS 2 "register_operand" "w"))
1177 (match_operand:VDQHS 3 "register_operand" "0")))]
1179 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1180 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1183 (define_insn "aarch64_mls<mode>"
1184 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1185 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1186 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1187 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1189 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1190 [(set_attr "type" "neon_mla_<Vetype><q>")]
1193 (define_insn "*aarch64_mls_elt<mode>"
1194 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1196 (match_operand:VDQHS 4 "register_operand" "0")
1198 (vec_duplicate:VDQHS
1200 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1201 (parallel [(match_operand:SI 2 "immediate_operand")])))
1202 (match_operand:VDQHS 3 "register_operand" "w"))))]
1205 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1206 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1211 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1212 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1214 (match_operand:VDQHS 4 "register_operand" "0")
1216 (vec_duplicate:VDQHS
1218 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1219 (parallel [(match_operand:SI 2 "immediate_operand")])))
1220 (match_operand:VDQHS 3 "register_operand" "w"))))]
1223 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1224 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1226 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1229 (define_insn "*aarch64_mls_elt_merge<mode>"
1230 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1232 (match_operand:VDQHS 1 "register_operand" "0")
1233 (mult:VDQHS (vec_duplicate:VDQHS
1234 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1235 (match_operand:VDQHS 3 "register_operand" "w"))))]
1237 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1238 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1241 ;; Max/Min operations.
1242 (define_insn "<su><maxmin><mode>3"
1243 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1244 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1245 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1247 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1248 [(set_attr "type" "neon_minmax<q>")]
1251 (define_expand "<su><maxmin>v2di3"
1252 [(set (match_operand:V2DI 0 "register_operand" "")
1253 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1254 (match_operand:V2DI 2 "register_operand" "")))]
1257 enum rtx_code cmp_operator;
1278 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1279 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1280 operands[2], cmp_fmt, operands[1], operands[2]));
1284 ;; Pairwise Integer Max/Min operations.
1285 (define_insn "aarch64_<maxmin_uns>p<mode>"
1286 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1287 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1288 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1291 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1292 [(set_attr "type" "neon_minmax<q>")]
1295 ;; Pairwise FP Max/Min operations.
1296 (define_insn "aarch64_<maxmin_uns>p<mode>"
1297 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1298 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1299 (match_operand:VHSDF 2 "register_operand" "w")]
1302 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1303 [(set_attr "type" "neon_minmax<q>")]
1306 ;; vec_concat gives a new vector with the low elements from operand 1, and
1307 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1308 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1309 ;; What that means, is that the RTL descriptions of the below patterns
1310 ;; need to change depending on endianness.
1312 ;; Move to the low architectural bits of the register.
1313 ;; On little-endian this is { operand, zeroes }
1314 ;; On big-endian this is { zeroes, operand }
1316 (define_insn "move_lo_quad_internal_<mode>"
1317 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1319 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1320 (vec_duplicate:<VHALF> (const_int 0))))]
1321 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1326 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1327 (set_attr "simd" "yes,*,yes")
1328 (set_attr "fp" "*,yes,*")
1329 (set_attr "length" "4")]
1332 (define_insn "move_lo_quad_internal_<mode>"
1333 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1335 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1337 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1342 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1343 (set_attr "simd" "yes,*,yes")
1344 (set_attr "fp" "*,yes,*")
1345 (set_attr "length" "4")]
1348 (define_insn "move_lo_quad_internal_be_<mode>"
1349 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1351 (vec_duplicate:<VHALF> (const_int 0))
1352 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1353 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1358 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1359 (set_attr "simd" "yes,*,yes")
1360 (set_attr "fp" "*,yes,*")
1361 (set_attr "length" "4")]
1364 (define_insn "move_lo_quad_internal_be_<mode>"
1365 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1368 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1369 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1374 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1375 (set_attr "simd" "yes,*,yes")
1376 (set_attr "fp" "*,yes,*")
1377 (set_attr "length" "4")]
1380 (define_expand "move_lo_quad_<mode>"
1381 [(match_operand:VQ 0 "register_operand")
1382 (match_operand:VQ 1 "register_operand")]
1385 if (BYTES_BIG_ENDIAN)
1386 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1388 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1393 ;; Move operand1 to the high architectural bits of the register, keeping
1394 ;; the low architectural bits of operand2.
1395 ;; For little-endian this is { operand2, operand1 }
1396 ;; For big-endian this is { operand1, operand2 }
1398 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1399 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1403 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1404 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1405 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1407 ins\\t%0.d[1], %1.d[0]
1409 [(set_attr "type" "neon_ins")]
1412 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1413 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1415 (match_operand:<VHALF> 1 "register_operand" "w,r")
1418 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1419 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1421 ins\\t%0.d[1], %1.d[0]
1423 [(set_attr "type" "neon_ins")]
1426 (define_expand "move_hi_quad_<mode>"
1427 [(match_operand:VQ 0 "register_operand" "")
1428 (match_operand:<VHALF> 1 "register_operand" "")]
1431 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1432 if (BYTES_BIG_ENDIAN)
1433 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1436 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1441 ;; Narrowing operations.
1444 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1445 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1446 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1448 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1449 [(set_attr "type" "neon_shift_imm_narrow_q")]
1452 (define_expand "vec_pack_trunc_<mode>"
1453 [(match_operand:<VNARROWD> 0 "register_operand" "")
1454 (match_operand:VDN 1 "register_operand" "")
1455 (match_operand:VDN 2 "register_operand" "")]
1458 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1459 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1460 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1462 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1463 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1464 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1470 (define_insn "vec_pack_trunc_<mode>"
1471 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1472 (vec_concat:<VNARROWQ2>
1473 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1474 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1477 if (BYTES_BIG_ENDIAN)
1478 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1480 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1482 [(set_attr "type" "multiple")
1483 (set_attr "length" "8")]
1486 ;; Widening operations.
1488 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1490 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491 (match_operand:VQW 1 "register_operand" "w")
1492 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1495 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1496 [(set_attr "type" "neon_shift_imm_long")]
1499 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1500 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1501 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1502 (match_operand:VQW 1 "register_operand" "w")
1503 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1506 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1507 [(set_attr "type" "neon_shift_imm_long")]
1510 (define_expand "vec_unpack<su>_hi_<mode>"
1511 [(match_operand:<VWIDE> 0 "register_operand" "")
1512 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1516 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1522 (define_expand "vec_unpack<su>_lo_<mode>"
1523 [(match_operand:<VWIDE> 0 "register_operand" "")
1524 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1527 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1528 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1534 ;; Widening arithmetic.
1536 (define_insn "*aarch64_<su>mlal_lo<mode>"
1537 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1540 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1541 (match_operand:VQW 2 "register_operand" "w")
1542 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1543 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1544 (match_operand:VQW 4 "register_operand" "w")
1546 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1548 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1549 [(set_attr "type" "neon_mla_<Vetype>_long")]
1552 (define_insn "*aarch64_<su>mlal_hi<mode>"
1553 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1556 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1557 (match_operand:VQW 2 "register_operand" "w")
1558 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1559 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1560 (match_operand:VQW 4 "register_operand" "w")
1562 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1564 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1565 [(set_attr "type" "neon_mla_<Vetype>_long")]
1568 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1571 (match_operand:<VWIDE> 1 "register_operand" "0")
1573 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1574 (match_operand:VQW 2 "register_operand" "w")
1575 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1576 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1577 (match_operand:VQW 4 "register_operand" "w")
1580 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1581 [(set_attr "type" "neon_mla_<Vetype>_long")]
1584 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1585 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1587 (match_operand:<VWIDE> 1 "register_operand" "0")
1589 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1590 (match_operand:VQW 2 "register_operand" "w")
1591 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1592 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1593 (match_operand:VQW 4 "register_operand" "w")
1596 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1597 [(set_attr "type" "neon_mla_<Vetype>_long")]
1600 (define_insn "*aarch64_<su>mlal<mode>"
1601 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1605 (match_operand:VD_BHSI 1 "register_operand" "w"))
1607 (match_operand:VD_BHSI 2 "register_operand" "w")))
1608 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1610 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1611 [(set_attr "type" "neon_mla_<Vetype>_long")]
1614 (define_insn "*aarch64_<su>mlsl<mode>"
1615 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1617 (match_operand:<VWIDE> 1 "register_operand" "0")
1620 (match_operand:VD_BHSI 2 "register_operand" "w"))
1622 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1624 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
1628 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1631 (match_operand:VQW 1 "register_operand" "w")
1632 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1633 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1634 (match_operand:VQW 2 "register_operand" "w")
1637 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1638 [(set_attr "type" "neon_mul_<Vetype>_long")]
1641 (define_expand "vec_widen_<su>mult_lo_<mode>"
1642 [(match_operand:<VWIDE> 0 "register_operand" "")
1643 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1644 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1647 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1648 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 1 "register_operand" "w")
1659 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661 (match_operand:VQW 2 "register_operand" "w")
1664 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_mul_<Vetype>_long")]
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669 [(match_operand:<VWIDE> 0 "register_operand" "")
1670 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1674 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1689 ;; Floating-point operations can raise an exception. Vectorizing such
1690 ;; operations are safe because of reasons explained below.
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling, however this is an optional feature. In the
1694 ;; event of a floating-point exception being raised by vectorised
1696 ;; 1. If trapped floating-point exceptions are available, then a trap
1697 ;; will be taken when any lane raises an enabled exception. A trap
1698 ;; handler may determine which lane raised the exception.
1699 ;; 2. Alternatively a sticky exception flag is set in the
1700 ;; floating-point status register (FPSR). Software may explicitly
1701 ;; test the exception flags, in which case the tests will either
1702 ;; prevent vectorisation, allowing precise identification of the
1703 ;; failing operation, or if tested outside of vectorisable regions
1704 ;; then the specific operation and lane are not of interest.
1706 ;; FP arithmetic operations.
1708 (define_insn "add<mode>3"
1709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711 (match_operand:VHSDF 2 "register_operand" "w")))]
1713 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1717 (define_insn "sub<mode>3"
1718 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720 (match_operand:VHSDF 2 "register_operand" "w")))]
1722 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1726 (define_insn "mul<mode>3"
1727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729 (match_operand:VHSDF 2 "register_operand" "w")))]
1731 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1735 (define_expand "div<mode>3"
1736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738 (match_operand:VHSDF 2 "register_operand" "w")))]
1741 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1744 operands[1] = force_reg (<MODE>mode, operands[1]);
1747 (define_insn "*div<mode>3"
1748 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750 (match_operand:VHSDF 2 "register_operand" "w")))]
1752 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753 [(set_attr "type" "neon_fp_div_<stype><q>")]
1756 (define_insn "neg<mode>2"
1757 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1760 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1764 (define_insn "abs<mode>2"
1765 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1768 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1772 (define_insn "fma<mode>4"
1773 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775 (match_operand:VHSDF 2 "register_operand" "w")
1776 (match_operand:VHSDF 3 "register_operand" "0")))]
1778 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783 [(set (match_operand:VDQF 0 "register_operand" "=w")
1787 (match_operand:VDQF 1 "register_operand" "<h_con>")
1788 (parallel [(match_operand:SI 2 "immediate_operand")])))
1789 (match_operand:VDQF 3 "register_operand" "w")
1790 (match_operand:VDQF 4 "register_operand" "0")))]
1793 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1796 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1802 (vec_duplicate:VDQSF
1804 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805 (parallel [(match_operand:SI 2 "immediate_operand")])))
1806 (match_operand:VDQSF 3 "register_operand" "w")
1807 (match_operand:VDQSF 4 "register_operand" "0")))]
1810 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1813 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817 [(set (match_operand:VMUL 0 "register_operand" "=w")
1820 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821 (match_operand:VMUL 2 "register_operand" "w")
1822 (match_operand:VMUL 3 "register_operand" "0")))]
1824 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829 [(set (match_operand:DF 0 "register_operand" "=w")
1832 (match_operand:V2DF 1 "register_operand" "w")
1833 (parallel [(match_operand:SI 2 "immediate_operand")]))
1834 (match_operand:DF 3 "register_operand" "w")
1835 (match_operand:DF 4 "register_operand" "0")))]
1838 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1841 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1844 (define_insn "fnma<mode>4"
1845 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1847 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848 (match_operand:VHSDF 2 "register_operand" "w")
1849 (match_operand:VHSDF 3 "register_operand" "0")))]
1851 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856 [(set (match_operand:VDQF 0 "register_operand" "=w")
1859 (match_operand:VDQF 3 "register_operand" "w"))
1862 (match_operand:VDQF 1 "register_operand" "<h_con>")
1863 (parallel [(match_operand:SI 2 "immediate_operand")])))
1864 (match_operand:VDQF 4 "register_operand" "0")))]
1867 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1870 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1877 (match_operand:VDQSF 3 "register_operand" "w"))
1878 (vec_duplicate:VDQSF
1880 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881 (parallel [(match_operand:SI 2 "immediate_operand")])))
1882 (match_operand:VDQSF 4 "register_operand" "0")))]
1885 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1888 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892 [(set (match_operand:VMUL 0 "register_operand" "=w")
1895 (match_operand:VMUL 2 "register_operand" "w"))
1897 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898 (match_operand:VMUL 3 "register_operand" "0")))]
1900 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905 [(set (match_operand:DF 0 "register_operand" "=w")
1908 (match_operand:V2DF 1 "register_operand" "w")
1909 (parallel [(match_operand:SI 2 "immediate_operand")]))
1911 (match_operand:DF 3 "register_operand" "w"))
1912 (match_operand:DF 4 "register_operand" "0")))]
1915 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1918 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1923 (define_insn "<frint_pattern><mode>2"
1924 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1928 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929 [(set_attr "type" "neon_fp_round_<stype><q>")]
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937 [(match_operand:VHSDF 1 "register_operand" "w")]
1940 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1944 ;; HF Scalar variants of related SIMD instructions.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946 [(set (match_operand:HI 0 "register_operand" "=w")
1947 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1949 "TARGET_SIMD_F16INST"
1950 "fcvt<frint_suffix><su>\t%h0, %h1"
1951 [(set_attr "type" "neon_fp_to_int_s")]
1954 (define_insn "<optab>_trunchfhi2"
1955 [(set (match_operand:HI 0 "register_operand" "=w")
1956 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957 "TARGET_SIMD_F16INST"
1958 "fcvtz<su>\t%h0, %h1"
1959 [(set_attr "type" "neon_fp_to_int_s")]
1962 (define_insn "<optab>hihf2"
1963 [(set (match_operand:HF 0 "register_operand" "=w")
1964 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965 "TARGET_SIMD_F16INST"
1966 "<su_optab>cvtf\t%h0, %h1"
1967 [(set_attr "type" "neon_int_to_fp_s")]
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1974 (match_operand:VDQF 1 "register_operand" "w")
1975 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1978 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1981 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1983 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984 output_asm_insn (buf, operands);
1987 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993 [(match_operand:VHSDF 1 "register_operand")]
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001 [(match_operand:VHSDF 1 "register_operand")]
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007 [(set (match_operand:VHSDF 0 "register_operand")
2008 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2016 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2018 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2026 ;; Float widening operations.
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030 (float_extend:<VWIDE> (vec_select:<VHALF>
2031 (match_operand:VQ_HSF 1 "register_operand" "w")
2032 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2035 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036 [(set_attr "type" "neon_fp_cvt_widen_s")]
2039 ;; Convert between fixed-point and floating-point (vector modes)
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043 (unspec:<VHSDF:FCVT_TARGET>
2044 [(match_operand:VHSDF 1 "register_operand" "w")
2045 (match_operand:SI 2 "immediate_operand" "i")]
2048 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054 (unspec:<VDQ_HSDI:FCVT_TARGET>
2055 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056 (match_operand:SI 2 "immediate_operand" "i")]
2059 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns their behavior is as required.
2072 (define_expand "vec_unpacks_lo_<mode>"
2073 [(match_operand:<VWIDE> 0 "register_operand" "")
2074 (match_operand:VQ_HSF 1 "register_operand" "")]
2077 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086 (float_extend:<VWIDE> (vec_select:<VHALF>
2087 (match_operand:VQ_HSF 1 "register_operand" "w")
2088 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2091 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092 [(set_attr "type" "neon_fp_cvt_widen_s")]
2095 (define_expand "vec_unpacks_hi_<mode>"
2096 [(match_operand:<VWIDE> 0 "register_operand" "")
2097 (match_operand:VQ_HSF 1 "register_operand" "")]
2100 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108 (float_extend:<VWIDE>
2109 (match_operand:VDF 1 "register_operand" "w")))]
2111 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112 [(set_attr "type" "neon_fp_cvt_widen_s")]
2115 ;; Float narrowing operations.
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118 [(set (match_operand:VDF 0 "register_operand" "=w")
2120 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2122 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2129 (match_operand:VDF 1 "register_operand" "0")
2131 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2141 (match_operand:<VWIDE> 2 "register_operand" "w"))
2142 (match_operand:VDF 1 "register_operand" "0")))]
2143 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149 [(match_operand:<VDBL> 0 "register_operand" "=w")
2150 (match_operand:VDF 1 "register_operand" "0")
2151 (match_operand:<VWIDE> 2 "register_operand" "w")]
2154 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157 emit_insn (gen (operands[0], operands[1], operands[2]));
2162 (define_expand "vec_pack_trunc_v2df"
2163 [(set (match_operand:V4SF 0 "register_operand")
2165 (float_truncate:V2SF
2166 (match_operand:V2DF 1 "register_operand"))
2167 (float_truncate:V2SF
2168 (match_operand:V2DF 2 "register_operand"))
2172 rtx tmp = gen_reg_rtx (V2SFmode);
2173 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2176 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178 tmp, operands[hi]));
2183 (define_expand "vec_pack_trunc_df"
2184 [(set (match_operand:V2SF 0 "register_operand")
2187 (match_operand:DF 1 "register_operand"))
2189 (match_operand:DF 2 "register_operand"))
2193 rtx tmp = gen_reg_rtx (V2SFmode);
2194 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2197 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2207 ;; a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2218 (define_insn "<su><maxmin><mode>3"
2219 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221 (match_operand:VHSDF 2 "register_operand" "w")))]
2223 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2229 ;; which implement the IEEE fmax ()/fmin () functions.
2230 (define_insn "<maxmin_uns><mode>3"
2231 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233 (match_operand:VHSDF 2 "register_operand" "w")]
2236 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2240 ;; 'across lanes' add.
2242 (define_expand "reduc_plus_scal_<mode>"
2243 [(match_operand:<VEL> 0 "register_operand" "=w")
2244 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2248 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249 rtx scratch = gen_reg_rtx (<MODE>mode);
2250 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2256 (define_insn "aarch64_faddp<mode>"
2257 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259 (match_operand:VHSDF 2 "register_operand" "w")]
2262 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267 [(set (match_operand:VDQV 0 "register_operand" "=w")
2268 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2271 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272 [(set_attr "type" "neon_reduc_add<q>")]
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276 [(set (match_operand:V2SI 0 "register_operand" "=w")
2277 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2280 "addp\\t%0.2s, %1.2s, %1.2s"
2281 [(set_attr "type" "neon_reduc_add")]
2284 (define_insn "reduc_plus_scal_<mode>"
2285 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2289 "faddp\\t%<Vetype>0, %1.<Vtype>"
2290 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2293 (define_expand "reduc_plus_scal_v4sf"
2294 [(set (match_operand:SF 0 "register_operand")
2295 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2299 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300 rtx scratch = gen_reg_rtx (V4SFmode);
2301 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2307 (define_insn "clrsb<mode>2"
2308 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2311 "cls\\t%0.<Vtype>, %1.<Vtype>"
2312 [(set_attr "type" "neon_cls<q>")]
2315 (define_insn "clz<mode>2"
2316 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2319 "clz\\t%0.<Vtype>, %1.<Vtype>"
2320 [(set_attr "type" "neon_cls<q>")]
2323 (define_insn "popcount<mode>2"
2324 [(set (match_operand:VB 0 "register_operand" "=w")
2325 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2327 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328 [(set_attr "type" "neon_cnt<q>")]
2331 ;; 'across lanes' max and min ops.
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336 [(match_operand:<VEL> 0 "register_operand")
2337 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2341 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342 rtx scratch = gen_reg_rtx (<MODE>mode);
2343 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2345 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand")
2353 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2357 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2361 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2371 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372 [(set_attr "type" "neon_reduc_minmax<q>")]
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376 [(set (match_operand:V2SI 0 "register_operand" "=w")
2377 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2380 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381 [(set_attr "type" "neon_reduc_minmax")]
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2389 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2398 ;; Thus our BSL is of the form:
2399 ;; op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2403 ;; bsl mask, op1, op2
2404 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2405 ;; bit op0, op2, mask
2406 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2407 ;; bif op0, op1, mask
2409 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; in *aarch64_simd_bsl<mode>_alt.
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2418 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421 (match_dup:<V_INT_EQUIV> 3)
2425 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428 [(set_attr "type" "neon_bsl<q>")]
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first. The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2442 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445 (match_dup:<V_INT_EQUIV> 2)))]
2448 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451 [(set_attr "type" "neon_bsl<q>")]
2454 ;; DImode is special, we want to avoid computing operations which are
2455 ;; more naturally computed in general purpose registers in the vector
2456 ;; registers. If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again. However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2469 (match_operand:DI 3 "register_operand" "w,0,w,r")
2470 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2476 bsl\\t%0.8b, %2.8b, %3.8b
2477 bit\\t%0.8b, %2.8b, %1.8b
2478 bif\\t%0.8b, %3.8b, %1.8b
2480 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2481 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2483 /* Split back to individual operations. If we're before reload, and
2484 able to create a temporary register, do so. If we're after reload,
2485 we've got an early-clobber destination register, so use that.
2486 Otherwise, we can't create pseudos and we can't yet guarantee that
2487 operands[0] is safe to write, so FAIL to split. */
2490 if (reload_completed)
2491 scratch = operands[0];
2492 else if (can_create_pseudo_p ())
2493 scratch = gen_reg_rtx (DImode);
2497 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2502 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503 (set_attr "length" "4,4,4,12")]
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2511 (match_operand:DI 3 "register_operand" "w,w,0,r")
2512 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2518 bsl\\t%0.8b, %3.8b, %2.8b
2519 bit\\t%0.8b, %3.8b, %1.8b
2520 bif\\t%0.8b, %2.8b, %1.8b
2522 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2523 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2525 /* Split back to individual operations. If we're before reload, and
2526 able to create a temporary register, do so. If we're after reload,
2527 we've got an early-clobber destination register, so use that.
2528 Otherwise, we can't create pseudos and we can't yet guarantee that
2529 operands[0] is safe to write, so FAIL to split. */
2532 if (reload_completed)
2533 scratch = operands[0];
2534 else if (can_create_pseudo_p ())
2535 scratch = gen_reg_rtx (DImode);
2539 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2544 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545 (set_attr "length" "4,4,4,12")]
2548 (define_expand "aarch64_simd_bsl<mode>"
2549 [(match_operand:VALLDIF 0 "register_operand")
2550 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551 (match_operand:VALLDIF 2 "register_operand")
2552 (match_operand:VALLDIF 3 "register_operand")]
2555 /* We can't alias operands together if they have different modes. */
2556 rtx tmp = operands[0];
2557 if (FLOAT_MODE_P (<MODE>mode))
2559 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2563 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2568 if (tmp != operands[0])
2569 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575 [(match_operand:VALLDI 0 "register_operand")
2576 (match_operand:VALLDI 1 "nonmemory_operand")
2577 (match_operand:VALLDI 2 "nonmemory_operand")
2578 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2581 /* If we have (a = (P) ? -1 : 0);
2582 Then we can simply move the generated mask (result must be int). */
2583 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584 && operands[2] == CONST0_RTX (<MODE>mode))
2585 emit_move_insn (operands[0], operands[3]);
2586 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2587 else if (operands[1] == CONST0_RTX (<MODE>mode)
2588 && operands[2] == CONSTM1_RTX (<MODE>mode))
2589 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2592 if (!REG_P (operands[1]))
2593 operands[1] = force_reg (<MODE>mode, operands[1]);
2594 if (!REG_P (operands[2]))
2595 operands[2] = force_reg (<MODE>mode, operands[2]);
2596 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597 operands[1], operands[2]));
2603 ;; Patterns comparing two vectors to produce a mask.
2605 (define_expand "vec_cmp<mode><mode>"
2606 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607 (match_operator 1 "comparison_operator"
2608 [(match_operand:VSDQ_I_DI 2 "register_operand")
2609 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2612 rtx mask = operands[0];
2613 enum rtx_code code = GET_CODE (operands[1]);
2623 if (operands[3] == CONST0_RTX (<MODE>mode))
2628 if (!REG_P (operands[3]))
2629 operands[3] = force_reg (<MODE>mode, operands[3]);
2637 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2641 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2645 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2649 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2653 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2657 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2661 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2665 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2669 /* Handle NE as !EQ. */
2670 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2675 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687 (match_operator 1 "comparison_operator"
2688 [(match_operand:VDQF 2 "register_operand")
2689 (match_operand:VDQF 3 "nonmemory_operand")]))]
2692 int use_zero_form = 0;
2693 enum rtx_code code = GET_CODE (operands[1]);
2694 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2696 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2705 if (operands[3] == CONST0_RTX (<MODE>mode))
2712 if (!REG_P (operands[3]))
2713 operands[3] = force_reg (<MODE>mode, operands[3]);
2723 comparison = gen_aarch64_cmlt<mode>;
2728 std::swap (operands[2], operands[3]);
2732 comparison = gen_aarch64_cmgt<mode>;
2737 comparison = gen_aarch64_cmle<mode>;
2742 std::swap (operands[2], operands[3]);
2746 comparison = gen_aarch64_cmge<mode>;
2750 comparison = gen_aarch64_cmeq<mode>;
2768 /* All of the above must not raise any FP exceptions. Thus we first
2769 check each operand for NaNs and force any elements containing NaN to
2770 zero before using them in the compare.
2771 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772 (cm<cc> (isnan (a) ? 0.0 : a,
2773 isnan (b) ? 0.0 : b))
2774 We use the following transformations for doing the comparisions:
2778 a UNLT b -> b GT a. */
2780 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787 lowpart_subreg (<V_INT_EQUIV>mode,
2790 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791 lowpart_subreg (<V_INT_EQUIV>mode,
2794 gcc_assert (comparison != NULL);
2795 emit_insn (comparison (operands[0],
2796 lowpart_subreg (<MODE>mode,
2797 tmp0, <V_INT_EQUIV>mode),
2798 lowpart_subreg (<MODE>mode,
2799 tmp1, <V_INT_EQUIV>mode)));
2800 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2810 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2811 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2817 a NE b -> ~(a EQ b) */
2818 gcc_assert (comparison != NULL);
2819 emit_insn (comparison (operands[0], operands[2], operands[3]));
2821 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2825 /* LTGT is not guranteed to not generate a FP exception. So let's
2826 go the faster way : ((a > b) || (b > a)). */
2827 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828 operands[2], operands[3]));
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2836 /* cmeq (a, a) & cmeq (b, b). */
2837 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838 operands[2], operands[2]));
2839 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2842 if (code == UNORDERED)
2843 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844 else if (code == UNEQ)
2846 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2858 (define_expand "vec_cmpu<mode><mode>"
2859 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860 (match_operator 1 "comparison_operator"
2861 [(match_operand:VSDQ_I_DI 2 "register_operand")
2862 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2865 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866 operands[2], operands[3]));
2870 (define_expand "vcond<mode><mode>"
2871 [(set (match_operand:VALLDI 0 "register_operand")
2872 (if_then_else:VALLDI
2873 (match_operator 3 "comparison_operator"
2874 [(match_operand:VALLDI 4 "register_operand")
2875 (match_operand:VALLDI 5 "nonmemory_operand")])
2876 (match_operand:VALLDI 1 "nonmemory_operand")
2877 (match_operand:VALLDI 2 "nonmemory_operand")))]
2880 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881 enum rtx_code code = GET_CODE (operands[3]);
2883 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2884 it as well as switch operands 1/2 in order to avoid the additional
2888 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889 operands[4], operands[5]);
2890 std::swap (operands[1], operands[2]);
2892 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893 operands[4], operands[5]));
2894 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895 operands[2], mask));
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902 (if_then_else:<V_cmp_mixed>
2903 (match_operator 3 "comparison_operator"
2904 [(match_operand:VDQF_COND 4 "register_operand")
2905 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2910 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911 enum rtx_code code = GET_CODE (operands[3]);
2913 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2914 it as well as switch operands 1/2 in order to avoid the additional
2918 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919 operands[4], operands[5]);
2920 std::swap (operands[1], operands[2]);
2922 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923 operands[4], operands[5]));
2924 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925 operands[0], operands[1],
2926 operands[2], mask));
2931 (define_expand "vcondu<mode><mode>"
2932 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933 (if_then_else:VSDQ_I_DI
2934 (match_operator 3 "comparison_operator"
2935 [(match_operand:VSDQ_I_DI 4 "register_operand")
2936 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2941 rtx mask = gen_reg_rtx (<MODE>mode);
2942 enum rtx_code code = GET_CODE (operands[3]);
2944 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2945 it as well as switch operands 1/2 in order to avoid the additional
2949 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950 operands[4], operands[5]);
2951 std::swap (operands[1], operands[2]);
2953 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954 operands[4], operands[5]));
2955 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956 operands[2], mask));
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961 [(set (match_operand:VDQF 0 "register_operand")
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966 (match_operand:VDQF 1 "nonmemory_operand")
2967 (match_operand:VDQF 2 "nonmemory_operand")))]
2970 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2973 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974 it as well as switch operands 1/2 in order to avoid the additional
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2982 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2984 operands[4], operands[5]));
2985 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986 operands[2], mask));
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2992 ;; Lane extraction with sign extension to general purpose register.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994 [(set (match_operand:GPI 0 "register_operand" "=r")
2997 (match_operand:VDQQH 1 "register_operand" "w")
2998 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3001 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3002 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3004 [(set_attr "type" "neon_to_gp<q>")]
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008 [(set (match_operand:SI 0 "register_operand" "=r")
3011 (match_operand:VDQQH 1 "register_operand" "w")
3012 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3015 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016 return "umov\\t%w0, %1.<Vetype>[%2]";
3018 [(set_attr "type" "neon_to_gp<q>")]
3021 ;; Lane extraction of a value, neither sign nor zero extension
3022 ;; is guaranteed so upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3024 (define_insn "aarch64_get_lane<mode>"
3025 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3027 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3031 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032 switch (which_alternative)
3035 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3037 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3039 return "st1\\t{%1.<Vetype>}[%2], %0";
3044 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3047 (define_insn "load_pair_lanes<mode>"
3048 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3050 (match_operand:VDC 1 "memory_operand" "Utq")
3051 (match_operand:VDC 2 "memory_operand" "m")))]
3052 "TARGET_SIMD && !STRICT_ALIGNMENT
3053 && rtx_equal_p (XEXP (operands[2], 0),
3054 plus_constant (Pmode,
3055 XEXP (operands[1], 0),
3056 GET_MODE_SIZE (<MODE>mode)))"
3058 [(set_attr "type" "neon_load1_1reg_q")]
3061 (define_insn "store_pair_lanes<mode>"
3062 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3064 (match_operand:VDC 1 "register_operand" "w, r")
3065 (match_operand:VDC 2 "register_operand" "w, r")))]
3069 stp\\t%x1, %x2, %y0"
3070 [(set_attr "type" "neon_stp, store_16")]
3073 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3076 (define_insn "*aarch64_combinez<mode>"
3077 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3079 (match_operand:VDC 1 "general_operand" "w,?r,m")
3080 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3086 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087 (set_attr "simd" "yes,*,yes")
3088 (set_attr "fp" "*,yes,*")]
3091 (define_insn "*aarch64_combinez_be<mode>"
3092 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3094 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3101 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102 (set_attr "simd" "yes,*,yes")
3103 (set_attr "fp" "*,yes,*")]
3106 (define_expand "aarch64_combine<mode>"
3107 [(match_operand:<VDBL> 0 "register_operand")
3108 (match_operand:VDC 1 "register_operand")
3109 (match_operand:VDC 2 "register_operand")]
3112 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3118 (define_expand "aarch64_simd_combine<mode>"
3119 [(match_operand:<VDBL> 0 "register_operand")
3120 (match_operand:VDC 1 "register_operand")
3121 (match_operand:VDC 2 "register_operand")]
3124 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3128 [(set_attr "type" "multiple")]
3131 ;; <su><addsub>l<q>.
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136 (match_operand:VQW 1 "register_operand" "w")
3137 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139 (match_operand:VQW 2 "register_operand" "w")
3142 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQW 1 "register_operand" "w")
3150 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152 (match_operand:VQW 2 "register_operand" "w")
3155 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3160 (define_expand "aarch64_saddl2<mode>"
3161 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162 (match_operand:VQW 1 "register_operand" "w")
3163 (match_operand:VQW 2 "register_operand" "w")]
3166 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3172 (define_expand "aarch64_uaddl2<mode>"
3173 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174 (match_operand:VQW 1 "register_operand" "w")
3175 (match_operand:VQW 2 "register_operand" "w")]
3178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3184 (define_expand "aarch64_ssubl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3196 (define_expand "aarch64_usubl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211 (match_operand:VD_BHSI 1 "register_operand" "w"))
3213 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3215 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3219 ;; <su><addsub>w<q>.
3221 (define_expand "widen_ssum<mode>3"
3222 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223 (plus:<VDBLW> (sign_extend:<VDBLW>
3224 (match_operand:VQW 1 "register_operand" ""))
3225 (match_operand:<VDBLW> 2 "register_operand" "")))]
3228 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3231 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3233 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3238 (define_expand "widen_ssum<mode>3"
3239 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240 (plus:<VWIDE> (sign_extend:<VWIDE>
3241 (match_operand:VD_BHSI 1 "register_operand" ""))
3242 (match_operand:<VWIDE> 2 "register_operand" "")))]
3245 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3249 (define_expand "widen_usum<mode>3"
3250 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251 (plus:<VDBLW> (zero_extend:<VDBLW>
3252 (match_operand:VQW 1 "register_operand" ""))
3253 (match_operand:<VDBLW> 2 "register_operand" "")))]
3256 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3259 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3261 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3266 (define_expand "widen_usum<mode>3"
3267 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268 (plus:<VWIDE> (zero_extend:<VWIDE>
3269 (match_operand:VD_BHSI 1 "register_operand" ""))
3270 (match_operand:<VWIDE> 2 "register_operand" "")))]
3273 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3281 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3283 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3292 (match_operand:VQW 2 "register_operand" "w")
3293 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3295 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3304 (match_operand:VQW 2 "register_operand" "w")
3305 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3307 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3311 (define_expand "aarch64_saddw2<mode>"
3312 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313 (match_operand:<VWIDE> 1 "register_operand" "w")
3314 (match_operand:VQW 2 "register_operand" "w")]
3317 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3323 (define_expand "aarch64_uaddw2<mode>"
3324 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325 (match_operand:<VWIDE> 1 "register_operand" "w")
3326 (match_operand:VQW 2 "register_operand" "w")]
3329 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3336 (define_expand "aarch64_ssubw2<mode>"
3337 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338 (match_operand:<VWIDE> 1 "register_operand" "w")
3339 (match_operand:VQW 2 "register_operand" "w")]
3342 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3348 (define_expand "aarch64_usubw2<mode>"
3349 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350 (match_operand:<VWIDE> 1 "register_operand" "w")
3351 (match_operand:VQW 2 "register_operand" "w")]
3354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3360 ;; <su><r>h<addsub>.
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3368 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369 [(set_attr "type" "neon_<addsub>_halve<q>")]
3372 ;; <r><addsub>hn<q>.
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377 (match_operand:VQN 2 "register_operand" "w")]
3380 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387 (match_operand:VQN 2 "register_operand" "w")
3388 (match_operand:VQN 3 "register_operand" "w")]
3391 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3397 (define_insn "aarch64_pmul<mode>"
3398 [(set (match_operand:VB 0 "register_operand" "=w")
3399 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400 (match_operand:VB 2 "register_operand" "w")]
3403 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404 [(set_attr "type" "neon_mul_<Vetype><q>")]
3409 (define_insn "aarch64_fmulx<mode>"
3410 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3412 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3416 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417 [(set_attr "type" "neon_fp_mul_<stype>")]
3420 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3425 [(match_operand:VDQSF 1 "register_operand" "w")
3426 (vec_duplicate:VDQSF
3428 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3433 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3436 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442 [(set (match_operand:VDQF 0 "register_operand" "=w")
3444 [(match_operand:VDQF 1 "register_operand" "w")
3447 (match_operand:VDQF 2 "register_operand" "w")
3448 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3452 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3455 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3463 [(match_operand:VHSDF 1 "register_operand" "w")
3464 (vec_duplicate:VHSDF
3465 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3468 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 == vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3479 [(match_operand:<VEL> 1 "register_operand" "w")
3481 (match_operand:VDQF 2 "register_operand" "w")
3482 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3486 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3489 [(set_attr "type" "fmul<Vetype>")]
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3498 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499 [(set_attr "type" "neon_<optab><q>")]
3502 ;; suqadd and usqadd
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507 (match_operand:VSDQ_I 2 "register_operand" "w")]
3510 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511 [(set_attr "type" "neon_qadd<q>")]
3516 (define_insn "aarch64_sqmovun<mode>"
3517 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3521 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3525 ;; sqmovn and uqmovn
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3532 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3538 (define_insn "aarch64_s<optab><mode>"
3539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3541 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3543 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544 [(set_attr "type" "neon_<optab><q>")]
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3552 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3556 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3565 [(match_operand:VDQHS 1 "register_operand" "w")
3567 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3572 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3580 [(match_operand:VDQHS 1 "register_operand" "w")
3582 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3587 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3595 [(match_operand:SD_HSI 1 "register_operand" "w")
3597 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3602 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3604 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3610 [(match_operand:SD_HSI 1 "register_operand" "w")
3612 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3617 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3619 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3627 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3632 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3636 ;; sqrdml[as]h_lane.
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3641 [(match_operand:VDQHS 1 "register_operand" "0")
3642 (match_operand:VDQHS 2 "register_operand" "w")
3644 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3649 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3651 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3659 [(match_operand:SD_HSI 1 "register_operand" "0")
3660 (match_operand:SD_HSI 2 "register_operand" "w")
3662 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3667 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3669 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3671 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3674 ;; sqrdml[as]h_laneq.
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3679 [(match_operand:VDQHS 1 "register_operand" "0")
3680 (match_operand:VDQHS 2 "register_operand" "w")
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3687 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3689 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3691 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3697 [(match_operand:SD_HSI 1 "register_operand" "0")
3698 (match_operand:SD_HSI 2 "register_operand" "w")
3700 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3705 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3707 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3717 (match_operand:<VWIDE> 1 "register_operand" "0")
3720 (sign_extend:<VWIDE>
3721 (match_operand:VSD_HSI 2 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (match_operand:VSD_HSI 3 "register_operand" "w")))
3726 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3735 (match_operand:<VWIDE> 1 "register_operand" "0")
3738 (sign_extend:<VWIDE>
3739 (match_operand:VD_HSI 2 "register_operand" "w"))
3740 (sign_extend:<VWIDE>
3741 (vec_duplicate:VD_HSI
3743 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3751 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3759 (match_operand:<VWIDE> 1 "register_operand" "0")
3762 (sign_extend:<VWIDE>
3763 (match_operand:VD_HSI 2 "register_operand" "w"))
3764 (sign_extend:<VWIDE>
3765 (vec_duplicate:VD_HSI
3767 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3773 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3775 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3777 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3783 (match_operand:<VWIDE> 1 "register_operand" "0")
3786 (sign_extend:<VWIDE>
3787 (match_operand:SD_HSI 2 "register_operand" "w"))
3788 (sign_extend:<VWIDE>
3790 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3796 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3798 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3800 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3806 (match_operand:<VWIDE> 1 "register_operand" "0")
3809 (sign_extend:<VWIDE>
3810 (match_operand:SD_HSI 2 "register_operand" "w"))
3811 (sign_extend:<VWIDE>
3813 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3819 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3821 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3823 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3831 (match_operand:<VWIDE> 1 "register_operand" "0")
3834 (sign_extend:<VWIDE>
3835 (match_operand:VD_HSI 2 "register_operand" "w"))
3836 (sign_extend:<VWIDE>
3837 (vec_duplicate:VD_HSI
3838 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3841 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3850 (match_operand:<VWIDE> 1 "register_operand" "0")
3853 (sign_extend:<VWIDE>
3855 (match_operand:VQ_HSI 2 "register_operand" "w")
3856 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857 (sign_extend:<VWIDE>
3859 (match_operand:VQ_HSI 3 "register_operand" "w")
3863 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869 (match_operand:<VWIDE> 1 "register_operand" "w")
3870 (match_operand:VQ_HSI 2 "register_operand" "w")
3871 (match_operand:VQ_HSI 3 "register_operand" "w")]
3874 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876 operands[2], operands[3], p));
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882 (match_operand:<VWIDE> 1 "register_operand" "w")
3883 (match_operand:VQ_HSI 2 "register_operand" "w")
3884 (match_operand:VQ_HSI 3 "register_operand" "w")]
3887 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889 operands[2], operands[3], p));
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3898 (match_operand:<VWIDE> 1 "register_operand" "0")
3901 (sign_extend:<VWIDE>
3903 (match_operand:VQ_HSI 2 "register_operand" "w")
3904 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905 (sign_extend:<VWIDE>
3906 (vec_duplicate:<VHALF>
3908 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3914 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3916 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3924 (match_operand:<VWIDE> 1 "register_operand" "0")
3927 (sign_extend:<VWIDE>
3929 (match_operand:VQ_HSI 2 "register_operand" "w")
3930 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931 (sign_extend:<VWIDE>
3932 (vec_duplicate:<VHALF>
3934 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3940 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3942 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3944 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949 (match_operand:<VWIDE> 1 "register_operand" "w")
3950 (match_operand:VQ_HSI 2 "register_operand" "w")
3951 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952 (match_operand:SI 4 "immediate_operand" "i")]
3955 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957 operands[2], operands[3],
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (match_operand:<VWIDE> 1 "register_operand" "w")
3965 (match_operand:VQ_HSI 2 "register_operand" "w")
3966 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967 (match_operand:SI 4 "immediate_operand" "i")]
3970 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972 operands[2], operands[3],
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979 (match_operand:<VWIDE> 1 "register_operand" "w")
3980 (match_operand:VQ_HSI 2 "register_operand" "w")
3981 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982 (match_operand:SI 4 "immediate_operand" "i")]
3985 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987 operands[2], operands[3],
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (match_operand:<VWIDE> 1 "register_operand" "w")
3995 (match_operand:VQ_HSI 2 "register_operand" "w")
3996 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997 (match_operand:SI 4 "immediate_operand" "i")]
4000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002 operands[2], operands[3],
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4010 (match_operand:<VWIDE> 1 "register_operand" "0")
4013 (sign_extend:<VWIDE>
4015 (match_operand:VQ_HSI 2 "register_operand" "w")
4016 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017 (sign_extend:<VWIDE>
4018 (vec_duplicate:<VHALF>
4019 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4022 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028 (match_operand:<VWIDE> 1 "register_operand" "w")
4029 (match_operand:VQ_HSI 2 "register_operand" "w")
4030 (match_operand:<VEL> 3 "register_operand" "w")]
4033 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035 operands[2], operands[3],
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (match_operand:<VWIDE> 1 "register_operand" "w")
4043 (match_operand:VQ_HSI 2 "register_operand" "w")
4044 (match_operand:<VEL> 3 "register_operand" "w")]
4047 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049 operands[2], operands[3],
4056 (define_insn "aarch64_sqdmull<mode>"
4057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4060 (sign_extend:<VWIDE>
4061 (match_operand:VSD_HSI 1 "register_operand" "w"))
4062 (sign_extend:<VWIDE>
4063 (match_operand:VSD_HSI 2 "register_operand" "w")))
4066 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4076 (sign_extend:<VWIDE>
4077 (match_operand:VD_HSI 1 "register_operand" "w"))
4078 (sign_extend:<VWIDE>
4079 (vec_duplicate:VD_HSI
4081 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4087 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4090 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4097 (sign_extend:<VWIDE>
4098 (match_operand:VD_HSI 1 "register_operand" "w"))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:VD_HSI
4102 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4108 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4111 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (sign_extend:<VWIDE>
4119 (match_operand:SD_HSI 1 "register_operand" "w"))
4120 (sign_extend:<VWIDE>
4122 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4128 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4131 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4138 (sign_extend:<VWIDE>
4139 (match_operand:SD_HSI 1 "register_operand" "w"))
4140 (sign_extend:<VWIDE>
4142 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4148 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4151 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4160 (sign_extend:<VWIDE>
4161 (match_operand:VD_HSI 1 "register_operand" "w"))
4162 (sign_extend:<VWIDE>
4163 (vec_duplicate:VD_HSI
4164 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4168 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4180 (sign_extend:<VWIDE>
4182 (match_operand:VQ_HSI 1 "register_operand" "w")
4183 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184 (sign_extend:<VWIDE>
4186 (match_operand:VQ_HSI 2 "register_operand" "w")
4191 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4195 (define_expand "aarch64_sqdmull2<mode>"
4196 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197 (match_operand:VQ_HSI 1 "register_operand" "w")
4198 (match_operand:VQ_HSI 2 "register_operand" "w")]
4201 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4213 (sign_extend:<VWIDE>
4215 (match_operand:VQ_HSI 1 "register_operand" "w")
4216 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217 (sign_extend:<VWIDE>
4218 (vec_duplicate:<VHALF>
4220 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4226 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4229 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4236 (sign_extend:<VWIDE>
4238 (match_operand:VQ_HSI 1 "register_operand" "w")
4239 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240 (sign_extend:<VWIDE>
4241 (vec_duplicate:<VHALF>
4243 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4249 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4252 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257 (match_operand:VQ_HSI 1 "register_operand" "w")
4258 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259 (match_operand:SI 3 "immediate_operand" "i")]
4262 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264 operands[2], operands[3],
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271 (match_operand:VQ_HSI 1 "register_operand" "w")
4272 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273 (match_operand:SI 3 "immediate_operand" "i")]
4276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278 operands[2], operands[3],
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4289 (sign_extend:<VWIDE>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4295 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4299 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305 (match_operand:VQ_HSI 1 "register_operand" "w")
4306 (match_operand:<VEL> 2 "register_operand" "w")]
4309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4317 (define_insn "aarch64_<sur>shl<mode>"
4318 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4320 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4324 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325 [(set_attr "type" "neon_shift_reg<q>")]
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4334 [(match_operand:VSDQ_I 1 "register_operand" "w")
4335 (match_operand:VSDQ_I 2 "register_operand" "w")]
4338 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339 [(set_attr "type" "neon_sat_shift_reg<q>")]
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4348 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4352 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4355 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4357 [(set_attr "type" "neon_shift_imm_long")]
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365 (match_operand:SI 2 "immediate_operand" "i")]
4369 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4372 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4374 [(set_attr "type" "neon_shift_imm_long")]
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4383 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4386 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387 [(set_attr "type" "neon_sat_shift_imm<q>")]
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4397 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4400 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401 [(set_attr "type" "neon_shift_acc<q>")]
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4411 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4414 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415 [(set_attr "type" "neon_shift_imm<q>")]
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4424 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4427 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428 [(set_attr "type" "neon_sat_shift_imm<q>")]
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4438 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4441 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note, we have constraints for Dz and Z as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
4450 (define_insn "aarch64_cm<optab><mode>"
4451 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4453 (COMPARISONS:<V_INT_EQUIV>
4454 (match_operand:VDQ_I 1 "register_operand" "w,w")
4455 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4459 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4468 (match_operand:DI 1 "register_operand" "w,w,r")
4469 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4471 (clobber (reg:CC CC_REGNUM))]
4474 "&& reload_completed"
4475 [(set (match_operand:DI 0 "register_operand")
4478 (match_operand:DI 1 "register_operand")
4479 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4482 /* If we are in the general purpose register file,
4483 we split to a sequence of comparison and store. */
4484 if (GP_REGNUM_P (REGNO (operands[0]))
4485 && GP_REGNUM_P (REGNO (operands[1])))
4487 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4493 /* Otherwise, we expand to a similar pattern which does not
4494 clobber CC_REGNUM. */
4496 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4499 (define_insn "*aarch64_cm<optab>di"
4500 [(set (match_operand:DI 0 "register_operand" "=w,w")
4503 (match_operand:DI 1 "register_operand" "w,w")
4504 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4506 "TARGET_SIMD && reload_completed"
4508 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509 cm<optab>\t%d0, %d1, #0"
4510 [(set_attr "type" "neon_compare, neon_compare_zero")]
4515 (define_insn "aarch64_cm<optab><mode>"
4516 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4518 (UCOMPARISONS:<V_INT_EQUIV>
4519 (match_operand:VDQ_I 1 "register_operand" "w")
4520 (match_operand:VDQ_I 2 "register_operand" "w")
4523 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524 [(set_attr "type" "neon_compare<q>")]
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528 [(set (match_operand:DI 0 "register_operand" "=w,r")
4531 (match_operand:DI 1 "register_operand" "w,r")
4532 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4534 (clobber (reg:CC CC_REGNUM))]
4537 "&& reload_completed"
4538 [(set (match_operand:DI 0 "register_operand")
4541 (match_operand:DI 1 "register_operand")
4542 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4545 /* If we are in the general purpose register file,
4546 we split to a sequence of comparison and store. */
4547 if (GP_REGNUM_P (REGNO (operands[0]))
4548 && GP_REGNUM_P (REGNO (operands[1])))
4550 machine_mode mode = CCmode;
4551 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4556 /* Otherwise, we expand to a similar pattern which does not
4557 clobber CC_REGNUM. */
4559 [(set_attr "type" "neon_compare,multiple")]
4562 (define_insn "*aarch64_cm<optab>di"
4563 [(set (match_operand:DI 0 "register_operand" "=w")
4566 (match_operand:DI 1 "register_operand" "w")
4567 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4569 "TARGET_SIMD && reload_completed"
4570 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571 [(set_attr "type" "neon_compare")]
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
4582 (define_insn "aarch64_cmtst<mode>"
4583 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4587 (match_operand:VDQ_I 1 "register_operand" "w")
4588 (match_operand:VDQ_I 2 "register_operand" "w"))
4589 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4593 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594 [(set_attr "type" "neon_tst<q>")]
4597 (define_insn_and_split "aarch64_cmtstdi"
4598 [(set (match_operand:DI 0 "register_operand" "=w,r")
4602 (match_operand:DI 1 "register_operand" "w,r")
4603 (match_operand:DI 2 "register_operand" "w,r"))
4605 (clobber (reg:CC CC_REGNUM))]
4608 "&& reload_completed"
4609 [(set (match_operand:DI 0 "register_operand")
4613 (match_operand:DI 1 "register_operand")
4614 (match_operand:DI 2 "register_operand"))
4617 /* If we are in the general purpose register file,
4618 we split to a sequence of comparison and store. */
4619 if (GP_REGNUM_P (REGNO (operands[0]))
4620 && GP_REGNUM_P (REGNO (operands[1])))
4622 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4629 /* Otherwise, we expand to a similar pattern which does not
4630 clobber CC_REGNUM. */
4632 [(set_attr "type" "neon_tst,multiple")]
4635 (define_insn "*aarch64_cmtstdi"
4636 [(set (match_operand:DI 0 "register_operand" "=w")
4640 (match_operand:DI 1 "register_operand" "w")
4641 (match_operand:DI 2 "register_operand" "w"))
4644 "cmtst\t%d0, %d1, %d2"
4645 [(set_attr "type" "neon_tst")]
4648 ;; fcm(eq|ge|gt|le|lt)
4650 (define_insn "aarch64_cm<optab><mode>"
4651 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4653 (COMPARISONS:<V_INT_EQUIV>
4654 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4659 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4665 ;; Note we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt).
4668 (define_insn "aarch64_fac<optab><mode>"
4669 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4671 (FAC_COMPARISONS:<V_INT_EQUIV>
4673 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4675 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4678 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4684 (define_insn "aarch64_addp<mode>"
4685 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4687 [(match_operand:VD_BHSI 1 "register_operand" "w")
4688 (match_operand:VD_BHSI 2 "register_operand" "w")]
4691 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692 [(set_attr "type" "neon_reduc_add<q>")]
4695 (define_insn "aarch64_addpdi"
4696 [(set (match_operand:DI 0 "register_operand" "=w")
4698 [(match_operand:V2DI 1 "register_operand" "w")]
4702 [(set_attr "type" "neon_reduc_add")]
4707 (define_expand "sqrt<mode>2"
4708 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4712 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4716 (define_insn "*sqrt<mode>2"
4717 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4720 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4724 ;; Patterns for vector struct loads and stores.
4726 (define_insn "aarch64_simd_ld2<mode>"
4727 [(set (match_operand:OI 0 "register_operand" "=w")
4728 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4732 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733 [(set_attr "type" "neon_load2_2reg<q>")]
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737 [(set (match_operand:OI 0 "register_operand" "=w")
4738 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4742 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743 [(set_attr "type" "neon_load2_all_lanes<q>")]
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747 [(set (match_operand:OI 0 "register_operand" "=w")
4748 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749 (match_operand:OI 2 "register_operand" "0")
4750 (match_operand:SI 3 "immediate_operand" "i")
4751 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4755 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4758 [(set_attr "type" "neon_load2_one_lane")]
4761 (define_expand "vec_load_lanesoi<mode>"
4762 [(set (match_operand:OI 0 "register_operand" "=w")
4763 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 if (BYTES_BIG_ENDIAN)
4770 rtx tmp = gen_reg_rtx (OImode);
4771 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4776 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4780 (define_insn "aarch64_simd_st2<mode>"
4781 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4786 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787 [(set_attr "type" "neon_store2_2reg<q>")]
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795 (match_operand:SI 2 "immediate_operand" "i")]
4799 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4802 [(set_attr "type" "neon_store2_one_lane<q>")]
4805 (define_expand "vec_store_lanesoi<mode>"
4806 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4812 if (BYTES_BIG_ENDIAN)
4814 rtx tmp = gen_reg_rtx (OImode);
4815 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4820 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4824 (define_insn "aarch64_simd_ld3<mode>"
4825 [(set (match_operand:CI 0 "register_operand" "=w")
4826 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4830 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831 [(set_attr "type" "neon_load3_3reg<q>")]
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835 [(set (match_operand:CI 0 "register_operand" "=w")
4836 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4840 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841 [(set_attr "type" "neon_load3_all_lanes<q>")]
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845 [(set (match_operand:CI 0 "register_operand" "=w")
4846 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847 (match_operand:CI 2 "register_operand" "0")
4848 (match_operand:SI 3 "immediate_operand" "i")
4849 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4853 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4856 [(set_attr "type" "neon_load3_one_lane")]
4859 (define_expand "vec_load_lanesci<mode>"
4860 [(set (match_operand:CI 0 "register_operand" "=w")
4861 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4866 if (BYTES_BIG_ENDIAN)
4868 rtx tmp = gen_reg_rtx (CImode);
4869 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4874 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4878 (define_insn "aarch64_simd_st3<mode>"
4879 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4884 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885 [(set_attr "type" "neon_store3_3reg<q>")]
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893 (match_operand:SI 2 "immediate_operand" "i")]
4897 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4900 [(set_attr "type" "neon_store3_one_lane<q>")]
4903 (define_expand "vec_store_lanesci<mode>"
4904 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4910 if (BYTES_BIG_ENDIAN)
4912 rtx tmp = gen_reg_rtx (CImode);
4913 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4918 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4922 (define_insn "aarch64_simd_ld4<mode>"
4923 [(set (match_operand:XI 0 "register_operand" "=w")
4924 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4928 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929 [(set_attr "type" "neon_load4_4reg<q>")]
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933 [(set (match_operand:XI 0 "register_operand" "=w")
4934 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4938 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load4_all_lanes<q>")]
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943 [(set (match_operand:XI 0 "register_operand" "=w")
4944 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (match_operand:XI 2 "register_operand" "0")
4946 (match_operand:SI 3 "immediate_operand" "i")
4947 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4951 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4954 [(set_attr "type" "neon_load4_one_lane")]
4957 (define_expand "vec_load_lanesxi<mode>"
4958 [(set (match_operand:XI 0 "register_operand" "=w")
4959 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4964 if (BYTES_BIG_ENDIAN)
4966 rtx tmp = gen_reg_rtx (XImode);
4967 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4972 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4976 (define_insn "aarch64_simd_st4<mode>"
4977 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4982 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983 [(set_attr "type" "neon_store4_4reg<q>")]
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991 (match_operand:SI 2 "immediate_operand" "i")]
4995 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4998 [(set_attr "type" "neon_store4_one_lane<q>")]
5001 (define_expand "vec_store_lanesxi<mode>"
5002 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5008 if (BYTES_BIG_ENDIAN)
5010 rtx tmp = gen_reg_rtx (XImode);
5011 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5016 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5023 [(match_operand:VSTRUCT 1 "register_operand" "w")
5024 (match_operand:V16QI 2 "register_operand" "w")]
5025 UNSPEC_REV_REGLIST))]
5028 "&& reload_completed"
5032 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033 for (i = 0; i < nregs; i++)
5035 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5041 [(set_attr "type" "neon_tbl1_q")
5042 (set_attr "length" "<insn_count>")]
5045 ;; Reload patterns for AdvSIMD register list operands.
5047 (define_expand "mov<mode>"
5048 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049 (match_operand:VSTRUCT 1 "general_operand" ""))]
5052 if (can_create_pseudo_p ())
5054 if (GET_CODE (operands[0]) != REG)
5055 operands[1] = force_reg (<MODE>mode, operands[1]);
5060 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5061 [(match_operand:CI 0 "register_operand" "=w")
5062 (match_operand:DI 1 "register_operand" "r")
5063 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5066 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5067 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5071 (define_insn "aarch64_ld1_x3_<mode>"
5072 [(set (match_operand:CI 0 "register_operand" "=w")
5074 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5075 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5077 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5078 [(set_attr "type" "neon_load1_3reg<q>")]
5081 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5082 [(match_operand:DI 0 "register_operand" "")
5083 (match_operand:OI 1 "register_operand" "")
5084 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5087 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5088 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5092 (define_insn "aarch64_st1_x2_<mode>"
5093 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5095 [(match_operand:OI 1 "register_operand" "w")
5096 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5098 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5099 [(set_attr "type" "neon_store1_2reg<q>")]
5102 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5103 [(match_operand:DI 0 "register_operand" "")
5104 (match_operand:CI 1 "register_operand" "")
5105 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5108 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5109 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5113 (define_insn "aarch64_st1_x3_<mode>"
5114 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5116 [(match_operand:CI 1 "register_operand" "w")
5117 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5119 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5120 [(set_attr "type" "neon_store1_3reg<q>")]
5123 (define_insn "*aarch64_mov<mode>"
5124 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5125 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5126 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5127 && (register_operand (operands[0], <MODE>mode)
5128 || register_operand (operands[1], <MODE>mode))"
5131 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5132 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5133 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5134 neon_load<nregs>_<nregs>reg_q")
5135 (set_attr "length" "<insn_count>,4,4")]
5138 (define_insn "aarch64_be_ld1<mode>"
5139 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5140 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5141 "aarch64_simd_struct_operand" "Utv")]
5144 "ld1\\t{%0<Vmtype>}, %1"
5145 [(set_attr "type" "neon_load1_1reg<q>")]
5148 (define_insn "aarch64_be_st1<mode>"
5149 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5150 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5153 "st1\\t{%1<Vmtype>}, %0"
5154 [(set_attr "type" "neon_store1_1reg<q>")]
5157 (define_insn "*aarch64_be_movoi"
5158 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5159 (match_operand:OI 1 "general_operand" " w,w,m"))]
5160 "TARGET_SIMD && BYTES_BIG_ENDIAN
5161 && (register_operand (operands[0], OImode)
5162 || register_operand (operands[1], OImode))"
5167 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5168 (set_attr "length" "8,4,4")]
5171 (define_insn "*aarch64_be_movci"
5172 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5173 (match_operand:CI 1 "general_operand" " w,w,o"))]
5174 "TARGET_SIMD && BYTES_BIG_ENDIAN
5175 && (register_operand (operands[0], CImode)
5176 || register_operand (operands[1], CImode))"
5178 [(set_attr "type" "multiple")
5179 (set_attr "length" "12,4,4")]
5182 (define_insn "*aarch64_be_movxi"
5183 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5184 (match_operand:XI 1 "general_operand" " w,w,o"))]
5185 "TARGET_SIMD && BYTES_BIG_ENDIAN
5186 && (register_operand (operands[0], XImode)
5187 || register_operand (operands[1], XImode))"
5189 [(set_attr "type" "multiple")
5190 (set_attr "length" "16,4,4")]
5194 [(set (match_operand:OI 0 "register_operand")
5195 (match_operand:OI 1 "register_operand"))]
5196 "TARGET_SIMD && reload_completed"
5199 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5204 [(set (match_operand:CI 0 "nonimmediate_operand")
5205 (match_operand:CI 1 "general_operand"))]
5206 "TARGET_SIMD && reload_completed"
5209 if (register_operand (operands[0], CImode)
5210 && register_operand (operands[1], CImode))
5212 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5215 else if (BYTES_BIG_ENDIAN)
5217 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5218 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5219 emit_move_insn (gen_lowpart (V16QImode,
5220 simplify_gen_subreg (TImode, operands[0],
5222 gen_lowpart (V16QImode,
5223 simplify_gen_subreg (TImode, operands[1],
5232 [(set (match_operand:XI 0 "nonimmediate_operand")
5233 (match_operand:XI 1 "general_operand"))]
5234 "TARGET_SIMD && reload_completed"
5237 if (register_operand (operands[0], XImode)
5238 && register_operand (operands[1], XImode))
5240 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5243 else if (BYTES_BIG_ENDIAN)
5245 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5246 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5247 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5248 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5255 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5256 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5257 (match_operand:DI 1 "register_operand" "w")
5258 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5261 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5262 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5265 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5270 (define_insn "aarch64_ld2<mode>_dreg"
5271 [(set (match_operand:OI 0 "register_operand" "=w")
5272 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5273 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5276 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5277 [(set_attr "type" "neon_load2_2reg<q>")]
5280 (define_insn "aarch64_ld2<mode>_dreg"
5281 [(set (match_operand:OI 0 "register_operand" "=w")
5282 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5283 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5286 "ld1\\t{%S0.1d - %T0.1d}, %1"
5287 [(set_attr "type" "neon_load1_2reg<q>")]
5290 (define_insn "aarch64_ld3<mode>_dreg"
5291 [(set (match_operand:CI 0 "register_operand" "=w")
5292 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5293 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5296 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5297 [(set_attr "type" "neon_load3_3reg<q>")]
5300 (define_insn "aarch64_ld3<mode>_dreg"
5301 [(set (match_operand:CI 0 "register_operand" "=w")
5302 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5303 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5306 "ld1\\t{%S0.1d - %U0.1d}, %1"
5307 [(set_attr "type" "neon_load1_3reg<q>")]
5310 (define_insn "aarch64_ld4<mode>_dreg"
5311 [(set (match_operand:XI 0 "register_operand" "=w")
5312 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5313 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5316 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5317 [(set_attr "type" "neon_load4_4reg<q>")]
5320 (define_insn "aarch64_ld4<mode>_dreg"
5321 [(set (match_operand:XI 0 "register_operand" "=w")
5322 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5323 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5326 "ld1\\t{%S0.1d - %V0.1d}, %1"
5327 [(set_attr "type" "neon_load1_4reg<q>")]
5330 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5331 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5332 (match_operand:DI 1 "register_operand" "r")
5333 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5336 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5337 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5339 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5343 (define_expand "aarch64_ld1<VALL_F16:mode>"
5344 [(match_operand:VALL_F16 0 "register_operand")
5345 (match_operand:DI 1 "register_operand")]
5348 machine_mode mode = <VALL_F16:MODE>mode;
5349 rtx mem = gen_rtx_MEM (mode, operands[1]);
5351 if (BYTES_BIG_ENDIAN)
5352 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5354 emit_move_insn (operands[0], mem);
5358 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5359 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5360 (match_operand:DI 1 "register_operand" "r")
5361 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5364 machine_mode mode = <VSTRUCT:MODE>mode;
5365 rtx mem = gen_rtx_MEM (mode, operands[1]);
5367 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5371 (define_expand "aarch64_ld1x2<VQ:mode>"
5372 [(match_operand:OI 0 "register_operand" "=w")
5373 (match_operand:DI 1 "register_operand" "r")
5374 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5377 machine_mode mode = OImode;
5378 rtx mem = gen_rtx_MEM (mode, operands[1]);
5380 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5384 (define_expand "aarch64_ld1x2<VDC:mode>"
5385 [(match_operand:OI 0 "register_operand" "=w")
5386 (match_operand:DI 1 "register_operand" "r")
5387 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5390 machine_mode mode = OImode;
5391 rtx mem = gen_rtx_MEM (mode, operands[1]);
5393 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5398 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5399 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5400 (match_operand:DI 1 "register_operand" "w")
5401 (match_operand:VSTRUCT 2 "register_operand" "0")
5402 (match_operand:SI 3 "immediate_operand" "i")
5403 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5406 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5407 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5410 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5411 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5412 operands[0], mem, operands[2], operands[3]));
5416 ;; Expanders for builtins to extract vector registers from large
5417 ;; opaque integer modes.
5421 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5422 [(match_operand:VDC 0 "register_operand" "=w")
5423 (match_operand:VSTRUCT 1 "register_operand" "w")
5424 (match_operand:SI 2 "immediate_operand" "i")]
5427 int part = INTVAL (operands[2]);
5428 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5429 int offset = part * 16;
5431 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5432 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5438 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5439 [(match_operand:VQ 0 "register_operand" "=w")
5440 (match_operand:VSTRUCT 1 "register_operand" "w")
5441 (match_operand:SI 2 "immediate_operand" "i")]
5444 int part = INTVAL (operands[2]);
5445 int offset = part * 16;
5447 emit_move_insn (operands[0],
5448 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5452 ;; Permuted-store expanders for neon intrinsics.
5454 ;; Permute instructions
5458 (define_expand "vec_perm<mode>"
5459 [(match_operand:VB 0 "register_operand")
5460 (match_operand:VB 1 "register_operand")
5461 (match_operand:VB 2 "register_operand")
5462 (match_operand:VB 3 "register_operand")]
5465 aarch64_expand_vec_perm (operands[0], operands[1],
5466 operands[2], operands[3], <nunits>);
5470 (define_insn "aarch64_tbl1<mode>"
5471 [(set (match_operand:VB 0 "register_operand" "=w")
5472 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5473 (match_operand:VB 2 "register_operand" "w")]
5476 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5477 [(set_attr "type" "neon_tbl1<q>")]
5480 ;; Two source registers.
5482 (define_insn "aarch64_tbl2v16qi"
5483 [(set (match_operand:V16QI 0 "register_operand" "=w")
5484 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5485 (match_operand:V16QI 2 "register_operand" "w")]
5488 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5489 [(set_attr "type" "neon_tbl2_q")]
5492 (define_insn "aarch64_tbl3<mode>"
5493 [(set (match_operand:VB 0 "register_operand" "=w")
5494 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5495 (match_operand:VB 2 "register_operand" "w")]
5498 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5499 [(set_attr "type" "neon_tbl3")]
5502 (define_insn "aarch64_tbx4<mode>"
5503 [(set (match_operand:VB 0 "register_operand" "=w")
5504 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5505 (match_operand:OI 2 "register_operand" "w")
5506 (match_operand:VB 3 "register_operand" "w")]
5509 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5510 [(set_attr "type" "neon_tbl4")]
5513 ;; Three source registers.
5515 (define_insn "aarch64_qtbl3<mode>"
5516 [(set (match_operand:VB 0 "register_operand" "=w")
5517 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5518 (match_operand:VB 2 "register_operand" "w")]
5521 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5522 [(set_attr "type" "neon_tbl3")]
5525 (define_insn "aarch64_qtbx3<mode>"
5526 [(set (match_operand:VB 0 "register_operand" "=w")
5527 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5528 (match_operand:CI 2 "register_operand" "w")
5529 (match_operand:VB 3 "register_operand" "w")]
5532 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5533 [(set_attr "type" "neon_tbl3")]
5536 ;; Four source registers.
5538 (define_insn "aarch64_qtbl4<mode>"
5539 [(set (match_operand:VB 0 "register_operand" "=w")
5540 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5541 (match_operand:VB 2 "register_operand" "w")]
5544 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5545 [(set_attr "type" "neon_tbl4")]
5548 (define_insn "aarch64_qtbx4<mode>"
5549 [(set (match_operand:VB 0 "register_operand" "=w")
5550 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5551 (match_operand:XI 2 "register_operand" "w")
5552 (match_operand:VB 3 "register_operand" "w")]
5555 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5556 [(set_attr "type" "neon_tbl4")]
5559 (define_insn_and_split "aarch64_combinev16qi"
5560 [(set (match_operand:OI 0 "register_operand" "=w")
5561 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5562 (match_operand:V16QI 2 "register_operand" "w")]
5566 "&& reload_completed"
5569 aarch64_split_combinev16qi (operands);
5572 [(set_attr "type" "multiple")]
5575 ;; This instruction's pattern is generated directly by
5576 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5577 ;; need corresponding changes there.
5578 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5579 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5580 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5581 (match_operand:VALL_F16 2 "register_operand" "w")]
5584 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5585 [(set_attr "type" "neon_permute<q>")]
5588 ;; This instruction's pattern is generated directly by
5589 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5590 ;; need corresponding changes there. Note that the immediate (third)
5591 ;; operand is a lane index not a byte index.
5592 (define_insn "aarch64_ext<mode>"
5593 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5594 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5595 (match_operand:VALL_F16 2 "register_operand" "w")
5596 (match_operand:SI 3 "immediate_operand" "i")]
5600 operands[3] = GEN_INT (INTVAL (operands[3])
5601 * GET_MODE_UNIT_SIZE (<MODE>mode));
5602 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5604 [(set_attr "type" "neon_ext<q>")]
5607 ;; This instruction's pattern is generated directly by
5608 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5609 ;; need corresponding changes there.
5610 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5611 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5612 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5615 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5616 [(set_attr "type" "neon_rev<q>")]
5619 (define_insn "aarch64_st2<mode>_dreg"
5620 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5621 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5622 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5625 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5626 [(set_attr "type" "neon_store2_2reg")]
5629 (define_insn "aarch64_st2<mode>_dreg"
5630 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5631 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5632 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5635 "st1\\t{%S1.1d - %T1.1d}, %0"
5636 [(set_attr "type" "neon_store1_2reg")]
5639 (define_insn "aarch64_st3<mode>_dreg"
5640 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5641 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5642 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5645 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5646 [(set_attr "type" "neon_store3_3reg")]
5649 (define_insn "aarch64_st3<mode>_dreg"
5650 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5651 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5652 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5655 "st1\\t{%S1.1d - %U1.1d}, %0"
5656 [(set_attr "type" "neon_store1_3reg")]
5659 (define_insn "aarch64_st4<mode>_dreg"
5660 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5661 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5662 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5665 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5666 [(set_attr "type" "neon_store4_4reg")]
5669 (define_insn "aarch64_st4<mode>_dreg"
5670 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5671 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5672 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5675 "st1\\t{%S1.1d - %V1.1d}, %0"
5676 [(set_attr "type" "neon_store1_4reg")]
5679 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5680 [(match_operand:DI 0 "register_operand" "r")
5681 (match_operand:VSTRUCT 1 "register_operand" "w")
5682 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5685 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5686 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5688 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5692 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5693 [(match_operand:DI 0 "register_operand" "r")
5694 (match_operand:VSTRUCT 1 "register_operand" "w")
5695 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5698 machine_mode mode = <VSTRUCT:MODE>mode;
5699 rtx mem = gen_rtx_MEM (mode, operands[0]);
5701 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5705 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5706 [(match_operand:DI 0 "register_operand" "r")
5707 (match_operand:VSTRUCT 1 "register_operand" "w")
5708 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5709 (match_operand:SI 2 "immediate_operand")]
5712 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5713 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5716 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5717 mem, operands[1], operands[2]));
5721 (define_expand "aarch64_st1<VALL_F16:mode>"
5722 [(match_operand:DI 0 "register_operand")
5723 (match_operand:VALL_F16 1 "register_operand")]
5726 machine_mode mode = <VALL_F16:MODE>mode;
5727 rtx mem = gen_rtx_MEM (mode, operands[0]);
5729 if (BYTES_BIG_ENDIAN)
5730 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5732 emit_move_insn (mem, operands[1]);
5736 ;; Expander for builtins to insert vector registers into large
5737 ;; opaque integer modes.
5739 ;; Q-register list. We don't need a D-reg inserter as we zero
5740 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5742 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5743 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5744 (match_operand:VSTRUCT 1 "register_operand" "0")
5745 (match_operand:VQ 2 "register_operand" "w")
5746 (match_operand:SI 3 "immediate_operand" "i")]
5749 int part = INTVAL (operands[3]);
5750 int offset = part * 16;
5752 emit_move_insn (operands[0], operands[1]);
5753 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5758 ;; Standard pattern name vec_init<mode><Vel>.
5760 (define_expand "vec_init<mode><Vel>"
5761 [(match_operand:VALL_F16 0 "register_operand" "")
5762 (match_operand 1 "" "")]
5765 aarch64_expand_vector_init (operands[0], operands[1]);
5769 (define_insn "*aarch64_simd_ld1r<mode>"
5770 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5771 (vec_duplicate:VALL_F16
5772 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5774 "ld1r\\t{%0.<Vtype>}, %1"
5775 [(set_attr "type" "neon_load1_all_lanes")]
5778 (define_insn "aarch64_simd_ld1<mode>_x2"
5779 [(set (match_operand:OI 0 "register_operand" "=w")
5780 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5781 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5784 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5785 [(set_attr "type" "neon_load1_2reg<q>")]
5788 (define_insn "aarch64_simd_ld1<mode>_x2"
5789 [(set (match_operand:OI 0 "register_operand" "=w")
5790 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5791 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5794 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5795 [(set_attr "type" "neon_load1_2reg<q>")]
5799 (define_insn "aarch64_frecpe<mode>"
5800 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5801 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5804 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5805 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5808 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5809 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5810 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5813 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5814 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5817 (define_insn "aarch64_frecps<mode>"
5818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5820 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5821 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5824 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5825 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5828 (define_insn "aarch64_urecpe<mode>"
5829 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5830 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5833 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5834 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5836 ;; Standard pattern name vec_extract<mode><Vel>.
5838 (define_expand "vec_extract<mode><Vel>"
5839 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5840 (match_operand:VALL_F16 1 "register_operand" "")
5841 (match_operand:SI 2 "immediate_operand" "")]
5845 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5851 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5852 [(set (match_operand:V16QI 0 "register_operand" "=w")
5853 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5854 (match_operand:V16QI 2 "register_operand" "w")]
5856 "TARGET_SIMD && TARGET_AES"
5857 "aes<aes_op>\\t%0.16b, %2.16b"
5858 [(set_attr "type" "crypto_aese")]
5861 ;; When AES/AESMC fusion is enabled we want the register allocation to
5865 ;; So prefer to tie operand 1 to operand 0 when fusing.
5867 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5868 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5869 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5871 "TARGET_SIMD && TARGET_AES"
5872 "aes<aesmc_op>\\t%0.16b, %1.16b"
5873 [(set_attr "type" "crypto_aesmc")
5874 (set_attr_alternative "enabled"
5875 [(if_then_else (match_test
5876 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5877 (const_string "yes" )
5878 (const_string "no"))
5879 (const_string "yes")])]
5882 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5883 ;; and enforce the register dependency without scheduling or register
5884 ;; allocation messing up the order or introducing moves inbetween.
5885 ;; Mash the two together during combine.
5887 (define_insn "*aarch64_crypto_aese_fused"
5888 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5891 [(match_operand:V16QI 1 "register_operand" "0")
5892 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5894 "TARGET_SIMD && TARGET_AES
5895 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5896 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5897 [(set_attr "type" "crypto_aese")
5898 (set_attr "length" "8")]
5901 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5902 ;; and enforce the register dependency without scheduling or register
5903 ;; allocation messing up the order or introducing moves inbetween.
5904 ;; Mash the two together during combine.
5906 (define_insn "*aarch64_crypto_aesd_fused"
5907 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5910 [(match_operand:V16QI 1 "register_operand" "0")
5911 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5913 "TARGET_SIMD && TARGET_AES
5914 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5915 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5916 [(set_attr "type" "crypto_aese")
5917 (set_attr "length" "8")]
5922 (define_insn "aarch64_crypto_sha1hsi"
5923 [(set (match_operand:SI 0 "register_operand" "=w")
5924 (unspec:SI [(match_operand:SI 1
5925 "register_operand" "w")]
5927 "TARGET_SIMD && TARGET_SHA2"
5929 [(set_attr "type" "crypto_sha1_fast")]
5932 (define_insn "aarch64_crypto_sha1hv4si"
5933 [(set (match_operand:SI 0 "register_operand" "=w")
5934 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5935 (parallel [(const_int 0)]))]
5937 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5939 [(set_attr "type" "crypto_sha1_fast")]
5942 (define_insn "aarch64_be_crypto_sha1hv4si"
5943 [(set (match_operand:SI 0 "register_operand" "=w")
5944 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5945 (parallel [(const_int 3)]))]
5947 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5949 [(set_attr "type" "crypto_sha1_fast")]
5952 (define_insn "aarch64_crypto_sha1su1v4si"
5953 [(set (match_operand:V4SI 0 "register_operand" "=w")
5954 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5955 (match_operand:V4SI 2 "register_operand" "w")]
5957 "TARGET_SIMD && TARGET_SHA2"
5958 "sha1su1\\t%0.4s, %2.4s"
5959 [(set_attr "type" "crypto_sha1_fast")]
5962 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5963 [(set (match_operand:V4SI 0 "register_operand" "=w")
5964 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5965 (match_operand:SI 2 "register_operand" "w")
5966 (match_operand:V4SI 3 "register_operand" "w")]
5968 "TARGET_SIMD && TARGET_SHA2"
5969 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5970 [(set_attr "type" "crypto_sha1_slow")]
5973 (define_insn "aarch64_crypto_sha1su0v4si"
5974 [(set (match_operand:V4SI 0 "register_operand" "=w")
5975 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5976 (match_operand:V4SI 2 "register_operand" "w")
5977 (match_operand:V4SI 3 "register_operand" "w")]
5979 "TARGET_SIMD && TARGET_SHA2"
5980 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5981 [(set_attr "type" "crypto_sha1_xor")]
5986 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5987 [(set (match_operand:V4SI 0 "register_operand" "=w")
5988 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5989 (match_operand:V4SI 2 "register_operand" "w")
5990 (match_operand:V4SI 3 "register_operand" "w")]
5992 "TARGET_SIMD && TARGET_SHA2"
5993 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5994 [(set_attr "type" "crypto_sha256_slow")]
5997 (define_insn "aarch64_crypto_sha256su0v4si"
5998 [(set (match_operand:V4SI 0 "register_operand" "=w")
5999 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6000 (match_operand:V4SI 2 "register_operand" "w")]
6002 "TARGET_SIMD && TARGET_SHA2"
6003 "sha256su0\\t%0.4s, %2.4s"
6004 [(set_attr "type" "crypto_sha256_fast")]
6007 (define_insn "aarch64_crypto_sha256su1v4si"
6008 [(set (match_operand:V4SI 0 "register_operand" "=w")
6009 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6010 (match_operand:V4SI 2 "register_operand" "w")
6011 (match_operand:V4SI 3 "register_operand" "w")]
6013 "TARGET_SIMD && TARGET_SHA2"
6014 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6015 [(set_attr "type" "crypto_sha256_slow")]
6020 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6021 [(set (match_operand:V2DI 0 "register_operand" "=w")
6022 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6023 (match_operand:V2DI 2 "register_operand" "w")
6024 (match_operand:V2DI 3 "register_operand" "w")]
6026 "TARGET_SIMD && TARGET_SHA3"
6027 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6028 [(set_attr "type" "crypto_sha512")]
6031 (define_insn "aarch64_crypto_sha512su0qv2di"
6032 [(set (match_operand:V2DI 0 "register_operand" "=w")
6033 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6034 (match_operand:V2DI 2 "register_operand" "w")]
6036 "TARGET_SIMD && TARGET_SHA3"
6037 "sha512su0\\t%0.2d, %2.2d"
6038 [(set_attr "type" "crypto_sha512")]
6041 (define_insn "aarch64_crypto_sha512su1qv2di"
6042 [(set (match_operand:V2DI 0 "register_operand" "=w")
6043 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6044 (match_operand:V2DI 2 "register_operand" "w")
6045 (match_operand:V2DI 3 "register_operand" "w")]
6047 "TARGET_SIMD && TARGET_SHA3"
6048 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6049 [(set_attr "type" "crypto_sha512")]
6054 (define_insn "eor3q<mode>4"
6055 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6058 (match_operand:VQ_I 2 "register_operand" "w")
6059 (match_operand:VQ_I 3 "register_operand" "w"))
6060 (match_operand:VQ_I 1 "register_operand" "w")))]
6061 "TARGET_SIMD && TARGET_SHA3"
6062 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6063 [(set_attr "type" "crypto_sha3")]
6066 (define_insn "aarch64_rax1qv2di"
6067 [(set (match_operand:V2DI 0 "register_operand" "=w")
6070 (match_operand:V2DI 2 "register_operand" "w")
6072 (match_operand:V2DI 1 "register_operand" "w")))]
6073 "TARGET_SIMD && TARGET_SHA3"
6074 "rax1\\t%0.2d, %1.2d, %2.2d"
6075 [(set_attr "type" "crypto_sha3")]
6078 (define_insn "aarch64_xarqv2di"
6079 [(set (match_operand:V2DI 0 "register_operand" "=w")
6082 (match_operand:V2DI 1 "register_operand" "%w")
6083 (match_operand:V2DI 2 "register_operand" "w"))
6084 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6085 "TARGET_SIMD && TARGET_SHA3"
6086 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6087 [(set_attr "type" "crypto_sha3")]
6090 (define_insn "bcaxq<mode>4"
6091 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6094 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6095 (match_operand:VQ_I 2 "register_operand" "w"))
6096 (match_operand:VQ_I 1 "register_operand" "w")))]
6097 "TARGET_SIMD && TARGET_SHA3"
6098 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6099 [(set_attr "type" "crypto_sha3")]
6104 (define_insn "aarch64_sm3ss1qv4si"
6105 [(set (match_operand:V4SI 0 "register_operand" "=w")
6106 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6107 (match_operand:V4SI 2 "register_operand" "w")
6108 (match_operand:V4SI 3 "register_operand" "w")]
6110 "TARGET_SIMD && TARGET_SM4"
6111 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6112 [(set_attr "type" "crypto_sm3")]
6116 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6117 [(set (match_operand:V4SI 0 "register_operand" "=w")
6118 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6119 (match_operand:V4SI 2 "register_operand" "w")
6120 (match_operand:V4SI 3 "register_operand" "w")
6121 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6123 "TARGET_SIMD && TARGET_SM4"
6124 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6125 [(set_attr "type" "crypto_sm3")]
6128 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6129 [(set (match_operand:V4SI 0 "register_operand" "=w")
6130 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6131 (match_operand:V4SI 2 "register_operand" "w")
6132 (match_operand:V4SI 3 "register_operand" "w")]
6134 "TARGET_SIMD && TARGET_SM4"
6135 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6136 [(set_attr "type" "crypto_sm3")]
6141 (define_insn "aarch64_sm4eqv4si"
6142 [(set (match_operand:V4SI 0 "register_operand" "=w")
6143 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6144 (match_operand:V4SI 2 "register_operand" "w")]
6146 "TARGET_SIMD && TARGET_SM4"
6147 "sm4e\\t%0.4s, %2.4s"
6148 [(set_attr "type" "crypto_sm4")]
6151 (define_insn "aarch64_sm4ekeyqv4si"
6152 [(set (match_operand:V4SI 0 "register_operand" "=w")
6153 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6154 (match_operand:V4SI 2 "register_operand" "w")]
6156 "TARGET_SIMD && TARGET_SM4"
6157 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6158 [(set_attr "type" "crypto_sm4")]
6163 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6164 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6166 [(match_operand:VDQSF 1 "register_operand" "0")
6167 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6168 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6172 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6173 <nunits> * 2, false);
6174 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6175 <nunits> * 2, false);
6177 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6186 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6187 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6189 [(match_operand:VDQSF 1 "register_operand" "0")
6190 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6191 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6195 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6196 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6198 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6206 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6207 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6210 (vec_select:<VFMLA_SEL_W>
6211 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6212 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6214 (vec_select:<VFMLA_SEL_W>
6215 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6216 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6217 (match_operand:VDQSF 1 "register_operand" "0")))]
6219 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6220 [(set_attr "type" "neon_fp_mul_s")]
6223 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6224 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6228 (vec_select:<VFMLA_SEL_W>
6229 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6230 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6232 (vec_select:<VFMLA_SEL_W>
6233 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6234 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6235 (match_operand:VDQSF 1 "register_operand" "0")))]
6237 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6238 [(set_attr "type" "neon_fp_mul_s")]
6241 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6242 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6245 (vec_select:<VFMLA_SEL_W>
6246 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6247 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6249 (vec_select:<VFMLA_SEL_W>
6250 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6251 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6252 (match_operand:VDQSF 1 "register_operand" "0")))]
6254 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6255 [(set_attr "type" "neon_fp_mul_s")]
6258 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6259 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6263 (vec_select:<VFMLA_SEL_W>
6264 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6265 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6267 (vec_select:<VFMLA_SEL_W>
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6269 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6270 (match_operand:VDQSF 1 "register_operand" "0")))]
6272 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6273 [(set_attr "type" "neon_fp_mul_s")]
6276 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6277 [(set (match_operand:V2SF 0 "register_operand" "")
6278 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6279 (match_operand:V4HF 2 "register_operand" "")
6280 (match_operand:V4HF 3 "register_operand" "")
6281 (match_operand:SI 4 "aarch64_imm2" "")]
6285 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6286 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6288 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6297 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6298 [(set (match_operand:V2SF 0 "register_operand" "")
6299 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6300 (match_operand:V4HF 2 "register_operand" "")
6301 (match_operand:V4HF 3 "register_operand" "")
6302 (match_operand:SI 4 "aarch64_imm2" "")]
6306 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6307 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6309 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6317 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6318 [(set (match_operand:V2SF 0 "register_operand" "=w")
6322 (match_operand:V4HF 2 "register_operand" "w")
6323 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6327 (match_operand:V4HF 3 "register_operand" "x")
6328 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6329 (match_operand:V2SF 1 "register_operand" "0")))]
6331 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6332 [(set_attr "type" "neon_fp_mul_s")]
6335 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6336 [(set (match_operand:V2SF 0 "register_operand" "=w")
6341 (match_operand:V4HF 2 "register_operand" "w")
6342 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6346 (match_operand:V4HF 3 "register_operand" "x")
6347 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6348 (match_operand:V2SF 1 "register_operand" "0")))]
6350 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6351 [(set_attr "type" "neon_fp_mul_s")]
6354 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6355 [(set (match_operand:V2SF 0 "register_operand" "=w")
6359 (match_operand:V4HF 2 "register_operand" "w")
6360 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6364 (match_operand:V4HF 3 "register_operand" "x")
6365 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6366 (match_operand:V2SF 1 "register_operand" "0")))]
6368 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6369 [(set_attr "type" "neon_fp_mul_s")]
6372 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6373 [(set (match_operand:V2SF 0 "register_operand" "=w")
6378 (match_operand:V4HF 2 "register_operand" "w")
6379 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6383 (match_operand:V4HF 3 "register_operand" "x")
6384 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6385 (match_operand:V2SF 1 "register_operand" "0")))]
6387 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6388 [(set_attr "type" "neon_fp_mul_s")]
6391 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6392 [(set (match_operand:V4SF 0 "register_operand" "")
6393 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6394 (match_operand:V8HF 2 "register_operand" "")
6395 (match_operand:V8HF 3 "register_operand" "")
6396 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6400 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6401 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6403 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6411 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6412 [(set (match_operand:V4SF 0 "register_operand" "")
6413 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6414 (match_operand:V8HF 2 "register_operand" "")
6415 (match_operand:V8HF 3 "register_operand" "")
6416 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6420 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6421 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6423 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6431 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6432 [(set (match_operand:V4SF 0 "register_operand" "=w")
6436 (match_operand:V8HF 2 "register_operand" "w")
6437 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6441 (match_operand:V8HF 3 "register_operand" "x")
6442 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6443 (match_operand:V4SF 1 "register_operand" "0")))]
6445 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6446 [(set_attr "type" "neon_fp_mul_s")]
6449 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6450 [(set (match_operand:V4SF 0 "register_operand" "=w")
6455 (match_operand:V8HF 2 "register_operand" "w")
6456 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6460 (match_operand:V8HF 3 "register_operand" "x")
6461 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6462 (match_operand:V4SF 1 "register_operand" "0")))]
6464 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6465 [(set_attr "type" "neon_fp_mul_s")]
6468 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6469 [(set (match_operand:V4SF 0 "register_operand" "=w")
6473 (match_operand:V8HF 2 "register_operand" "w")
6474 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6478 (match_operand:V8HF 3 "register_operand" "x")
6479 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6480 (match_operand:V4SF 1 "register_operand" "0")))]
6482 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6483 [(set_attr "type" "neon_fp_mul_s")]
6486 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6487 [(set (match_operand:V4SF 0 "register_operand" "=w")
6492 (match_operand:V8HF 2 "register_operand" "w")
6493 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6497 (match_operand:V8HF 3 "register_operand" "x")
6498 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6499 (match_operand:V4SF 1 "register_operand" "0")))]
6501 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6502 [(set_attr "type" "neon_fp_mul_s")]
6505 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6506 [(set (match_operand:V2SF 0 "register_operand" "")
6507 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6508 (match_operand:V4HF 2 "register_operand" "")
6509 (match_operand:V8HF 3 "register_operand" "")
6510 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6514 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6515 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6517 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6526 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6527 [(set (match_operand:V2SF 0 "register_operand" "")
6528 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6529 (match_operand:V4HF 2 "register_operand" "")
6530 (match_operand:V8HF 3 "register_operand" "")
6531 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6535 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6536 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6538 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6547 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6548 [(set (match_operand:V2SF 0 "register_operand" "=w")
6552 (match_operand:V4HF 2 "register_operand" "w")
6553 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6557 (match_operand:V8HF 3 "register_operand" "x")
6558 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6559 (match_operand:V2SF 1 "register_operand" "0")))]
6561 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6562 [(set_attr "type" "neon_fp_mul_s")]
6565 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6566 [(set (match_operand:V2SF 0 "register_operand" "=w")
6571 (match_operand:V4HF 2 "register_operand" "w")
6572 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6576 (match_operand:V8HF 3 "register_operand" "x")
6577 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6578 (match_operand:V2SF 1 "register_operand" "0")))]
6580 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6581 [(set_attr "type" "neon_fp_mul_s")]
6584 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6585 [(set (match_operand:V2SF 0 "register_operand" "=w")
6589 (match_operand:V4HF 2 "register_operand" "w")
6590 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6594 (match_operand:V8HF 3 "register_operand" "x")
6595 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6596 (match_operand:V2SF 1 "register_operand" "0")))]
6598 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6599 [(set_attr "type" "neon_fp_mul_s")]
6602 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6603 [(set (match_operand:V2SF 0 "register_operand" "=w")
6608 (match_operand:V4HF 2 "register_operand" "w")
6609 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6613 (match_operand:V8HF 3 "register_operand" "x")
6614 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6615 (match_operand:V2SF 1 "register_operand" "0")))]
6617 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6618 [(set_attr "type" "neon_fp_mul_s")]
6621 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6622 [(set (match_operand:V4SF 0 "register_operand" "")
6623 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6624 (match_operand:V8HF 2 "register_operand" "")
6625 (match_operand:V4HF 3 "register_operand" "")
6626 (match_operand:SI 4 "aarch64_imm2" "")]
6630 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6631 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6633 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6641 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6642 [(set (match_operand:V4SF 0 "register_operand" "")
6643 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6644 (match_operand:V8HF 2 "register_operand" "")
6645 (match_operand:V4HF 3 "register_operand" "")
6646 (match_operand:SI 4 "aarch64_imm2" "")]
6650 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6651 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6653 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6661 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6662 [(set (match_operand:V4SF 0 "register_operand" "=w")
6666 (match_operand:V8HF 2 "register_operand" "w")
6667 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6671 (match_operand:V4HF 3 "register_operand" "x")
6672 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6673 (match_operand:V4SF 1 "register_operand" "0")))]
6675 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6676 [(set_attr "type" "neon_fp_mul_s")]
6679 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6680 [(set (match_operand:V4SF 0 "register_operand" "=w")
6685 (match_operand:V8HF 2 "register_operand" "w")
6686 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6690 (match_operand:V4HF 3 "register_operand" "x")
6691 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6692 (match_operand:V4SF 1 "register_operand" "0")))]
6694 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6695 [(set_attr "type" "neon_fp_mul_s")]
6698 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6699 [(set (match_operand:V4SF 0 "register_operand" "=w")
6703 (match_operand:V8HF 2 "register_operand" "w")
6704 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6708 (match_operand:V4HF 3 "register_operand" "x")
6709 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6710 (match_operand:V4SF 1 "register_operand" "0")))]
6712 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6713 [(set_attr "type" "neon_fp_mul_s")]
6716 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6717 [(set (match_operand:V4SF 0 "register_operand" "=w")
6722 (match_operand:V8HF 2 "register_operand" "w")
6723 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6727 (match_operand:V4HF 3 "register_operand" "x")
6728 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6729 (match_operand:V4SF 1 "register_operand" "0")))]
6731 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6732 [(set_attr "type" "neon_fp_mul_s")]
6737 (define_insn "aarch64_crypto_pmulldi"
6738 [(set (match_operand:TI 0 "register_operand" "=w")
6739 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6740 (match_operand:DI 2 "register_operand" "w")]
6742 "TARGET_SIMD && TARGET_AES"
6743 "pmull\\t%0.1q, %1.1d, %2.1d"
6744 [(set_attr "type" "crypto_pmull")]
6747 (define_insn "aarch64_crypto_pmullv2di"
6748 [(set (match_operand:TI 0 "register_operand" "=w")
6749 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6750 (match_operand:V2DI 2 "register_operand" "w")]
6752 "TARGET_SIMD && TARGET_AES"
6753 "pmull2\\t%0.1q, %1.2d, %2.2d"
6754 [(set_attr "type" "crypto_pmull")]