1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard move expander for all AdvSIMD vector modes (VALL_F16).
;; For a memory destination the source is forced into a register unless
;; it is an immediate zero that can be stored directly from xzr
;; (str for 8-byte modes, stp xzr,xzr for 16-byte modes).
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  Expansion must not fail, so when
;; neither operand is a register (e.g. memory := constant from the
;; auto-vectorizer) operand 1 is loaded into a register first.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector (VDQ_I).
;; Alternative 0 duplicates from a SIMD register lane, alternative 1
;; (DUP from a general register) is marked '?' as it is more costly.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point variant (VDQF_F16): duplicate lane 0 of a SIMD
;; scalar register across the vector.  No GP-register alternative.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane of a vector into all lanes of the result.
;; Operand 2 (the lane index) is converted with aarch64_endian_lane_rtx
;; so the printed index matches the architectural lane numbering on
;; both endiannesses.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the opposite width (<VSWAP_WIDTH>),
;; e.g. duplicating a lane of a 64-bit vector into a 128-bit result.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; Move for 64-bit (VD) vector modes.  Alternatives cover: load,
;; store-zero via xzr, store, SIMD reg-reg move, SIMD->GP (umov),
;; GP->SIMD (fmov), GP-GP move, and vector immediate materialisation.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; Move for 128-bit (VQ) vector modes.  Storing zero uses STP of two
;; xzr registers; GP<->SIMD transfers take multiple instructions
;; (length 8) and are split later.
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes; the condition checks that the selected lane is
;; architectural lane 0 after endian correction.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP/STP patterns.  Each matches two consecutive loads/stores whose
;; second address equals the first plus the mode size, so they can be
;; fused into a single load/store-pair instruction.
;; 64-bit (DREG) load pair.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
;; 64-bit (DREG) store pair.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
;; 128-bit (VQ) load pair.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
;; 128-bit (VQ) store pair.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; Post-reload splits for 128-bit moves that ended up in general
;; registers.  First split: GP -> GP, expanded as two DImode reg moves.
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Second split: cross-file moves (FP <-> GP), handled by
;; aarch64_split_simd_move.
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move into two 64-bit half moves.  GP source: move
;; low then high half into the vector register.  Vector source: extract
;; low/high halves into GP registers via vec_select parallels.
260 (define_expand "aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low 64-bit half of a 128-bit vector into a general
;; register; only valid after reload (used by the split above).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
;; Same for the high 64-bit half.
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; ORN: or with complement of operand 1.  Note the operand order in the
;; output template: ORN complements its last source operand.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
;; BIC: and with complement of operand 1 (same operand-order note).
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (no 64-bit element variant exists, hence
;; VDQ_BHSI rather than VDQ_I).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap within each element, implemented with the REV family.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
;; Bit-reverse each byte element (RBIT).
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: byte-swap, bit-reverse the bytes (viewed as a
;; QI vector), then count leading zeros of the result.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(x, y) = x with its sign flipped by the sign of y.
;; Implemented in the integer domain: isolate y's sign bits with a
;; sign-bit mask, then XOR them into x.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: 4-way dot product of QI elements accumulated into SI
;; lanes; operand 1 is the accumulator (tied to the destination).
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
439 ;; for (i=0; i<len; i++) {
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
459 (match_operand:VS 3 "register_operand")))]
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; Dot product against a single (endian-corrected) lane of a 64-bit
;; V8QI register.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
483 [(set_attr "type" "neon_dot")]
;; Same, with the lane taken from a 128-bit V16QI register.
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
498 [(set_attr "type" "neon_dot")]
;; copysign(x, y): select the sign bit from y and the remaining bits
;; from x, done with a BSL through a sign-bit mask vector.
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
;; Multiply a vector by one (endian-corrected) lane of another vector.
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, with the lane taken from a vector of the opposite width.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar broadcast from a register (lane 0 form).
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; FRSQRTE: reciprocal square-root estimate.
562 (define_insn "aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; FRSQRTS: reciprocal square-root Newton-Raphson step.
570 (define_insn "aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expander for the rsqrt optab: emits the iterative approximation.
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF multiply by a selected lane of a V2DF vector (scalar result
;; written with the .2d vector form of FMUL).
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (rtl abs form, may combine freely).
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
;; as SABD; hence it is kept as an unspec rather than rtl abs.
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
;; SABD: absolute difference, matched from abs(minus(...)).
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
;; [US]ABDL2: widening absolute difference of the high halves.
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
;; [US]ABAL: widening absolute difference and accumulate
;; (operand 3 is the accumulator, tied to the destination).
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
;; [US]ADALP: pairwise add-and-accumulate-long.
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
699 emit_move_insn (operands[0], operands[3]);
;; SABA: absolute difference and accumulate (operand 3 accumulator).
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference.
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
726 ;; For AND (vector, register) and BIC (vector, immediate)
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
733 switch (which_alternative)
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
744 [(set_attr "type" "neon_logic<q>")]
747 ;; For ORR (vector, register) and ORR (vector, immediate)
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
754 switch (which_alternative)
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
765 [(set_attr "type" "neon_logic<q>")]
;; EOR: vector exclusive-or (register operands only).
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
;; NOT: vector bitwise complement.
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector (vec_merge form).
;; Operand 2 arrives as a one-hot lane mask; it is converted to the
;; endian-corrected bit so %p2 prints the lane number.  Alternatives:
;; from SIMD lane 0 (INS), from a GP register (INS), from memory (LD1).
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of a vector into one lane of another vector (INS
;; element form); both lane indices are endian-corrected.
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
814 (vec_duplicate:VALL_F16
816 (match_operand:VALL_F16 3 "register_operand" "w")
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
829 [(set_attr "type" "neon_ins<q>")]
;; As above with the source lane taken from a vector of the opposite
;; register width.
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
851 [(set_attr "type" "neon_ins<q>")]
;; USHR: logical (unsigned) right shift by immediate.
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
;; SSHR: arithmetic (signed) right shift by immediate.
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
;; SHL: left shift by immediate.
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
;; SSHL: left shift by register (per-lane shift amounts).
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL via unspec: variable shift with unsigned (logical) semantics
;; when the shift amount is negative.
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL via unspec: variable shift with signed (arithmetic) semantics
;; when the shift amount is negative.
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
;; Left-shift expander: constant amounts in range use the immediate
;; form; otherwise the amount is broadcast and SSHL is used.
910 (define_expand "ashl<mode>3"
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
913 (match_operand:SI 2 "general_operand" "")]
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
919 if (CONST_INT_P (operands[2]))
921 shift_amount = INTVAL (operands[2]);
922 if (shift_amount >= 0 && shift_amount < bit_width)
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
933 operands[2] = force_reg (SImode, operands[2]);
936 else if (MEM_P (operands[2]))
938 operands[2] = force_reg (SImode, operands[2]);
941 if (REG_P (operands[2]))
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical right-shift expander: immediate form when the amount is a
;; valid constant, otherwise negate the amount and use USHL.
957 (define_expand "lshr<mode>3"
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
960 (match_operand:SI 2 "general_operand" "")]
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
966 if (CONST_INT_P (operands[2]))
968 shift_amount = INTVAL (operands[2]);
969 if (shift_amount > 0 && shift_amount <= bit_width)
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
979 operands[2] = force_reg (SImode, operands[2]);
981 else if (MEM_P (operands[2]))
983 operands[2] = force_reg (SImode, operands[2]);
986 if (REG_P (operands[2]))
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]));
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic right-shift expander: immediate form when possible,
;; otherwise negate the amount and use the signed SSHL unspec.
1004 (define_expand "ashr<mode>3"
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
1007 (match_operand:SI 2 "general_operand" "")]
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1013 if (CONST_INT_P (operands[2]))
1015 shift_amount = INTVAL (operands[2]);
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1026 operands[2] = force_reg (SImode, operands[2]);
1028 else if (MEM_P (operands[2]))
1030 operands[2] = force_reg (SImode, operands[2]);
1033 if (REG_P (operands[2]))
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: maps directly onto SSHL.
1051 (define_expand "vashl<mode>3"
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the amounts and
;; use the signed SSHL unspec.
1065 (define_expand "vashr<mode>3"
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic right shift intrinsic; a shift by 64 is mapped
;; onto asr by 63 since the result is all sign-bit copies either way.
1079 (define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1088 if (INTVAL (operands[2]) == 64)
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical right shift: negate the amounts and use
;; the unsigned USHL unspec.
1095 (define_expand "vlshr<mode>3"
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1101 rtx neg = gen_reg_rtx (<MODE>mode);
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical right shift intrinsic; a shift by 64 yields zero.
1108 (define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1114 if (INTVAL (operands[2]) == 64)
1115 emit_move_insn (operands[0], const0_rtx);
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; vec_shr: whole-vector shift right for 64-bit vector modes.  On
;; big-endian the architectural lane order is reversed, hence SHL.
1123 (define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1130 if (BYTES_BIG_ENDIAN)
1131 return "shl %d0, %d1, %2";
1133 return "ushr %d0, %d1, %2";
1135 [(set_attr "type" "neon_shift_imm")]
;; vec_set: insert scalar operand 1 into lane operand 2 of vector
;; operand 0, encoded for the vec_set insn as a one-hot mask (1 << lane).
1138 (define_expand "vec_set<mode>"
1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]))
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; Operand 1 is tied to the output ("0") as MLA accumulates in place.
1152 (define_insn "aarch64_mla<mode>"
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from a lane of operand 1;
;; the lane index is remapped for endianness before printing.
1163 (define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1167 (vec_duplicate:VDQHS
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from the vector mode of swapped width
;; (e.g. a V4HI lane used with a V8HI multiply).
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1185 (vec_duplicate:VDQHS
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where the multiplicand is a scalar register duplicated to all
;; lanes; printed as the indexed form with lane 0.
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202 (mult:VDQHS (vec_duplicate:VDQHS
1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
1211 (define_insn "aarch64_mls<mode>"
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS lane variants, mirroring the MLA lane patterns above.
1221 (define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1224 (match_operand:VDQHS 4 "register_operand" "0")
1226 (vec_duplicate:VDQHS
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with the lane taken from the swapped-width vector mode.
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1242 (match_operand:VDQHS 4 "register_operand" "0")
1244 (vec_duplicate:VDQHS
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS where the multiplicand is a duplicated scalar register.
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1269 ;; Max/Min operations.
;; Signed/unsigned vector max/min for byte/half/word element modes.
1270 (define_insn "<su><maxmin><mode>3"
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min has no direct instruction; it is synthesised as a
;; compare followed by a vcond select.  (The switch that chooses
;; cmp_operator is among the lines elided from this chunk.)
1279 (define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1285 enum rtx_code cmp_operator;
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308 operands[2], cmp_fmt, operands[1], operands[2]));
1312 ;; Pairwise Integer Max/Min operations.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1323 ;; Pairwise FP Max/Min operations.
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
;; Little-endian form for quad modes other than the 2-element ones.
1344 (define_insn "move_lo_quad_internal_<mode>"
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348 (vec_duplicate:<VHALF> (const_int 0))))]
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355 (set_attr "simd" "yes,*,yes")
1356 (set_attr "fp" "*,yes,*")
1357 (set_attr "length" "4")]
;; Little-endian form for the 2-element quad modes (V2DI/V2DF).
1360 (define_insn "move_lo_quad_internal_<mode>"
1361 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1363 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1365 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1370 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1371 (set_attr "simd" "yes,*,yes")
1372 (set_attr "fp" "*,yes,*")
1373 (set_attr "length" "4")]
;; Big-endian forms: the zero half precedes the operand in the concat.
1376 (define_insn "move_lo_quad_internal_be_<mode>"
1377 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1379 (vec_duplicate:<VHALF> (const_int 0))
1380 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1381 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1386 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1387 (set_attr "simd" "yes,*,yes")
1388 (set_attr "fp" "*,yes,*")
1389 (set_attr "length" "4")]
1392 (define_insn "move_lo_quad_internal_be_<mode>"
1393 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1396 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1397 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1402 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1403 (set_attr "simd" "yes,*,yes")
1404 (set_attr "fp" "*,yes,*")
1405 (set_attr "length" "4")]
;; Dispatcher expand: pick the _be_ or plain internal pattern by
;; target endianness.
1408 (define_expand "move_lo_quad_<mode>"
1409 [(match_operand:VQ 0 "register_operand")
1410 (match_operand:VQ 1 "register_operand")]
1413 if (BYTES_BIG_ENDIAN)
1414 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1416 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1421 ;; Move operand1 to the high architectural bits of the register, keeping
1422 ;; the low architectural bits of operand2.
1423 ;; For little-endian this is { operand2, operand1 }
1424 ;; For big-endian this is { operand1, operand2 }
;; Little-endian insn: keep the low half of operand 0, insert operand 1
;; into the high half (INS %0.d[1]).
1426 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1427 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1431 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1432 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1433 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1435 ins\\t%0.d[1], %1.d[0]
1437 [(set_attr "type" "neon_ins")]
;; Big-endian insn: the operand halves swap position in the concat,
;; but the emitted instruction is the same.
1440 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1441 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1443 (match_operand:<VHALF> 1 "register_operand" "w,r")
1446 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1447 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1449 ins\\t%0.d[1], %1.d[0]
1451 [(set_attr "type" "neon_ins")]
;; Dispatcher expand: build the low-half lane-select parallel and pick
;; the endian-appropriate insn above.
1454 (define_expand "move_hi_quad_<mode>"
1455 [(match_operand:VQ 0 "register_operand" "")
1456 (match_operand:<VHALF> 1 "register_operand" "")]
1459 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1460 if (BYTES_BIG_ENDIAN)
1461 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1464 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1469 ;; Narrowing operations.
;; Truncate each wide element to the narrow mode (XTN).
1472 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1473 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1474 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1476 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1477 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit vectors: assemble them into one double-width
;; register (low/high halves chosen by endianness), then XTN once.
1480 (define_expand "vec_pack_trunc_<mode>"
1481 [(match_operand:<VNARROWD> 0 "register_operand" "")
1482 (match_operand:VDN 1 "register_operand" "")
1483 (match_operand:VDN 2 "register_operand" "")]
1486 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1487 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1488 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1490 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1491 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1492 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack two quad vectors with an XTN/XTN2 pair; the operand order in
;; the output template is swapped for big-endian.
1498 (define_insn "vec_pack_trunc_<mode>"
1499 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1500 (vec_concat:<VNARROWQ2>
1501 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1502 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1505 if (BYTES_BIG_ENDIAN)
1506 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1508 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1510 [(set_attr "type" "multiple")
1511 (set_attr "length" "8")]
1514 ;; Widening operations.
;; Sign/zero-extend the low-half lanes (SXTL/UXTL).
1516 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1519 (match_operand:VQW 1 "register_operand" "w")
1520 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1523 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1524 [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high-half lanes (SXTL2/UXTL2).
1527 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1528 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1529 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1530 (match_operand:VQW 1 "register_operand" "w")
1531 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1534 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1535 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expands: build the hi/lo lane-select parallel
;; ('true' selects the high half) and emit the matching insn above.
1538 (define_expand "vec_unpack<su>_hi_<mode>"
1539 [(match_operand:<VWIDE> 0 "register_operand" "")
1540 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1544 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1550 (define_expand "vec_unpack<su>_lo_<mode>"
1551 [(match_operand:<VWIDE> 0 "register_operand" "")
1552 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1555 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1556 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1562 ;; Widening arithmetic.
;; Widening multiply-accumulate of the low halves (SMLAL/UMLAL).
1564 (define_insn "*aarch64_<su>mlal_lo<mode>"
1565 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1568 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569 (match_operand:VQW 2 "register_operand" "w")
1570 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1571 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1572 (match_operand:VQW 4 "register_operand" "w")
1574 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1576 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1577 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate of the high halves (SMLAL2/UMLAL2).
1580 (define_insn "*aarch64_<su>mlal_hi<mode>"
1581 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1584 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1585 (match_operand:VQW 2 "register_operand" "w")
1586 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1587 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1588 (match_operand:VQW 4 "register_operand" "w")
1590 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1592 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1593 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract of the low halves (SMLSL/UMLSL).
1596 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1599 (match_operand:<VWIDE> 1 "register_operand" "0")
1601 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602 (match_operand:VQW 2 "register_operand" "w")
1603 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1604 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1605 (match_operand:VQW 4 "register_operand" "w")
1608 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1609 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract of the high halves (SMLSL2/UMLSL2).
1612 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1613 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1615 (match_operand:<VWIDE> 1 "register_operand" "0")
1617 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1618 (match_operand:VQW 2 "register_operand" "w")
1619 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1620 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1621 (match_operand:VQW 4 "register_operand" "w")
1624 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Whole 64-bit-vector widening multiply-accumulate (no half select).
1628 (define_insn "*aarch64_<su>mlal<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1633 (match_operand:VD_BHSI 1 "register_operand" "w"))
1635 (match_operand:VD_BHSI 2 "register_operand" "w")))
1636 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1638 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1639 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Whole 64-bit-vector widening multiply-subtract.
1642 (define_insn "*aarch64_<su>mlsl<mode>"
1643 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1645 (match_operand:<VWIDE> 1 "register_operand" "0")
1648 (match_operand:VD_BHSI 2 "register_operand" "w"))
1650 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1652 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1653 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
1656 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1657 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1658 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1659 (match_operand:VQW 1 "register_operand" "w")
1660 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1661 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1662 (match_operand:VQW 2 "register_operand" "w")
1665 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1666 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expand feeding the _lo_ insn with a low-half parallel.
1669 (define_expand "vec_widen_<su>mult_lo_<mode>"
1670 [(match_operand:<VWIDE> 0 "register_operand" "")
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1672 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1675 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1676 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves (SMULL2/UMULL2).
1683 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1684 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1685 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686 (match_operand:VQW 1 "register_operand" "w")
1687 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1688 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689 (match_operand:VQW 2 "register_operand" "w")
1692 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1693 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expand feeding the _hi_ insn with a high-half parallel.
1696 (define_expand "vec_widen_<su>mult_hi_<mode>"
1697 [(match_operand:<VWIDE> 0 "register_operand" "")
1698 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1699 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1702 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1703 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1711 ;; FP vector operations.
1712 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1713 ;; double-precision (64-bit) floating-point data types and arithmetic as
1714 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1715 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1717 ;; Floating-point operations can raise an exception. Vectorizing such
1718 ;; operations are safe because of reasons explained below.
1720 ;; ARMv8 permits an extension to enable trapped floating-point
1721 ;; exception handling, however this is an optional feature. In the
1722 ;; event of a floating-point exception being raised by vectorised
1724 ;; 1. If trapped floating-point exceptions are available, then a trap
1725 ;; will be taken when any lane raises an enabled exception. A trap
1726 ;; handler may determine which lane raised the exception.
1727 ;; 2. Alternatively a sticky exception flag is set in the
1728 ;; floating-point status register (FPSR). Software may explicitly
1729 ;; test the exception flags, in which case the tests will either
1730 ;; prevent vectorisation, allowing precise identification of the
1731 ;; failing operation, or if tested outside of vectorisable regions
1732 ;; then the specific operation and lane are not of interest.
1734 ;; FP arithmetic operations.
;; Vector FP add (FADD) for half/single/double element modes.
1736 (define_insn "add<mode>3"
1737 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1738 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1739 (match_operand:VHSDF 2 "register_operand" "w")))]
1741 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtract (FSUB).
1745 (define_insn "sub<mode>3"
1746 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1747 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1748 (match_operand:VHSDF 2 "register_operand" "w")))]
1750 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1751 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiply (FMUL).
1754 (define_insn "mul<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1759 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expand: first try the reciprocal-approximation sequence
;; (aarch64_emit_approx_div); otherwise fall through to the *div insn.
1763 (define_expand "div<mode>3"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1766 (match_operand:VHSDF 2 "register_operand" "w")))]
1769 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1772 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Plain vector FP divide (FDIV).
1775 (define_insn "*div<mode>3"
1776 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1777 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1778 (match_operand:VHSDF 2 "register_operand" "w")))]
1780 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1781 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negate (FNEG).
1784 (define_insn "neg<mode>2"
1785 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1786 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1788 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1789 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (FABS).
1792 (define_insn "abs<mode>2"
1793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1794 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1796 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1797 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3,
;; with the accumulator tied to the output (FMLA semantics).
1800 (define_insn "fma<mode>4"
1801 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1802 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1803 (match_operand:VHSDF 2 "register_operand" "w")
1804 (match_operand:VHSDF 3 "register_operand" "0")))]
1806 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1807 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a lane of operand 1;
;; the lane index is endian-remapped before printing.
1810 (define_insn "*aarch64_fma4_elt<mode>"
1811 [(set (match_operand:VDQF 0 "register_operand" "=w")
1815 (match_operand:VDQF 1 "register_operand" "<h_con>")
1816 (parallel [(match_operand:SI 2 "immediate_operand")])))
1817 (match_operand:VDQF 3 "register_operand" "w")
1818 (match_operand:VDQF 4 "register_operand" "0")))]
1821 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1822 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1824 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above with the lane taken from the swapped-width vector mode.
1827 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1828 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1830 (vec_duplicate:VDQSF
1832 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1833 (parallel [(match_operand:SI 2 "immediate_operand")])))
1834 (match_operand:VDQSF 3 "register_operand" "w")
1835 (match_operand:VDQSF 4 "register_operand" "0")))]
1838 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1839 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1841 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a scalar register duplicated to all lanes (lane-0 form).
1844 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1845 [(set (match_operand:VMUL 0 "register_operand" "=w")
1848 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1849 (match_operand:VMUL 2 "register_operand" "w")
1850 (match_operand:VMUL 3 "register_operand" "0")))]
1852 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1853 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma taking one multiplicand from a V2DF lane.
1856 (define_insn "*aarch64_fma4_elt_to_64v2df"
1857 [(set (match_operand:DF 0 "register_operand" "=w")
1860 (match_operand:V2DF 1 "register_operand" "w")
1861 (parallel [(match_operand:SI 2 "immediate_operand")]))
1862 (match_operand:DF 3 "register_operand" "w")
1863 (match_operand:DF 4 "register_operand" "0")))]
1866 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1867 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1869 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: operand 0 = -(operand 1) * operand 2
;; + operand 3, i.e. FMLS with the accumulator tied to the output.
1872 (define_insn "fnma<mode>4"
1873 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1875 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1876 (match_operand:VHSDF 2 "register_operand" "w")
1877 (match_operand:VHSDF 3 "register_operand" "0")))]
1879 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1880 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with one multiplicand broadcast from a lane (endian-remapped).
1883 (define_insn "*aarch64_fnma4_elt<mode>"
1884 [(set (match_operand:VDQF 0 "register_operand" "=w")
1887 (match_operand:VDQF 3 "register_operand" "w"))
1890 (match_operand:VDQF 1 "register_operand" "<h_con>")
1891 (parallel [(match_operand:SI 2 "immediate_operand")])))
1892 (match_operand:VDQF 4 "register_operand" "0")))]
1895 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1896 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1898 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above with the lane taken from the swapped-width vector mode.
1901 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1902 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1905 (match_operand:VDQSF 3 "register_operand" "w"))
1906 (vec_duplicate:VDQSF
1908 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1909 (parallel [(match_operand:SI 2 "immediate_operand")])))
1910 (match_operand:VDQSF 4 "register_operand" "0")))]
1913 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1914 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1916 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar register duplicated to all lanes (lane-0 form).
1919 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1920 [(set (match_operand:VMUL 0 "register_operand" "=w")
1923 (match_operand:VMUL 2 "register_operand" "w"))
1925 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1926 (match_operand:VMUL 3 "register_operand" "0")))]
1928 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1929 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fnma taking one multiplicand from a V2DF lane.
1932 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1933 [(set (match_operand:DF 0 "register_operand" "=w")
1936 (match_operand:V2DF 1 "register_operand" "w")
1937 (parallel [(match_operand:SI 2 "immediate_operand")]))
1939 (match_operand:DF 3 "register_operand" "w"))
1940 (match_operand:DF 4 "register_operand" "0")))]
1943 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1944 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1946 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1951 (define_insn "<frint_pattern><mode>2"
1952 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1953 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1956 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1957 [(set_attr "type" "neon_fp_round_<stype><q>")]
1960 ;; Vector versions of the fcvt standard patterns.
1961 ;; Expands to lbtrunc, lround, lceil, lfloor
1962 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1963 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1964 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1965 [(match_operand:VHSDF 1 "register_operand" "w")]
1968 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1969 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1972 ;; HF Scalar variants of related SIMD instructions.
;; Round-and-convert a scalar HF to HI using the SIMD form of FCVT;
;; requires the ARMv8.2-A FP16 scalar instructions.
1973 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1974 [(set (match_operand:HI 0 "register_operand" "=w")
1975 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1977 "TARGET_SIMD_F16INST"
1978 "fcvt<frint_suffix><su>\t%h0, %h1"
1979 [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating HF -> HI conversion (FCVTZS/FCVTZU).
1982 (define_insn "<optab>_trunchfhi2"
1983 [(set (match_operand:HI 0 "register_operand" "=w")
1984 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1985 "TARGET_SIMD_F16INST"
1986 "fcvtz<su>\t%h0, %h1"
1987 [(set_attr "type" "neon_fp_to_int_s")]
;; HI -> HF integer-to-float conversion (SCVTF/UCVTF).
1990 (define_insn "<optab>hihf2"
1991 [(set (match_operand:HF 0 "register_operand" "=w")
1992 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1993 "TARGET_SIMD_F16INST"
1994 "<su_optab>cvtf\t%h0, %h1"
1995 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (x * 2^n) with float->int conversion into a single
;; fixed-point FCVTZ with an immediate fractional-bit count, when the
;; multiplier is a power of two in the element's bit-size range.
1998 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1999 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2000 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2002 (match_operand:VDQF 1 "register_operand" "w")
2003 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2006 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2007 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2009 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2011 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2012 output_asm_insn (buf, operands);
2015 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expands mapping fix/fixuns and ftrunc onto the
;; unspec-based vector conversion insns.
2018 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2019 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2020 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2021 [(match_operand:VHSDF 1 "register_operand")]
2026 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2027 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2028 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2029 [(match_operand:VHSDF 1 "register_operand")]
2034 (define_expand "ftrunc<VHSDF:mode>2"
2035 [(set (match_operand:VHSDF 0 "register_operand")
2036 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Vector int -> float conversion (SCVTF/UCVTF).
2041 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2044 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2046 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2047 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2054 ;; Float widening operations.
;; Widen the low-half FP lanes to the double-width mode (FCVTL).
2056 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2058 (float_extend:<VWIDE> (vec_select:<VHALF>
2059 (match_operand:VQ_HSF 1 "register_operand" "w")
2060 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2063 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2064 [(set_attr "type" "neon_fp_cvt_widen_s")]
2067 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point with #fbits immediate (FCVTZS/FCVTZU #n).
2069 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2070 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2071 (unspec:<VHSDF:FCVT_TARGET>
2072 [(match_operand:VHSDF 1 "register_operand" "w")
2073 (match_operand:SI 2 "immediate_operand" "i")]
2076 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2077 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float with #fbits immediate (SCVTF/UCVTF #n).
2080 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2081 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2082 (unspec:<VDQ_HSDI:FCVT_TARGET>
2083 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2084 (match_operand:SI 2 "immediate_operand" "i")]
2087 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2088 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
;; Standard-name expand: low-half float widening via the _lo_ insn
;; ('false' builds the low-half lane-select parallel).
2100 (define_expand "vec_unpacks_lo_<mode>"
2101 [(match_operand:<VWIDE> 0 "register_operand" "")
2102 (match_operand:VQ_HSF 1 "register_operand" "")]
2105 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2106 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high-half FP lanes to the double-width mode (FCVTL2).
2112 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2113 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114 (float_extend:<VWIDE> (vec_select:<VHALF>
2115 (match_operand:VQ_HSF 1 "register_operand" "w")
2116 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2119 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2120 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expand "hi" float unpack: widen the architecturally-high half lanes
;; of the input.  The half-lane selector is built with 'true' (high
;; half), so this must emit the _hi_ internal insn (fcvtl2).  The
;; previous code called the _lo_ generator, pairing a high-half lane
;; selector with the low-half fcvtl instruction — inconsistent with the
;; vec_unpacks_lo expand above, which uses 'false' with the _lo_ insn.
2123 (define_expand "vec_unpacks_hi_<mode>"
2124 [(match_operand:<VWIDE> 0 "register_operand" "")
2125 (match_operand:VQ_HSF 1 "register_operand" "")]
2128 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2129 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a 64-bit float vector to its 128-bit counterpart (fcvtl).
2134 (define_insn "aarch64_float_extend_lo_<Vwide>"
2135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2136 (float_extend:<VWIDE>
2137 (match_operand:VDF 1 "register_operand" "w")))]
2139 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2140 [(set_attr "type" "neon_fp_cvt_widen_s")]
2143 ;; Float narrowing operations.
;; Narrow a wide float vector into a 64-bit result (fcvtn).
2145 (define_insn "aarch64_float_truncate_lo_<mode>"
2146 [(set (match_operand:VDF 0 "register_operand" "=w")
2148 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2150 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2151 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half of a 128-bit result, little-endian form:
;; operand 1 (tied to the output, constraint "0") supplies the low half
;; and fcvtn2 writes the narrowed operand 2 into the upper half.
2154 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2155 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2159 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2160 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2161 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2162 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian twin of the pattern above: same fcvtn2, but the RTL
;; concatenation order is swapped to model BYTES_BIG_ENDIAN lane layout.
2165 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2166 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2169 (match_operand:<VWIDE> 2 "register_operand" "w"))
2170 (match_operand:VDF 1 "register_operand" "0")))]
2171 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2172 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2173 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endianness dispatcher: pick the _le or _be insn at expand time.
2176 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2177 [(match_operand:<VDBL> 0 "register_operand" "=w")
2178 (match_operand:VDF 1 "register_operand" "0")
2179 (match_operand:<VWIDE> 2 "register_operand" "w")]
2182 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2183 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2184 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2185 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: fcvtn into the low half, then
;; fcvtn2 into the high half.  The lo/hi operand indices are swapped on
;; big-endian because the vectorizer's notion of lo/hi flips there.
2190 (define_expand "vec_pack_trunc_v2df"
2191 [(set (match_operand:V4SF 0 "register_operand")
2193 (float_truncate:V2SF
2194 (match_operand:V2DF 1 "register_operand"))
2195 (float_truncate:V2SF
2196 (match_operand:V2DF 2 "register_operand"))
2200 rtx tmp = gen_reg_rtx (V2SFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2204 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2205 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2206 tmp, operands[hi]));
;; Scalar variant: build a V2DF from the two DF inputs via the
;; move_lo/hi_quad helpers, then narrow the pair in one fcvtn.
2211 (define_expand "vec_pack_trunc_df"
2212 [(set (match_operand:V2SF 0 "register_operand")
2215 (match_operand:DF 1 "register_operand"))
2217 (match_operand:DF 2 "register_operand"))
2221 rtx tmp = gen_reg_rtx (V2SFmode);
2222 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2223 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2225 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2226 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2227 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2235 ;; a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smin/smax standard names: emitted as fminnm/fmaxnm (the "nm" forms,
;; which is safe given the fast-math preconditions described above).
2247 (define_insn "<su><maxmin><mode>3"
2248 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2249 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2250 (match_operand:VHSDF 2 "register_operand" "w")))]
2252 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2253 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2256 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2257 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2258 ;; which implement the IEEE fmax ()/fmin () functions.
2259 (define_insn "<maxmin_uns><mode>3"
2260 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2261 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2262 (match_operand:VHSDF 2 "register_operand" "w")]
2265 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2266 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2269 ;; 'across lanes' add.
;; Integer add reduction to scalar: reduce across lanes into lane 0 of
;; a scratch, then extract that (endian-corrected) lane.
2271 (define_expand "reduc_plus_scal_<mode>"
2272 [(match_operand:<VEL> 0 "register_operand" "=w")
2273 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2277 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2278 rtx scratch = gen_reg_rtx (<MODE>mode);
2279 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2280 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add of two vectors (faddp), building block for the
;; v4sf reduction below.
2285 (define_insn "aarch64_faddp<mode>"
2286 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2287 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2288 (match_operand:VHSDF 2 "register_operand" "w")]
2291 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2292 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (addv / addp per the <vp> suffix), result
;; placed in a scalar element of the destination register.
2295 (define_insn "aarch64_reduc_plus_internal<mode>"
2296 [(set (match_operand:VDQV 0 "register_operand" "=w")
2297 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2300 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2301 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no addv; a single pairwise addp of the vector with itself
;; leaves the sum in every lane.
2304 (define_insn "aarch64_reduc_plus_internalv2si"
2305 [(set (match_operand:V2SI 0 "register_operand" "=w")
2306 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2309 "addp\\t%0.2s, %1.2s, %1.2s"
2310 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar faddp.
2313 (define_insn "reduc_plus_scal_<mode>"
2314 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2315 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2318 "faddp\\t%<Vetype>0, %1.<Vtype>"
2319 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add reduction: two rounds of pairwise faddp leave the total in
;; every lane; extract the endian-corrected lane 0.
2322 (define_expand "reduc_plus_scal_v4sf"
2323 [(set (match_operand:SF 0 "register_operand")
2324 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2328 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2329 rtx scratch = gen_reg_rtx (V4SFmode);
2330 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2331 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2332 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per element (cls).
2336 (define_insn "clrsb<mode>2"
2337 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2338 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2340 "cls\\t%0.<Vtype>, %1.<Vtype>"
2341 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (clz).
2344 (define_insn "clz<mode>2"
2345 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2346 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2348 "clz\\t%0.<Vtype>, %1.<Vtype>"
2349 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte element (cnt); VB covers byte vectors only.
2352 (define_insn "popcount<mode>2"
2353 [(set (match_operand:VB 0 "register_operand" "=w")
2354 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2356 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2357 [(set_attr "type" "neon_cnt<q>")]
2360 ;; 'across lanes' max and min ops.
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP min/max reduction to scalar: reduce into a scratch vector, then
;; extract the endian-corrected lane 0.
2364 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2365 [(match_operand:<VEL> 0 "register_operand")
2366 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2370 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2371 rtx scratch = gen_reg_rtx (<MODE>mode);
2372 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2374 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2379 ;; Likewise for integer cases, signed and unsigned.
2380 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2381 [(match_operand:<VEL> 0 "register_operand")
2382 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2386 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2387 rtx scratch = gen_reg_rtx (<MODE>mode);
2388 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2390 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lanes integer min/max ([us]minv/[us]maxv).
2395 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2396 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2397 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2400 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2401 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI special case: no across-lanes form, so use the pairwise op of
;; the vector with itself (mirrors reduc_plus_internalv2si above).
2404 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2405 [(set (match_operand:V2SI 0 "register_operand" "=w")
2406 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2409 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2410 [(set_attr "type" "neon_reduc_minmax")]
;; Across-lanes FP min/max; <vp> selects the v/p instruction suffix.
2413 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2414 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2415 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2418 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2419 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2427 ;; Thus our BSL is of the form:
2428 ;; op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2432 ;; bsl mask, op1, op2
2433 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;; bit op0, op2, mask
2435 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;; bif op0, op1, mask
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
;; BSL as XOR/AND/XOR on integer vectors; the three constraint
;; alternatives tie op1/op3/op2 to the destination to select
;; bsl/bit/bif respectively.
2442 (define_insn "aarch64_simd_bsl<mode>_internal"
2443 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2447 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2448 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2449 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2450 (match_dup:<V_INT_EQUIV> 3)
2454 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2455 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2456 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2457 [(set_attr "type" "neon_bsl<q>")]
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first. The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2466 (define_insn "*aarch64_simd_bsl<mode>_alt"
2467 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2471 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2472 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2473 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2474 (match_dup:<V_INT_EQUIV> 2)))]
2477 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2478 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2479 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2480 [(set_attr "type" "neon_bsl<q>")]
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers. If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again. However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
;; DImode BSL: fourth alternative keeps everything in GP registers
;; (early-clobber "&r" destination) and is split back into
;; xor/and/xor below when the destination ends up in a GP register.
2493 (define_insn_and_split "aarch64_simd_bsldi_internal"
2494 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2498 (match_operand:DI 3 "register_operand" "w,0,w,r")
2499 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2500 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2505 bsl\\t%0.8b, %2.8b, %3.8b
2506 bit\\t%0.8b, %2.8b, %1.8b
2507 bif\\t%0.8b, %3.8b, %1.8b
2509 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2510 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2512 /* Split back to individual operations. If we're before reload, and
2513 able to create a temporary register, do so. If we're after reload,
2514 we've got an early-clobber destination register, so use that.
2515 Otherwise, we can't create pseudos and we can't yet guarantee that
2516 operands[0] is safe to write, so FAIL to split. */
2519 if (reload_completed)
2520 scratch = operands[0];
2521 else if (can_create_pseudo_p ())
2522 scratch = gen_reg_rtx (DImode);
2526 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2527 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2528 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2531 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2532 (set_attr "length" "4,4,4,12")]
;; Commuted-XOR twin of the DImode pattern above (same reasoning as
;; *aarch64_simd_bsl<mode>_alt); note the final xor uses operands[2]
;; here rather than operands[3].
2535 (define_insn_and_split "aarch64_simd_bsldi_alt"
2536 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2540 (match_operand:DI 3 "register_operand" "w,w,0,r")
2541 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2542 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2547 bsl\\t%0.8b, %3.8b, %2.8b
2548 bit\\t%0.8b, %3.8b, %1.8b
2549 bif\\t%0.8b, %2.8b, %1.8b
2551 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2552 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2554 /* Split back to individual operations. If we're before reload, and
2555 able to create a temporary register, do so. If we're after reload,
2556 we've got an early-clobber destination register, so use that.
2557 Otherwise, we can't create pseudos and we can't yet guarantee that
2558 operands[0] is safe to write, so FAIL to split. */
2561 if (reload_completed)
2562 scratch = operands[0];
2563 else if (can_create_pseudo_p ())
2564 scratch = gen_reg_rtx (DImode);
2568 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2569 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2570 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2573 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2574 (set_attr "length" "4,4,4,12")]
;; Public BSL expander: for FP modes, rewrite all operands in the
;; integer-equivalent mode and BSL there, moving the result back at
;; the end (tmp != operands[0] only in the FP case).
2577 (define_expand "aarch64_simd_bsl<mode>"
2578 [(match_operand:VALLDIF 0 "register_operand")
2579 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2580 (match_operand:VALLDIF 2 "register_operand")
2581 (match_operand:VALLDIF 3 "register_operand")]
2584 /* We can't alias operands together if they have different modes. */
2585 rtx tmp = operands[0];
2586 if (FLOAT_MODE_P (<MODE>mode))
2588 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2589 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2590 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2592 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2593 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2597 if (tmp != operands[0])
2598 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Select between operands 1 and 2 under mask operand 3.  The all-ones /
;; all-zeros constant cases collapse to a move or a NOT of the mask;
;; otherwise force the selectees into registers and emit a BSL.
2603 (define_expand "vcond_mask_<mode><v_int_equiv>"
2604 [(match_operand:VALLDI 0 "register_operand")
2605 (match_operand:VALLDI 1 "nonmemory_operand")
2606 (match_operand:VALLDI 2 "nonmemory_operand")
2607 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2610 /* If we have (a = (P) ? -1 : 0);
2611 Then we can simply move the generated mask (result must be int). */
2612 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2613 && operands[2] == CONST0_RTX (<MODE>mode))
2614 emit_move_insn (operands[0], operands[3]);
2615 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2616 else if (operands[1] == CONST0_RTX (<MODE>mode)
2617 && operands[2] == CONSTM1_RTX (<MODE>mode))
2618 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2621 if (!REG_P (operands[1]))
2622 operands[1] = force_reg (<MODE>mode, operands[1]);
2623 if (!REG_P (operands[2]))
2624 operands[2] = force_reg (<MODE>mode, operands[2]);
2625 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2626 operands[1], operands[2]));
2632 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: dispatch on the rtx code to the matching
;; cm* instruction.  LTU/LEU/GTU/GEU are handled by swapping operands
;; into the available unsigned forms; NE is emitted as NOT (EQ).
2634 (define_expand "vec_cmp<mode><mode>"
2635 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2636 (match_operator 1 "comparison_operator"
2637 [(match_operand:VSDQ_I_DI 2 "register_operand")
2638 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2641 rtx mask = operands[0];
2642 enum rtx_code code = GET_CODE (operands[1]);
;; Comparisons against zero can use the immediate-zero cm* forms
;; (handled on lines elided here); otherwise force operand 3 to a reg.
2652 if (operands[3] == CONST0_RTX (<MODE>mode))
2657 if (!REG_P (operands[3]))
2658 operands[3] = force_reg (<MODE>mode, operands[3]);
2666 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2670 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2674 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2682 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2686 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2690 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2694 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2698 /* Handle NE as !EQ. */
2699 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2700 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2704 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare, producing an integer mask.  Ordered comparisons
;; map to a single fcm* via the 'comparison' function pointer; the
;; unordered (UN*) forms quiet NaN lanes first so no FP exception is
;; raised, and ORDERED/UNORDERED/UNEQ/LTGT get composite sequences.
2714 (define_expand "vec_cmp<mode><v_int_equiv>"
2715 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2716 (match_operator 1 "comparison_operator"
2717 [(match_operand:VDQF 2 "register_operand")
2718 (match_operand:VDQF 3 "nonmemory_operand")]))]
2721 int use_zero_form = 0;
2722 enum rtx_code code = GET_CODE (operands[1]);
2723 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2725 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2734 if (operands[3] == CONST0_RTX (<MODE>mode))
2741 if (!REG_P (operands[3]))
2742 operands[3] = force_reg (<MODE>mode, operands[3]);
2752 comparison = gen_aarch64_cmlt<mode>;
2757 std::swap (operands[2], operands[3]);
2761 comparison = gen_aarch64_cmgt<mode>;
2766 comparison = gen_aarch64_cmle<mode>;
2771 std::swap (operands[2], operands[3]);
2775 comparison = gen_aarch64_cmge<mode>;
2779 comparison = gen_aarch64_cmeq<mode>;
2797 /* All of the above must not raise any FP exceptions. Thus we first
2798 check each operand for NaNs and force any elements containing NaN to
2799 zero before using them in the compare.
2800 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2801 (cm<cc> (isnan (a) ? 0.0 : a,
2802 isnan (b) ? 0.0 : b))
2803 We use the following transformations for doing the comparisions:
2807 a UNLT b -> b GT a. */
2809 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2810 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2811 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2812 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2813 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2814 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2819 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2820 lowpart_subreg (<V_INT_EQUIV>mode,
2823 gcc_assert (comparison != NULL);
2824 emit_insn (comparison (operands[0],
2825 lowpart_subreg (<MODE>mode,
2826 tmp0, <V_INT_EQUIV>mode),
2827 lowpart_subreg (<MODE>mode,
2828 tmp1, <V_INT_EQUIV>mode)));
2829 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2839 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2840 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2846 a NE b -> ~(a EQ b) */
2847 gcc_assert (comparison != NULL);
2848 emit_insn (comparison (operands[0], operands[2], operands[3]));
2850 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2854 /* LTGT is not guranteed to not generate a FP exception. So let's
2855 go the faster way : ((a > b) || (b > a)). */
2856 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2857 operands[2], operands[3]));
2858 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2859 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2865 /* cmeq (a, a) & cmeq (b, b). */
2866 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2867 operands[2], operands[2]));
2868 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]))
2869 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2871 if (code == UNORDERED)
2872 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2873 else if (code == UNEQ)
2875 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2876 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer compare: everything is handled inside
;; vec_cmp<mode><mode> (which dispatches on the rtx code), so simply
;; delegate.
2887 (define_expand "vec_cmpu<mode><mode>"
2888 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2889 (match_operator 1 "comparison_operator"
2890 [(match_operand:VSDQ_I_DI 2 "register_operand")
2891 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2894 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2895 operands[2], operands[3]));
;; vcond: compare operands 4/5, then select operands 1/2 under the
;; resulting mask via vcond_mask.  NE is rewritten as EQ with the
;; selectees swapped to save the mask inversion.
2899 (define_expand "vcond<mode><mode>"
2900 [(set (match_operand:VALLDI 0 "register_operand")
2901 (if_then_else:VALLDI
2902 (match_operator 3 "comparison_operator"
2903 [(match_operand:VALLDI 4 "register_operand")
2904 (match_operand:VALLDI 5 "nonmemory_operand")])
2905 (match_operand:VALLDI 1 "nonmemory_operand")
2906 (match_operand:VALLDI 2 "nonmemory_operand")))]
2909 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2910 enum rtx_code code = GET_CODE (operands[3]);
2912 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2913 it as well as switch operands 1/2 in order to avoid the additional
2917 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2918 operands[4], operands[5]);
2919 std::swap (operands[1], operands[2]);
2921 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2922 operands[4], operands[5]));
2923 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2924 operands[2], mask));
;; Mixed-mode vcond: FP comparison selecting between integer vectors
;; of the same width (<V_cmp_mixed>); same NE-as-EQ-with-swap trick.
2929 (define_expand "vcond<v_cmp_mixed><mode>"
2930 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2931 (if_then_else:<V_cmp_mixed>
2932 (match_operator 3 "comparison_operator"
2933 [(match_operand:VDQF_COND 4 "register_operand")
2934 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2935 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2936 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2939 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2940 enum rtx_code code = GET_CODE (operands[3]);
2942 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2943 it as well as switch operands 1/2 in order to avoid the additional
2947 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2948 operands[4], operands[5]);
2949 std::swap (operands[1], operands[2]);
2951 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2952 operands[4], operands[5]));
2953 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2954 operands[0], operands[1],
2955 operands[2], mask));
;; Unsigned vcond on integer vectors; the mask mode equals the data
;; mode here, so vec_cmp<mode><mode> is used directly.
2960 (define_expand "vcondu<mode><mode>"
2961 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2962 (if_then_else:VSDQ_I_DI
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:VSDQ_I_DI 4 "register_operand")
2965 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2966 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2967 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2970 rtx mask = gen_reg_rtx (<MODE>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2973 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974 it as well as switch operands 1/2 in order to avoid the additional
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2982 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2983 operands[4], operands[5]));
2984 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2985 operands[2], mask));
;; Unsigned integer comparison selecting between FP vectors.
2989 (define_expand "vcondu<mode><v_cmp_mixed>"
2990 [(set (match_operand:VDQF 0 "register_operand")
2992 (match_operator 3 "comparison_operator"
2993 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2994 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2995 (match_operand:VDQF 1 "nonmemory_operand")
2996 (match_operand:VDQF 2 "nonmemory_operand")))]
2999 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3000 enum rtx_code code = GET_CODE (operands[3]);
3002 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3003 it as well as switch operands 1/2 in order to avoid the additional
3007 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3008 operands[4], operands[5]);
3009 std::swap (operands[1], operands[2]);
3011 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3013 operands[4], operands[5]));
3014 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3015 operands[2], mask));
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3021 ;; Lane extraction with sign extension to general purpose register.
;; smov: extract lane (endian-corrected at output time) and
;; sign-extend into a GP register.
3022 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3023 [(set (match_operand:GPI 0 "register_operand" "=r")
3026 (match_operand:VDQQH 1 "register_operand" "w")
3027 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3030 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3031 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3033 [(set_attr "type" "neon_to_gp<q>")]
;; umov: same extraction, zero-extended to SImode.
3036 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3037 [(set (match_operand:SI 0 "register_operand" "=r")
3040 (match_operand:VDQQH 1 "register_operand" "w")
3041 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3044 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3045 return "umov\\t%w0, %1.<Vetype>[%2]";
3047 [(set_attr "type" "neon_to_gp<q>")]
3050 ;; Lane extraction of a value, neither sign nor zero extension
3051 ;; is guaranteed so upper bits should be considered undefined.
3052 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: GP-reg destination (umov), SIMD-reg destination
;; (dup), or direct store of one lane (st1).
3053 (define_insn "aarch64_get_lane<mode>"
3054 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3056 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3057 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3060 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3061 switch (which_alternative)
3064 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3066 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3068 return "st1\\t{%1.<Vetype>}[%2], %0";
3073 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent 64-bit memory operands as one 128-bit register;
;; the condition verifies operand 2 immediately follows operand 1 in
;; memory, and Utq requires the 128-bit-aligned-capable address form.
3076 (define_insn "load_pair_lanes<mode>"
3077 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3079 (match_operand:VDC 1 "memory_operand" "Utq")
3080 (match_operand:VDC 2 "memory_operand" "m")))]
3081 "TARGET_SIMD && !STRICT_ALIGNMENT
3082 && rtx_equal_p (XEXP (operands[2], 0),
3083 plus_constant (Pmode,
3084 XEXP (operands[1], 0),
3085 GET_MODE_SIZE (<MODE>mode)))"
3087 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a concatenated pair as one 128-bit access: from SIMD regs
;; (first alternative) or as a GP-register stp (second).
3090 (define_insn "store_pair_lanes<mode>"
3091 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3093 (match_operand:VDC 1 "register_operand" "w, r")
3094 (match_operand:VDC 2 "register_operand" "w, r")))]
3098 stp\\t%x1, %x2, %y0"
3099 [(set_attr "type" "neon_stp, store_16")]
3102 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with zero into a 128-bit register,
;; little-endian layout (zero in the high half).  Sources may be a SIMD
;; reg, GP reg, or memory per the three alternatives.
3105 (define_insn "*aarch64_combinez<mode>"
3106 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3108 (match_operand:VDC 1 "general_operand" "w,?r,m")
3109 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3110 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3115 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3116 (set_attr "simd" "yes,*,yes")
3117 (set_attr "fp" "*,yes,*")]
;; Big-endian twin: concatenation order swapped to match BE lane layout.
3120 (define_insn "*aarch64_combinez_be<mode>"
3121 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3123 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3124 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3125 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3130 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3131 (set_attr "simd" "yes,*,yes")
3132 (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit registers into one 128-bit register; the helper
;; in aarch64.c deals with endianness.
3135 (define_expand "aarch64_combine<mode>"
3136 [(match_operand:<VDBL> 0 "register_operand")
3137 (match_operand:VDC 1 "register_operand")
3138 (match_operand:VDC 2 "register_operand")]
3141 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Internal combine: fill the low then high quad of the destination.
3147 (define_expand "aarch64_simd_combine<mode>"
3148 [(match_operand:<VDBL> 0 "register_operand")
3149 (match_operand:VDC 1 "register_operand")
3150 (match_operand:VDC 2 "register_operand")]
3153 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3154 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3157 [(set_attr "type" "multiple")]
3160 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves ([su]addl2/[su]subl2): extend
;; each operand's high-half lanes, then add or subtract.
3162 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3165 (match_operand:VQW 1 "register_operand" "w")
3166 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3167 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3168 (match_operand:VQW 2 "register_operand" "w")
3171 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3172 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Low-half counterpart ([su]addl/[su]subl).
3175 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3176 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3177 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3178 (match_operand:VQW 1 "register_operand" "w")
3179 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3180 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3181 (match_operand:VQW 2 "register_operand" "w")
3184 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3185 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders: build the high-half lane selector and forward
;; to the matching _hi_internal insn.
3189 (define_expand "aarch64_saddl2<mode>"
3190 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3191 (match_operand:VQW 1 "register_operand" "w")
3192 (match_operand:VQW 2 "register_operand" "w")]
3195 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3196 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3201 (define_expand "aarch64_uaddl2<mode>"
3202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203 (match_operand:VQW 1 "register_operand" "w")
3204 (match_operand:VQW 2 "register_operand" "w")]
3207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3208 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3213 (define_expand "aarch64_ssubl2<mode>"
3214 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3215 (match_operand:VQW 1 "register_operand" "w")
3216 (match_operand:VQW 2 "register_operand" "w")]
3219 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3220 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3225 (define_expand "aarch64_usubl2<mode>"
3226 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3227 (match_operand:VQW 1 "register_operand" "w")
3228 (match_operand:VQW 2 "register_operand" "w")]
3231 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3232 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-vector widening add/sub for 64-bit inputs (no half-select
;; needed).
3237 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3239 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3240 (match_operand:VD_BHSI 1 "register_operand" "w"))
3242 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3244 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3245 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3248 ;; <su><addsub>w<q>.

;; widen_ssum3, 128-bit input (VQW): signed widening sum optab.
;; Splits the Q-reg input via a lo-half PARALLEL ('false' selects the low
;; half), does saddw on the low half into a temp, then saddw2 folds in
;; the high half.  (Condition and some emit_insn arguments are on lines
;; missing from this listing.)
3250 (define_expand "widen_ssum<mode>3"
3251 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3252 (plus:<VDBLW> (sign_extend:<VDBLW>
3253 (match_operand:VQW 1 "register_operand" ""))
3254 (match_operand:<VDBLW> 2 "register_operand" "")))]
3257 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3258 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3260 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3262 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; widen_ssum3, 64-bit input (VD_BHSI): a single saddw suffices.
;; Note operand order: accumulator (operands[2]) first, then the vector.
3267 (define_expand "widen_ssum<mode>3"
3268 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3269 (plus:<VWIDE> (sign_extend:<VWIDE>
3270 (match_operand:VD_BHSI 1 "register_operand" ""))
3271 (match_operand:<VWIDE> 2 "register_operand" "")))]
3274 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; widen_usum3, 128-bit input: unsigned analogue using uaddw/uaddw2.
3278 (define_expand "widen_usum<mode>3"
3279 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3280 (plus:<VDBLW> (zero_extend:<VDBLW>
3281 (match_operand:VQW 1 "register_operand" ""))
3282 (match_operand:<VDBLW> 2 "register_operand" "")))]
3285 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3286 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3288 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3290 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; widen_usum3, 64-bit input: single uaddw.
3295 (define_expand "widen_usum<mode>3"
3296 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3297 (plus:<VWIDE> (zero_extend:<VWIDE>
3298 (match_operand:VD_BHSI 1 "register_operand" ""))
3299 (match_operand:<VWIDE> 2 "register_operand" "")))]
3302 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; {s,u}{add,sub}w: wide op where only operand 2 (a D-reg vector) is
;; extended; operand 1 is already <VWIDE>.  The ANY_EXTEND wrapping
;; operand 2 is on a line dropped from this listing.
3306 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3307 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3308 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3310 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3312 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3313 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; _internal variant: operand 2 is the LOW half of a Q reg, selected by a
;; vect_par_cnst_lo_half PARALLEL; prints the half-width register name
;; (<Vhalftype>), still emitting the plain (non-2) instruction.
3316 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3321 (match_operand:VQW 2 "register_operand" "w")
3322 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3324 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3325 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; w2_internal: same but selects the HIGH half and emits the "2" form.
3328 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3330 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3333 (match_operand:VQW 2 "register_operand" "w")
3334 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3336 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3337 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; The four expanders below wrap the w2_internal insns: each builds a
;; hi-half PARALLEL and forwards to the matching _internal pattern.
3340 (define_expand "aarch64_saddw2<mode>"
3341 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3342 (match_operand:<VWIDE> 1 "register_operand" "w")
3343 (match_operand:VQW 2 "register_operand" "w")]
3346 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3347 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3352 (define_expand "aarch64_uaddw2<mode>"
3353 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3354 (match_operand:<VWIDE> 1 "register_operand" "w")
3355 (match_operand:VQW 2 "register_operand" "w")]
3358 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3359 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3365 (define_expand "aarch64_ssubw2<mode>"
3366 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3367 (match_operand:<VWIDE> 1 "register_operand" "w")
3368 (match_operand:VQW 2 "register_operand" "w")]
3371 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3372 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3377 (define_expand "aarch64_usubw2<mode>"
3378 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3379 (match_operand:<VWIDE> 1 "register_operand" "w")
3380 (match_operand:VQW 2 "register_operand" "w")]
3383 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3389 ;; <su><r>h<addsub>.

;; avg3_floor / avg3_ceil: standard-named average optabs, modelled as an
;; unspec over the two inputs.  The unspec tag and insn condition are on
;; lines missing from this listing — presumably the (rounding) halving-add
;; unspecs; confirm against the full file.
3391 (define_expand "<u>avg<mode>3_floor"
3392 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3393 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3394 (match_operand:VDQ_BHSI 2 "register_operand")]
3399 (define_expand "<u>avg<mode>3_ceil"
3400 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3401 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3402 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Halving add/sub insn: {s,u}hadd / {s,u}rhadd / {s,u}hsub etc. per the
;; <sur> and <addsub> iterators.
3407 (define_insn "aarch64_<sur>h<addsub><mode>"
3408 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3409 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3410 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3413 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3414 [(set_attr "type" "neon_<addsub>_halve<q>")]
3417 ;; <r><addsub>hn<q>.

;; (r)addhn / (r)subhn: add/sub then narrow, keeping the high half of
;; each element; result is the narrow mode <VNARROWQ>.
3419 (define_insn "aarch64_<sur><addsub>hn<mode>"
3420 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3421 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3422 (match_operand:VQN 2 "register_operand" "w")]
3425 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3426 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; hn2 variant: writes the upper half of the destination; operand 1 is
;; tied to the output ("0") so the previously-narrowed low half survives.
3429 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3430 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3431 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3432 (match_operand:VQN 2 "register_operand" "w")
3433 (match_operand:VQN 3 "register_operand" "w")]
3436 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3437 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; pmul: polynomial (carry-less) multiply on byte vectors (VB).
3442 (define_insn "aarch64_pmul<mode>"
3443 [(set (match_operand:VB 0 "register_operand" "=w")
3444 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3445 (match_operand:VB 2 "register_operand" "w")]
3448 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3449 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; fmulx: FP multiply-extended, vector and scalar FP modes (VHSDF_HSDF).
3454 (define_insn "aarch64_fmulx<mode>"
3455 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3457 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3458 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3461 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3462 [(set_attr "type" "neon_fp_mul_<stype>")]
3465 ;; vmulxq_lane_f32, and vmulx_laneq_f32

;; Combiner patterns ("*"-named, not generated directly) matching
;; fmulx by a duplicated lane of another vector.  All lane patterns remap
;; the lane index for big-endian via aarch64_endian_lane_rtx, which also
;; bounds-checks the index.
;; This one takes the lane from the opposite-width vector mode
;; (<VSWAP_WIDTH>): Q-by-D-lane and D-by-Q-lane forms.
3467 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3468 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3470 [(match_operand:VDQSF 1 "register_operand" "w")
3471 (vec_duplicate:VDQSF
3473 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3474 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3478 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3479 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3481 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3484 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; Same-width lane form: lane comes from a vector of the same mode.
3486 (define_insn "*aarch64_mulx_elt<mode>"
3487 [(set (match_operand:VDQF 0 "register_operand" "=w")
3489 [(match_operand:VDQF 1 "register_operand" "w")
3492 (match_operand:VDQF 2 "register_operand" "w")
3493 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3497 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3498 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3500 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; fmulx by a scalar broadcast (vec_duplicate of a scalar element):
;; always emits lane index [0].
3505 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3506 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3508 [(match_operand:VHSDF 1 "register_operand" "w")
3509 (vec_duplicate:VHSDF
3510 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3513 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3514 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3517 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3518 ;; vmulxd_lane_f64 == vmulx_lane_f64
3519 ;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar result: fmulx of a scalar with one extracted vector lane
;; (vec_select of a single element, no duplicate).
3521 (define_insn "*aarch64_vgetfmulx<mode>"
3522 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3524 [(match_operand:<VEL> 1 "register_operand" "w")
3526 (match_operand:VDQF 2 "register_operand" "w")
3527 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3531 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3532 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3534 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: BINQOPS covers the signed/unsigned saturating
;; plus/minus codes, giving sqadd/uqadd/sqsub/uqsub.
3538 (define_insn "aarch64_<su_optab><optab><mode>"
3539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3541 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3543 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3544 [(set_attr "type" "neon_<optab><q>")]
3547 ;; suqadd and usqadd

;; suqadd/usqadd: accumulate into the destination — operand 1 is tied to
;; operand 0 ("0"), so the template prints only dest and operand 2.
3549 (define_insn "aarch64_<sur>qadd<mode>"
3550 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3551 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3552 (match_operand:VSDQ_I 2 "register_operand" "w")]
3555 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3556 [(set_attr "type" "neon_qadd<q>")]
;; sqxtun: signed-to-unsigned saturating extract-narrow.
3561 (define_insn "aarch64_sqmovun<mode>"
3562 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3563 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3566 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3567 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3570 ;; sqmovn and uqmovn

;; sq/uq xtn: saturating extract-narrow (signed/unsigned via <sur>).
3572 (define_insn "aarch64_<sur>qmovn<mode>"
3573 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3574 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3577 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3578 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary op s<optab> — the RTL code line is missing from this
;; listing; presumably the sqabs/sqneg pair given the template — confirm
;; against the full file.
3583 (define_insn "aarch64_s<optab><mode>"
3584 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3586 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3588 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3589 [(set_attr "type" "neon_<optab><q>")]
;; sqdmulh / sqrdmulh: saturating (rounding) doubling multiply high half.
3594 (define_insn "aarch64_sq<r>dmulh<mode>"
3595 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3597 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3598 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3601 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3602 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Lane form, vector modes (VDQHS): lane vector is <VCOND> with the
;; <vwx> register class (lane-indexable registers); index remapped for
;; big-endian via aarch64_endian_lane_rtx.
3607 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3608 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3610 [(match_operand:VDQHS 1 "register_operand" "w")
3612 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3613 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3617 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3618 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3619 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; laneq form: lane taken from a full 128-bit vector (<VCONQ>).
3622 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3623 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3625 [(match_operand:VDQHS 1 "register_operand" "w")
3627 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3628 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3632 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3633 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3634 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-mode (SD_HSI) lane form.
3637 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3638 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3640 [(match_operand:SD_HSI 1 "register_operand" "w")
3642 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3643 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3647 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3648 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3649 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar-mode laneq form.
3652 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3653 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3655 [(match_operand:SD_HSI 1 "register_operand" "w")
3657 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3658 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3662 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3663 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3664 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqrdmlah / sqrdmlsh (ARMv8.1 QRDMA): rounding doubling multiply
;; accumulate/subtract high half.  Operand 1 is the accumulator, tied to
;; the destination ("0"); the template multiplies operands 2 and 3.
3669 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3670 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3672 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3673 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3674 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3677 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3678 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3681 ;; sqrdml[as]h_lane.

;; Vector lane form: lane index remapped for big-endian.
3683 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3684 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3686 [(match_operand:VDQHS 1 "register_operand" "0")
3687 (match_operand:VDQHS 2 "register_operand" "w")
3689 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3690 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3694 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3696 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3698 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar lane form.
3701 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3702 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3704 [(match_operand:SD_HSI 1 "register_operand" "0")
3705 (match_operand:SD_HSI 2 "register_operand" "w")
3707 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3708 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3712 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3714 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3716 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3719 ;; sqrdml[as]h_laneq.

;; Vector laneq form: lane taken from a 128-bit vector (<VCONQ>).
3721 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3722 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3724 [(match_operand:VDQHS 1 "register_operand" "0")
3725 (match_operand:VDQHS 2 "register_operand" "w")
3727 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3728 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3732 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3734 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3736 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq form.
3739 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3740 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3742 [(match_operand:SD_HSI 1 "register_operand" "0")
3743 (match_operand:SD_HSI 2 "register_operand" "w")
3745 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3746 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3750 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3752 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3754 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal / sqdmlsl: saturating doubling multiply accumulate/subtract
;; long.  SBINQOPS:as selects accumulate vs subtract; operand 1 is the
;; wide accumulator, tied to the destination ("0").
3759 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3760 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3762 (match_operand:<VWIDE> 1 "register_operand" "0")
3765 (sign_extend:<VWIDE>
3766 (match_operand:VSD_HSI 2 "register_operand" "w"))
3767 (sign_extend:<VWIDE>
3768 (match_operand:VSD_HSI 3 "register_operand" "w")))
3771 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3772 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Vector lane form: multiplicand 3 is a duplicated lane of a <VCOND>
;; vector; index remapped for big-endian.
3777 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3778 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3780 (match_operand:<VWIDE> 1 "register_operand" "0")
3783 (sign_extend:<VWIDE>
3784 (match_operand:VD_HSI 2 "register_operand" "w"))
3785 (sign_extend:<VWIDE>
3786 (vec_duplicate:VD_HSI
3788 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3794 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3796 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3798 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Vector laneq form: lane from a 128-bit (<VCONQ>) vector.
3801 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3802 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3804 (match_operand:<VWIDE> 1 "register_operand" "0")
3807 (sign_extend:<VWIDE>
3808 (match_operand:VD_HSI 2 "register_operand" "w"))
3809 (sign_extend:<VWIDE>
3810 (vec_duplicate:VD_HSI
3812 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3813 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3818 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane form: plain vec_select, no duplicate needed.
3825 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3826 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3828 (match_operand:<VWIDE> 1 "register_operand" "0")
3831 (sign_extend:<VWIDE>
3832 (match_operand:SD_HSI 2 "register_operand" "w"))
3833 (sign_extend:<VWIDE>
3835 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3836 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3841 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3843 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3845 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq form.
3848 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3849 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3851 (match_operand:<VWIDE> 1 "register_operand" "0")
3854 (sign_extend:<VWIDE>
3855 (match_operand:SD_HSI 2 "register_operand" "w"))
3856 (sign_extend:<VWIDE>
3858 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3859 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3864 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3866 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3868 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: multiplicand is a broadcast scalar element; always lane [0].
3873 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3874 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876 (match_operand:<VWIDE> 1 "register_operand" "0")
3879 (sign_extend:<VWIDE>
3880 (match_operand:VD_HSI 2 "register_operand" "w"))
3881 (sign_extend:<VWIDE>
3882 (vec_duplicate:VD_HSI
3883 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3886 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3887 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal2 / sqdmlsl2 family: as sqdml[as]l but operating on the HIGH
;; halves of 128-bit inputs, selected via vect_par_cnst_hi_half
;; PARALLELs.  The _internal insns take the PARALLEL as an extra
;; operand; the expanders below construct it.
3892 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3895 (match_operand:<VWIDE> 1 "register_operand" "0")
3898 (sign_extend:<VWIDE>
3900 (match_operand:VQ_HSI 2 "register_operand" "w")
3901 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3902 (sign_extend:<VWIDE>
3904 (match_operand:VQ_HSI 3 "register_operand" "w")
3908 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3909 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half PARALLEL, forward to _internal.
3912 (define_expand "aarch64_sqdmlal2<mode>"
3913 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3914 (match_operand:<VWIDE> 1 "register_operand" "w")
3915 (match_operand:VQ_HSI 2 "register_operand" "w")
3916 (match_operand:VQ_HSI 3 "register_operand" "w")]
3919 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3920 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3921 operands[2], operands[3], p));
;; Subtract counterpart.
3925 (define_expand "aarch64_sqdmlsl2<mode>"
3926 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3927 (match_operand:<VWIDE> 1 "register_operand" "w")
3928 (match_operand:VQ_HSI 2 "register_operand" "w")
3929 (match_operand:VQ_HSI 3 "register_operand" "w")]
3932 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3933 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3934 operands[2], operands[3], p));
;; Lane variant of the hi-half form: second multiplicand is a duplicated
;; <VCOND> lane; lane index remapped for big-endian.
3940 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3943 (match_operand:<VWIDE> 1 "register_operand" "0")
3946 (sign_extend:<VWIDE>
3948 (match_operand:VQ_HSI 2 "register_operand" "w")
3949 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3950 (sign_extend:<VWIDE>
3951 (vec_duplicate:<VHALF>
3953 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3954 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3959 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3961 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3963 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; laneq variant: lane from a 128-bit (<VCONQ>) vector.
3966 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3967 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3969 (match_operand:<VWIDE> 1 "register_operand" "0")
3972 (sign_extend:<VWIDE>
3974 (match_operand:VQ_HSI 2 "register_operand" "w")
3975 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3976 (sign_extend:<VWIDE>
3977 (vec_duplicate:<VHALF>
3979 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3980 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3985 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3987 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3989 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; The four expanders below wrap the lane/laneq internals, building the
;; hi-half PARALLEL and forwarding the lane operands.
3992 (define_expand "aarch64_sqdmlal2_lane<mode>"
3993 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (match_operand:<VWIDE> 1 "register_operand" "w")
3995 (match_operand:VQ_HSI 2 "register_operand" "w")
3996 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997 (match_operand:SI 4 "immediate_operand" "i")]
4000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4002 operands[2], operands[3],
4007 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4008 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4009 (match_operand:<VWIDE> 1 "register_operand" "w")
4010 (match_operand:VQ_HSI 2 "register_operand" "w")
4011 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4012 (match_operand:SI 4 "immediate_operand" "i")]
4015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4016 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4017 operands[2], operands[3],
4022 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4023 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (match_operand:<VWIDE> 1 "register_operand" "w")
4025 (match_operand:VQ_HSI 2 "register_operand" "w")
4026 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4027 (match_operand:SI 4 "immediate_operand" "i")]
4030 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4031 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4032 operands[2], operands[3],
4037 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4038 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4039 (match_operand:<VWIDE> 1 "register_operand" "w")
4040 (match_operand:VQ_HSI 2 "register_operand" "w")
4041 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4042 (match_operand:SI 4 "immediate_operand" "i")]
4045 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4046 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4047 operands[2], operands[3],
;; _n variant: second multiplicand is a broadcast scalar; lane [0].
4052 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4053 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4055 (match_operand:<VWIDE> 1 "register_operand" "0")
4058 (sign_extend:<VWIDE>
4060 (match_operand:VQ_HSI 2 "register_operand" "w")
4061 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4062 (sign_extend:<VWIDE>
4063 (vec_duplicate:<VHALF>
4064 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4067 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4068 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4071 (define_expand "aarch64_sqdmlal2_n<mode>"
4072 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4073 (match_operand:<VWIDE> 1 "register_operand" "w")
4074 (match_operand:VQ_HSI 2 "register_operand" "w")
4075 (match_operand:<VEL> 3 "register_operand" "w")]
4078 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4079 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4080 operands[2], operands[3],
4085 (define_expand "aarch64_sqdmlsl2_n<mode>"
4086 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4087 (match_operand:<VWIDE> 1 "register_operand" "w")
4088 (match_operand:VQ_HSI 2 "register_operand" "w")
4089 (match_operand:<VEL> 3 "register_operand" "w")]
4092 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4093 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4094 operands[2], operands[3],
;; sqdmull: saturating doubling multiply long — both operands
;; sign-extended to <VWIDE> before the doubling multiply.
4101 (define_insn "aarch64_sqdmull<mode>"
4102 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4105 (sign_extend:<VWIDE>
4106 (match_operand:VSD_HSI 1 "register_operand" "w"))
4107 (sign_extend:<VWIDE>
4108 (match_operand:VSD_HSI 2 "register_operand" "w")))
4111 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4112 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Vector lane form (VD_HSI): operand 2 is a duplicated <VCOND> lane;
;; lane index remapped for big-endian via aarch64_endian_lane_rtx.
4117 (define_insn "aarch64_sqdmull_lane<mode>"
4118 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4121 (sign_extend:<VWIDE>
4122 (match_operand:VD_HSI 1 "register_operand" "w"))
4123 (sign_extend:<VWIDE>
4124 (vec_duplicate:VD_HSI
4126 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4127 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4132 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4133 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4135 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Vector laneq form: lane from a 128-bit (<VCONQ>) vector.
4138 (define_insn "aarch64_sqdmull_laneq<mode>"
4139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (sign_extend:<VWIDE>
4143 (match_operand:VD_HSI 1 "register_operand" "w"))
4144 (sign_extend:<VWIDE>
4145 (vec_duplicate:VD_HSI
4147 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4148 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4153 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4154 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4156 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane form: plain vec_select, no duplicate.
4159 (define_insn "aarch64_sqdmull_lane<mode>"
4160 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4163 (sign_extend:<VWIDE>
4164 (match_operand:SD_HSI 1 "register_operand" "w"))
4165 (sign_extend:<VWIDE>
4167 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4168 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4173 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4174 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4176 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar laneq form.
4179 (define_insn "aarch64_sqdmull_laneq<mode>"
4180 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4183 (sign_extend:<VWIDE>
4184 (match_operand:SD_HSI 1 "register_operand" "w"))
4185 (sign_extend:<VWIDE>
4187 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4188 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4193 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4194 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4196 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: operand 2 is a broadcast scalar element; always lane [0].
4201 (define_insn "aarch64_sqdmull_n<mode>"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4205 (sign_extend:<VWIDE>
4206 (match_operand:VD_HSI 1 "register_operand" "w"))
4207 (sign_extend:<VWIDE>
4208 (vec_duplicate:VD_HSI
4209 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4213 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4214 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2 family: as sqdmull but on the HIGH halves of 128-bit inputs
;; (vect_par_cnst_hi_half selections); expanders build the PARALLEL.
4221 (define_insn "aarch64_sqdmull2<mode>_internal"
4222 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4225 (sign_extend:<VWIDE>
4227 (match_operand:VQ_HSI 1 "register_operand" "w")
4228 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4229 (sign_extend:<VWIDE>
4231 (match_operand:VQ_HSI 2 "register_operand" "w")
4236 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4237 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander wrapping the _internal insn with a hi-half PARALLEL.
4240 (define_expand "aarch64_sqdmull2<mode>"
4241 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4242 (match_operand:VQ_HSI 1 "register_operand" "w")
4243 (match_operand:VQ_HSI 2 "register_operand" "w")]
4246 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4247 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Lane variant: second multiplicand is a duplicated <VCOND> lane;
;; index remapped for big-endian.
4254 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4255 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4258 (sign_extend:<VWIDE>
4260 (match_operand:VQ_HSI 1 "register_operand" "w")
4261 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4262 (sign_extend:<VWIDE>
4263 (vec_duplicate:<VHALF>
4265 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4266 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4271 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4272 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4274 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; laneq variant: lane from a 128-bit (<VCONQ>) vector.
4277 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4281 (sign_extend:<VWIDE>
4283 (match_operand:VQ_HSI 1 "register_operand" "w")
4284 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4285 (sign_extend:<VWIDE>
4286 (vec_duplicate:<VHALF>
4288 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4289 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4294 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4295 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4297 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the lane/laneq internals.
4300 (define_expand "aarch64_sqdmull2_lane<mode>"
4301 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4302 (match_operand:VQ_HSI 1 "register_operand" "w")
4303 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4304 (match_operand:SI 3 "immediate_operand" "i")]
4307 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4308 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4309 operands[2], operands[3],
4314 (define_expand "aarch64_sqdmull2_laneq<mode>"
4315 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4316 (match_operand:VQ_HSI 1 "register_operand" "w")
4317 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4318 (match_operand:SI 3 "immediate_operand" "i")]
4321 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4322 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4323 operands[2], operands[3],
;; _n variant: second multiplicand is a broadcast scalar; lane [0].
4330 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4331 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4334 (sign_extend:<VWIDE>
4336 (match_operand:VQ_HSI 1 "register_operand" "w")
4337 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4338 (sign_extend:<VWIDE>
4339 (vec_duplicate:<VHALF>
4340 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4344 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4345 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4348 (define_expand "aarch64_sqdmull2_n<mode>"
4349 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4350 (match_operand:VQ_HSI 1 "register_operand" "w")
4351 (match_operand:<VEL> 2 "register_operand" "w")]
4354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4355 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shifts by a register shift amount: plain (s/u)shl and the
;; saturating/rounding q(r)shl variants, modelled as unspecs.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4362 (define_insn "aarch64_<sur>shl<mode>"
4363 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4365 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4366 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4369 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4370 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (and optionally rounding) register shift.
4376 (define_insn "aarch64_<sur>q<r>shl<mode>"
4377 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4379 [(match_operand:VSDQ_I 1 "register_operand" "w")
4380 (match_operand:VSDQ_I 2 "register_operand" "w")]
4383 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4384 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Immediate-shift patterns: widening shift-left-long, shift-right,
;; shift-right-accumulate, insert shifts and saturating forms.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; shll_n: widening shift left; a shift equal to the element bit-size is
;; the special SHLL form without the <sur> prefix.
4389 (define_insn "aarch64_<sur>shll_n<mode>"
4390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4391 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4393 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4397 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4398 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4400 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4402 [(set_attr "type" "neon_shift_imm_long")]
;; shll2_n: same, operating on the high half of a 128-bit source.
4407 (define_insn "aarch64_<sur>shll2_n<mode>"
4408 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4409 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4410 (match_operand:SI 2 "immediate_operand" "i")]
4414 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4415 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4417 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4419 [(set_attr "type" "neon_shift_imm_long")]
;; shr_n: shift right by immediate.
4424 (define_insn "aarch64_<sur>shr_n<mode>"
4425 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4426 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4428 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4431 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4432 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; sra_n: shift right and accumulate; operand 1 is tied to the output ("0").
4437 (define_insn "aarch64_<sur>sra_n<mode>"
4438 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4439 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4440 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4442 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4445 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4446 [(set_attr "type" "neon_shift_acc<q>")]
;; s<lr>i_n: shift-and-insert (SLI/SRI); destination partially preserved,
;; so operand 1 is tied to the output.
4451 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4452 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4453 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4454 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4456 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4459 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4460 [(set_attr "type" "neon_shift_imm<q>")]
;; qshl<u>_n: saturating shift left by immediate.
4465 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4466 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4467 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4469 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4472 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4473 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; q(r)shr(u)n_n: saturating (rounding) shift right and narrow.
4479 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4480 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4481 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4483 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4486 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4487 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4491 ;; cm(eq|ge|gt|lt|le)
4492 ;; Note, we have constraints for Dz and Z as different expanders
4493 ;; have different ideas of what should be passed to this pattern.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; Signed vector compare; second alternative compares against zero (#0).
4495 (define_insn "aarch64_cm<optab><mode>"
4496 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4498 (COMPARISONS:<V_INT_EQUIV>
4499 (match_operand:VDQ_I 1 "register_operand" "w,w")
4500 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4504 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4505 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4506 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI-mode signed compare: may live in FP or GP registers; after reload,
;; the GP case is split into a compare + conditional store (clobbers CC).
4509 (define_insn_and_split "aarch64_cm<optab>di"
4510 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4513 (match_operand:DI 1 "register_operand" "w,w,r")
4514 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4516 (clobber (reg:CC CC_REGNUM))]
4519 "&& reload_completed"
4520 [(set (match_operand:DI 0 "register_operand")
4523 (match_operand:DI 1 "register_operand")
4524 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4527 /* If we are in the general purpose register file,
4528 we split to a sequence of comparison and store. */
4529 if (GP_REGNUM_P (REGNO (operands[0]))
4530 && GP_REGNUM_P (REGNO (operands[1])))
4532 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4533 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4534 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4535 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4538 /* Otherwise, we expand to a similar pattern which does not
4539 clobber CC_REGNUM. */
4541 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI compare in SIMD registers (no CC clobber).
4544 (define_insn "*aarch64_cm<optab>di"
4545 [(set (match_operand:DI 0 "register_operand" "=w,w")
4548 (match_operand:DI 1 "register_operand" "w,w")
4549 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4551 "TARGET_SIMD && reload_completed"
4553 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4554 cm<optab>\t%d0, %d1, #0"
4555 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares (cmhi/cmhs family); no compare-against-zero
;; alternative here.
4560 (define_insn "aarch64_cm<optab><mode>"
4561 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4563 (UCOMPARISONS:<V_INT_EQUIV>
4564 (match_operand:VDQ_I 1 "register_operand" "w")
4565 (match_operand:VDQ_I 2 "register_operand" "w")
4568 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4569 [(set_attr "type" "neon_compare<q>")]
;; DI-mode unsigned compare; same GP-register split strategy as the
;; signed variant above, but always uses CCmode.
4572 (define_insn_and_split "aarch64_cm<optab>di"
4573 [(set (match_operand:DI 0 "register_operand" "=w,r")
4576 (match_operand:DI 1 "register_operand" "w,r")
4577 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4579 (clobber (reg:CC CC_REGNUM))]
4582 "&& reload_completed"
4583 [(set (match_operand:DI 0 "register_operand")
4586 (match_operand:DI 1 "register_operand")
4587 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4590 /* If we are in the general purpose register file,
4591 we split to a sequence of comparison and store. */
4592 if (GP_REGNUM_P (REGNO (operands[0]))
4593 && GP_REGNUM_P (REGNO (operands[1])))
4595 machine_mode mode = CCmode;
4596 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4597 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4598 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4601 /* Otherwise, we expand to a similar pattern which does not
4602 clobber CC_REGNUM. */
4604 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload unsigned DI compare in SIMD registers.
4607 (define_insn "*aarch64_cm<optab>di"
4608 [(set (match_operand:DI 0 "register_operand" "=w")
4611 (match_operand:DI 1 "register_operand" "w")
4612 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4614 "TARGET_SIMD && reload_completed"
4615 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4616 [(set_attr "type" "neon_compare")]
4621 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4622 ;; we don't have any insns using ne, and aarch64_vcond outputs
4623 ;; not (neg (eq (and x y) 0))
4624 ;; which is rewritten by simplify_rtx as
4625 ;; plus (eq (and x y) 0) -1.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; CMTST: element-wise "test bits" — all-ones where (x & y) != 0,
;; matched in the canonicalised plus/eq form described above.
4627 (define_insn "aarch64_cmtst<mode>"
4628 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4632 (match_operand:VDQ_I 1 "register_operand" "w")
4633 (match_operand:VDQ_I 2 "register_operand" "w"))
4634 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4635 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4638 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4639 [(set_attr "type" "neon_tst<q>")]
;; DI-mode cmtst; when allocated to GP registers it is split after reload
;; into an AND-based compare plus conditional store (clobbers CC).
4642 (define_insn_and_split "aarch64_cmtstdi"
4643 [(set (match_operand:DI 0 "register_operand" "=w,r")
4647 (match_operand:DI 1 "register_operand" "w,r")
4648 (match_operand:DI 2 "register_operand" "w,r"))
4650 (clobber (reg:CC CC_REGNUM))]
4653 "&& reload_completed"
4654 [(set (match_operand:DI 0 "register_operand")
4658 (match_operand:DI 1 "register_operand")
4659 (match_operand:DI 2 "register_operand"))
4662 /* If we are in the general purpose register file,
4663 we split to a sequence of comparison and store. */
4664 if (GP_REGNUM_P (REGNO (operands[0]))
4665 && GP_REGNUM_P (REGNO (operands[1])))
4667 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4668 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4669 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4670 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4671 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4674 /* Otherwise, we expand to a similar pattern which does not
4675 clobber CC_REGNUM. */
4677 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload cmtst in SIMD registers (no CC clobber).
4680 (define_insn "*aarch64_cmtstdi"
4681 [(set (match_operand:DI 0 "register_operand" "=w")
4685 (match_operand:DI 1 "register_operand" "w")
4686 (match_operand:DI 2 "register_operand" "w"))
4689 "cmtst\t%d0, %d1, %d2"
4690 [(set_attr "type" "neon_tst")]
4693 ;; fcm(eq|ge|gt|le|lt)
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; Floating-point vector compare; second alternative compares against 0.0
;; (YDz constraint).
4695 (define_insn "aarch64_cm<optab><mode>"
4696 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4698 (COMPARISONS:<V_INT_EQUIV>
4699 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4700 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4704 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4705 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4706 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4710 ;; Note we can also handle what would be fac(le|lt) by
4711 ;; generating fac(ge|gt).
;; Absolute compare (FACGE/FACGT): compares |op1| with |op2|.
4713 (define_insn "aarch64_fac<optab><mode>"
4714 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4716 (FAC_COMPARISONS:<V_INT_EQUIV>
4718 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4723 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4724 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise addition (ADDP) on 64-bit integer vectors, and the scalar
;; DI-result reduction over a V2DI input.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4729 (define_insn "aarch64_addp<mode>"
4730 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4732 [(match_operand:VD_BHSI 1 "register_operand" "w")
4733 (match_operand:VD_BHSI 2 "register_operand" "w")]
4736 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4737 [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar pairwise add of the two halves of a V2DI register.
4740 (define_insn "aarch64_addpdi"
4741 [(set (match_operand:DI 0 "register_operand" "=w")
4743 [(match_operand:V2DI 1 "register_operand" "w")]
4747 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.  The expander first tries the approximate-sqrt
;; sequence (aarch64_emit_approx_sqrt); otherwise the FSQRT insn below
;; matches.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4752 (define_expand "sqrt<mode>2"
4753 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4754 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4757 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
;; Hardware FSQRT instruction pattern.
4761 (define_insn "*sqrt<mode>2"
4762 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4763 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4765 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4766 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4769 ;; Patterns for vector struct loads and stores.
;; Two-register (OImode) structure load/store group: LD2/LD2R/ST2 plus the
;; lane forms and the generic vec_load/store_lanes expanders, which insert
;; a register-list reversal on big-endian.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4771 (define_insn "aarch64_simd_ld2<mode>"
4772 [(set (match_operand:OI 0 "register_operand" "=w")
4773 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4774 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4777 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4778 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element structure and replicate to all lanes.
4781 (define_insn "aarch64_simd_ld2r<mode>"
4782 [(set (match_operand:OI 0 "register_operand" "=w")
4783 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4784 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4787 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4788 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 to a single lane; lane number endian-adjusted at output time.
4791 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4792 [(set (match_operand:OI 0 "register_operand" "=w")
4793 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4794 (match_operand:OI 2 "register_operand" "0")
4795 (match_operand:SI 3 "immediate_operand" "i")
4796 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4800 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4801 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4803 [(set_attr "type" "neon_load2_one_lane")]
;; Generic expander: on big-endian, load then reverse the register list.
4806 (define_expand "vec_load_lanesoi<mode>"
4807 [(set (match_operand:OI 0 "register_operand" "=w")
4808 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4809 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4813 if (BYTES_BIG_ENDIAN)
4815 rtx tmp = gen_reg_rtx (OImode);
4816 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4817 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4818 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4821 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; ST2 of a full two-register list.
4825 (define_insn "aarch64_simd_st2<mode>"
4826 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4827 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4828 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4831 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4832 [(set_attr "type" "neon_store2_2reg<q>")]
4835 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4836 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4837 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4838 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4839 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4840 (match_operand:SI 2 "immediate_operand" "i")]
4844 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4845 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4847 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Generic store expander: reverse the register list first on big-endian.
4850 (define_expand "vec_store_lanesoi<mode>"
4851 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4852 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4853 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4857 if (BYTES_BIG_ENDIAN)
4859 rtx tmp = gen_reg_rtx (OImode);
4860 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4861 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4862 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4865 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register (CImode) structure load/store group: LD3/LD3R/ST3, lane
;; forms, and vec_load/store_lanes expanders mirroring the OImode (ld2)
;; group above.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4869 (define_insn "aarch64_simd_ld3<mode>"
4870 [(set (match_operand:CI 0 "register_operand" "=w")
4871 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4872 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4875 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4876 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure and replicate to all lanes.
4879 (define_insn "aarch64_simd_ld3r<mode>"
4880 [(set (match_operand:CI 0 "register_operand" "=w")
4881 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4882 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4885 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4886 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 to a single lane; lane number endian-adjusted at output time.
4889 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4890 [(set (match_operand:CI 0 "register_operand" "=w")
4891 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4892 (match_operand:CI 2 "register_operand" "0")
4893 (match_operand:SI 3 "immediate_operand" "i")
4894 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4898 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4899 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4901 [(set_attr "type" "neon_load3_one_lane")]
;; Generic expander: on big-endian, load then reverse the register list.
4904 (define_expand "vec_load_lanesci<mode>"
4905 [(set (match_operand:CI 0 "register_operand" "=w")
4906 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4907 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4911 if (BYTES_BIG_ENDIAN)
4913 rtx tmp = gen_reg_rtx (CImode);
4914 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4915 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4916 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4919 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
;; ST3 of a full three-register list.
4923 (define_insn "aarch64_simd_st3<mode>"
4924 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4925 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4926 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4929 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4930 [(set_attr "type" "neon_store3_3reg<q>")]
4933 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4934 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4935 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4936 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4937 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4938 (match_operand:SI 2 "immediate_operand" "i")]
4942 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4943 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4945 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Generic store expander: reverse the register list first on big-endian.
4948 (define_expand "vec_store_lanesci<mode>"
4949 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4950 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4951 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4955 if (BYTES_BIG_ENDIAN)
4957 rtx tmp = gen_reg_rtx (CImode);
4958 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4959 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4960 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4963 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register (XImode) structure load/store group: LD4/LD4R/ST4, lane
;; forms, and vec_load/store_lanes expanders mirroring the ld2/ld3 groups.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
4967 (define_insn "aarch64_simd_ld4<mode>"
4968 [(set (match_operand:XI 0 "register_operand" "=w")
4969 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4973 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4974 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure and replicate to all lanes.
4977 (define_insn "aarch64_simd_ld4r<mode>"
4978 [(set (match_operand:XI 0 "register_operand" "=w")
4979 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4980 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4983 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4984 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 to a single lane; lane number endian-adjusted at output time.
4987 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4988 [(set (match_operand:XI 0 "register_operand" "=w")
4989 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4990 (match_operand:XI 2 "register_operand" "0")
4991 (match_operand:SI 3 "immediate_operand" "i")
4992 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4996 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4997 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4999 [(set_attr "type" "neon_load4_one_lane")]
;; Generic expander: on big-endian, load then reverse the register list.
5002 (define_expand "vec_load_lanesxi<mode>"
5003 [(set (match_operand:XI 0 "register_operand" "=w")
5004 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5005 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5009 if (BYTES_BIG_ENDIAN)
5011 rtx tmp = gen_reg_rtx (XImode);
5012 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5013 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5014 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5017 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; ST4 of a full four-register list.
5021 (define_insn "aarch64_simd_st4<mode>"
5022 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5023 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5024 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5027 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5028 [(set_attr "type" "neon_store4_4reg<q>")]
5031 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5032 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5033 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5034 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5035 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5036 (match_operand:SI 2 "immediate_operand" "i")]
5040 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5041 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5043 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Generic store expander: reverse the register list first on big-endian.
5046 (define_expand "vec_store_lanesxi<mode>"
5047 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5048 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5049 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5053 if (BYTES_BIG_ENDIAN)
5055 rtx tmp = gen_reg_rtx (XImode);
5056 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5057 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5058 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5061 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Register-list element reversal used by the big-endian lanes expanders:
;; after reload, splits into one TBL (tbl1v16qi) per 128-bit register in
;; the VSTRUCT list, using operand 2 as the byte-permute mask.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
5065 (define_insn_and_split "aarch64_rev_reglist<mode>"
5066 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5068 [(match_operand:VSTRUCT 1 "register_operand" "w")
5069 (match_operand:V16QI 2 "register_operand" "w")]
5070 UNSPEC_REV_REGLIST))]
5073 "&& reload_completed"
;; One TBL per constituent V16QI register of the struct mode.
5077 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5078 for (i = 0; i < nregs; i++)
5080 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5081 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5082 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5086 [(set_attr "type" "neon_tbl1_q")
5087 (set_attr "length" "<insn_count>")]
5090 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for VSTRUCT modes: before reload, force a
;; non-register destination's source into a register.
5092 (define_expand "mov<mode>"
5093 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5094 (match_operand:VSTRUCT 1 "general_operand" ""))]
5097 if (can_create_pseudo_p ())
5099 if (GET_CODE (operands[0]) != REG)
5100 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Multi-register LD1/ST1 builtin expanders (x2/x3 register lists) and the
;; insns they generate.  The expanders wrap the address register in a MEM
;; of the whole-list mode (OI for x2, CI for x3).
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
5105 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5106 [(match_operand:CI 0 "register_operand" "=w")
5107 (match_operand:DI 1 "register_operand" "r")
5108 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5111 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5112 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
;; LD1 of a three-register list.
5116 (define_insn "aarch64_ld1_x3_<mode>"
5117 [(set (match_operand:CI 0 "register_operand" "=w")
5119 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5120 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5122 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5123 [(set_attr "type" "neon_load1_3reg<q>")]
;; ST1 x2 expander: operand 0 is the address, operand 1 the OI value.
5126 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5127 [(match_operand:DI 0 "register_operand" "")
5128 (match_operand:OI 1 "register_operand" "")
5129 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5133 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
;; ST1 of a two-register list.
5137 (define_insn "aarch64_st1_x2_<mode>"
5138 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5140 [(match_operand:OI 1 "register_operand" "w")
5141 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5143 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5144 [(set_attr "type" "neon_store1_2reg<q>")]
;; ST1 x3 expander: operand 0 is the address, operand 1 the CI value.
5147 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5148 [(match_operand:DI 0 "register_operand" "")
5149 (match_operand:CI 1 "register_operand" "")
5150 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5153 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5154 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
;; ST1 of a three-register list.
5158 (define_insn "aarch64_st1_x3_<mode>"
5159 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5161 [(match_operand:CI 1 "register_operand" "w")
5162 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5164 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5165 [(set_attr "type" "neon_store1_3reg<q>")]
;; VSTRUCT moves.  Little-endian uses ST1/LD1 on the full register list;
;; big-endian uses separate OI/CI/XI patterns (split into LDP/STP-style
;; pieces after reload by the splits that follow this group).
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
5168 (define_insn "*aarch64_mov<mode>"
5169 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5170 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5171 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5172 && (register_operand (operands[0], <MODE>mode)
5173 || register_operand (operands[1], <MODE>mode))"
5176 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5177 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5178 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5179 neon_load<nregs>_<nregs>reg_q")
5180 (set_attr "length" "<insn_count>,4,4")]
;; Endian-safe single-register LD1 used by big-endian expansions.
5183 (define_insn "aarch64_be_ld1<mode>"
5184 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5185 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5186 "aarch64_simd_struct_operand" "Utv")]
5189 "ld1\\t{%0<Vmtype>}, %1"
5190 [(set_attr "type" "neon_load1_1reg<q>")]
;; Endian-safe single-register ST1.
5193 (define_insn "aarch64_be_st1<mode>"
5194 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5195 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5198 "st1\\t{%1<Vmtype>}, %0"
5199 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2-register) move; register-register case has length 8.
5202 (define_insn "*aarch64_be_movoi"
5203 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5204 (match_operand:OI 1 "general_operand" " w,w,m"))]
5205 "TARGET_SIMD && BYTES_BIG_ENDIAN
5206 && (register_operand (operands[0], OImode)
5207 || register_operand (operands[1], OImode))"
5212 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5213 (set_attr "length" "8,4,4")]
;; Big-endian CI (3-register) move.
5216 (define_insn "*aarch64_be_movci"
5217 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5218 (match_operand:CI 1 "general_operand" " w,w,o"))]
5219 "TARGET_SIMD && BYTES_BIG_ENDIAN
5220 && (register_operand (operands[0], CImode)
5221 || register_operand (operands[1], CImode))"
5223 [(set_attr "type" "multiple")
5224 (set_attr "length" "12,4,4")]
;; Big-endian XI (4-register) move.
5227 (define_insn "*aarch64_be_movxi"
5228 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5229 (match_operand:XI 1 "general_operand" " w,w,o"))]
5230 "TARGET_SIMD && BYTES_BIG_ENDIAN
5231 && (register_operand (operands[0], XImode)
5232 || register_operand (operands[1], XImode))"
5234 [(set_attr "type" "multiple")
5235 (set_attr "length" "16,4,4")]
;; Post-reload splits for OI/CI/XI moves.  Register-register moves are
;; decomposed into TImode piece moves (aarch64_simd_emit_reg_reg_move with
;; 2/3/4 pieces); big-endian memory cases are decomposed into OI/TI
;; submoves via subregs.
;; NOTE(review): the "(define_split" header lines themselves are among the
;; lines dropped from this excerpt; leading numerals are extraction
;; artifacts — confirm against upstream aarch64-simd.md.
5239 [(set (match_operand:OI 0 "register_operand")
5240 (match_operand:OI 1 "register_operand"))]
5241 "TARGET_SIMD && reload_completed"
5244 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CI split: reg-reg via three TImode moves, else big-endian memory case
;; via an OImode move plus a trailing TImode/V16QI move.
5249 [(set (match_operand:CI 0 "nonimmediate_operand")
5250 (match_operand:CI 1 "general_operand"))]
5251 "TARGET_SIMD && reload_completed"
5254 if (register_operand (operands[0], CImode)
5255 && register_operand (operands[1], CImode))
5257 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5260 else if (BYTES_BIG_ENDIAN)
5262 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5263 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5264 emit_move_insn (gen_lowpart (V16QImode,
5265 simplify_gen_subreg (TImode, operands[0],
5267 gen_lowpart (V16QImode,
5268 simplify_gen_subreg (TImode, operands[1],
;; XI split: reg-reg via four TImode moves, else two OImode submoves at
;; byte offsets 0 and 32.
5277 [(set (match_operand:XI 0 "nonimmediate_operand")
5278 (match_operand:XI 1 "general_operand"))]
5279 "TARGET_SIMD && reload_completed"
5282 if (register_operand (operands[0], XImode)
5283 && register_operand (operands[1], XImode))
5285 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5288 else if (BYTES_BIG_ENDIAN)
5290 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5291 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5292 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5293 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expanders/insns for structure loads into D-register lists.
;; The _dreg insns come in pairs: VD modes use the real LDn instruction,
;; DX (64-bit scalar) modes fall back to LD1 with .1d arrangement.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; ldNr: build a BLK mem sized from the element mode, then emit LDnR.
5300 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5301 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5302 (match_operand:DI 1 "register_operand" "w")
5303 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5306 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5307 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5310 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; LD2 into two D registers (vector element modes).
5315 (define_insn "aarch64_ld2<mode>_dreg"
5316 [(set (match_operand:OI 0 "register_operand" "=w")
5317 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5318 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5321 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5322 [(set_attr "type" "neon_load2_2reg<q>")]
;; DX variant: LD1 two .1d registers instead of LD2.
5325 (define_insn "aarch64_ld2<mode>_dreg"
5326 [(set (match_operand:OI 0 "register_operand" "=w")
5327 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5328 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5331 "ld1\\t{%S0.1d - %T0.1d}, %1"
5332 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into three D registers.
5335 (define_insn "aarch64_ld3<mode>_dreg"
5336 [(set (match_operand:CI 0 "register_operand" "=w")
5337 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5338 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5342 [(set_attr "type" "neon_load3_3reg<q>")]
;; DX variant of ld3.
5345 (define_insn "aarch64_ld3<mode>_dreg"
5346 [(set (match_operand:CI 0 "register_operand" "=w")
5347 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5348 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5351 "ld1\\t{%S0.1d - %U0.1d}, %1"
5352 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into four D registers.
5355 (define_insn "aarch64_ld4<mode>_dreg"
5356 [(set (match_operand:XI 0 "register_operand" "=w")
5357 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5358 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5361 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5362 [(set_attr "type" "neon_load4_4reg<q>")]
;; DX variant of ld4.
5365 (define_insn "aarch64_ld4<mode>_dreg"
5366 [(set (match_operand:XI 0 "register_operand" "=w")
5367 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5368 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5371 "ld1\\t{%S0.1d - %V0.1d}, %1"
5372 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin-facing expander: BLK mem of nregs * 8 bytes, then the matching
;; _dreg insn.
5375 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5376 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5377 (match_operand:DI 1 "register_operand" "r")
5378 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5381 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5382 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5384 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Builtin load expanders: single-register ld1, full-struct ldN for Q
;; registers, ld1x2, and the lane-load expander with bounds checking.
;; NOTE(review): lines missing from this excerpt; leading numerals are
;; extraction artifacts.
;; ld1: big-endian goes through the endian-safe be_ld1 pattern, otherwise
;; a plain move from memory.
5388 (define_expand "aarch64_ld1<VALL_F16:mode>"
5389 [(match_operand:VALL_F16 0 "register_operand")
5390 (match_operand:DI 1 "register_operand")]
5393 machine_mode mode = <VALL_F16:MODE>mode;
5394 rtx mem = gen_rtx_MEM (mode, operands[1]);
5396 if (BYTES_BIG_ENDIAN)
5397 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5399 emit_move_insn (operands[0], mem);
;; ldN for Q-register element modes: wrap the address in a struct-mode MEM.
5403 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5404 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5405 (match_operand:DI 1 "register_operand" "r")
5406 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5409 machine_mode mode = <VSTRUCT:MODE>mode;
5410 rtx mem = gen_rtx_MEM (mode, operands[1]);
5412 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; ld1x2, Q-register elements (OImode list).
5416 (define_expand "aarch64_ld1x2<VQ:mode>"
5417 [(match_operand:OI 0 "register_operand" "=w")
5418 (match_operand:DI 1 "register_operand" "r")
5419 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5422 machine_mode mode = OImode;
5423 rtx mem = gen_rtx_MEM (mode, operands[1]);
5425 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
;; ld1x2, D-register elements.
5429 (define_expand "aarch64_ld1x2<VDC:mode>"
5430 [(match_operand:OI 0 "register_operand" "=w")
5431 (match_operand:DI 1 "register_operand" "r")
5432 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5435 machine_mode mode = OImode;
5436 rtx mem = gen_rtx_MEM (mode, operands[1]);
5438 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; Lane load: checks the lane index against <VALLDIF:nunits> before
;; emitting the lane-load insn.
5443 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5444 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5445 (match_operand:DI 1 "register_operand" "w")
5446 (match_operand:VSTRUCT 2 "register_operand" "0")
5447 (match_operand:SI 3 "immediate_operand" "i")
5448 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5452 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5455 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5456 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5457 operands[0], mem, operands[2], operands[3]));
5461 ;; Expanders for builtins to extract vector registers from large
5462 ;; opaque integer modes.
;; Extract D-register "part" from an opaque struct mode: go through a
;; Q-sized (VDBL) temporary subreg, then take its low half.  Each part is
;; 16 bytes wide, hence offset = part * 16.
5466 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5467  [(match_operand:VDC 0 "register_operand" "=w")
5468   (match_operand:VSTRUCT 1 "register_operand" "w")
5469   (match_operand:SI 2 "immediate_operand" "i")]
5472   int part = INTVAL (operands[2]);
5473   rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5474   int offset = part * 16;
5476   emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5477   emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register "part" directly via a subreg at a 16-byte multiple.
5483 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5484  [(match_operand:VQ 0 "register_operand" "=w")
5485   (match_operand:VSTRUCT 1 "register_operand" "w")
5486   (match_operand:SI 2 "immediate_operand" "i")]
5489   int part = INTVAL (operands[2]);
5490   int offset = part * 16;
5492   emit_move_insn (operands[0],
5493 		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5497 ;; Permuted-store expanders for neon intrinsics.
5499 ;; Permute instructions
;; Standard vec_perm name: the heavy lifting (constant recognition, TBL
;; fallback) is done in aarch64_expand_vec_perm.
5503 (define_expand "vec_perm<mode>"
5504   [(match_operand:VB 0 "register_operand")
5505    (match_operand:VB 1 "register_operand")
5506    (match_operand:VB 2 "register_operand")
5507    (match_operand:VB 3 "register_operand")]
5510   aarch64_expand_vec_perm (operands[0], operands[1],
5511 			   operands[2], operands[3], <nunits>);
;; Table lookups.  %S/%T/%U/%V name consecutive registers of a multi-register
;; operand (OI = 2 regs, CI = 3, XI = 4); TBL zeroes out-of-range lanes while
;; TBX leaves them unchanged (hence the tied "0" constraint on operand 1).
5515 (define_insn "aarch64_tbl1<mode>"
5516   [(set (match_operand:VB 0 "register_operand" "=w")
5517 	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5518 		    (match_operand:VB 2 "register_operand" "w")]
5521   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5522   [(set_attr "type" "neon_tbl1<q>")]
5525 ;; Two source registers.
5527 (define_insn "aarch64_tbl2v16qi"
5528   [(set (match_operand:V16QI 0 "register_operand" "=w")
5529 	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5530 		       (match_operand:V16QI 2 "register_operand" "w")]
5533   "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5534   [(set_attr "type" "neon_tbl2_q")]
5537 (define_insn "aarch64_tbl3<mode>"
5538   [(set (match_operand:VB 0 "register_operand" "=w")
5539 	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
5540 		    (match_operand:VB 2 "register_operand" "w")]
5543   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5544   [(set_attr "type" "neon_tbl3")]
5547 (define_insn "aarch64_tbx4<mode>"
5548   [(set (match_operand:VB 0 "register_operand" "=w")
5549 	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5550 		    (match_operand:OI 2 "register_operand" "w")
5551 		    (match_operand:VB 3 "register_operand" "w")]
5554   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5555   [(set_attr "type" "neon_tbl4")]
5558 ;; Three source registers.
5560 (define_insn "aarch64_qtbl3<mode>"
5561   [(set (match_operand:VB 0 "register_operand" "=w")
5562 	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
5563 		    (match_operand:VB 2 "register_operand" "w")]
5566   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5567   [(set_attr "type" "neon_tbl3")]
5570 (define_insn "aarch64_qtbx3<mode>"
5571   [(set (match_operand:VB 0 "register_operand" "=w")
5572 	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5573 		    (match_operand:CI 2 "register_operand" "w")
5574 		    (match_operand:VB 3 "register_operand" "w")]
5577   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5578   [(set_attr "type" "neon_tbl3")]
5581 ;; Four source registers.
5583 (define_insn "aarch64_qtbl4<mode>"
5584   [(set (match_operand:VB 0 "register_operand" "=w")
5585 	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
5586 		    (match_operand:VB 2 "register_operand" "w")]
5589   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5590   [(set_attr "type" "neon_tbl4")]
5593 (define_insn "aarch64_qtbx4<mode>"
5594   [(set (match_operand:VB 0 "register_operand" "=w")
5595 	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
5596 		    (match_operand:XI 2 "register_operand" "w")
5597 		    (match_operand:VB 3 "register_operand" "w")]
5600   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5601   [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OI pair; split after reload into the
;; concrete register moves (aarch64_split_combinev16qi).
5604 (define_insn_and_split "aarch64_combinev16qi"
5605   [(set (match_operand:OI 0 "register_operand" "=w")
5606 	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5607 		    (match_operand:V16QI 2 "register_operand" "w")]
5611   "&& reload_completed"
5614   aarch64_split_combinev16qi (operands);
5617   [(set_attr "type" "multiple")]
5620 ;; This instruction's pattern is generated directly by
5621 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5622 ;; need corresponding changes there.
;; Covers the ZIP/UZP/TRN family via the PERMUTE iterator attributes.
5623 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5624   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5625 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5626 			  (match_operand:VALL_F16 2 "register_operand" "w")]
5629   "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5630   [(set_attr "type" "neon_permute<q>")]
5633 ;; This instruction's pattern is generated directly by
5634 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5635 ;; need corresponding changes there.  Note that the immediate (third)
5636 ;; operand is a lane index not a byte index.
;; EXT: the lane index is scaled to a byte index at output time.
5637 (define_insn "aarch64_ext<mode>"
5638   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5639 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5640 			  (match_operand:VALL_F16 2 "register_operand" "w")
5641 			  (match_operand:SI 3 "immediate_operand" "i")]
5645   operands[3] = GEN_INT (INTVAL (operands[3])
5646       * GET_MODE_UNIT_SIZE (<MODE>mode));
5647   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5649   [(set_attr "type" "neon_ext<q>")]
5652 ;; This instruction's pattern is generated directly by
5653 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5654 ;; need corresponding changes there.
5655 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5656   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5657 	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5660   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5661   [(set_attr "type" "neon_rev<q>")]
;; D-register structure stores.  Each STn has two variants selected by the
;; element iterator: vector element modes (VD) emit the interleaving STn,
;; while 64-bit scalar modes (DX) emit ST1 on a .1d register list (no
;; interleave is needed for single-element vectors).
5664 (define_insn "aarch64_st2<mode>_dreg"
5665   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5666 	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5667                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5670   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5671   [(set_attr "type" "neon_store2_2reg")]
5674 (define_insn "aarch64_st2<mode>_dreg"
5675   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5676 	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5677                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5680   "st1\\t{%S1.1d - %T1.1d}, %0"
5681   [(set_attr "type" "neon_store1_2reg")]
5684 (define_insn "aarch64_st3<mode>_dreg"
5685   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5686 	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5687                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5690   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5691   [(set_attr "type" "neon_store3_3reg")]
5694 (define_insn "aarch64_st3<mode>_dreg"
5695   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5696 	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5697                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5700   "st1\\t{%S1.1d - %U1.1d}, %0"
5701   [(set_attr "type" "neon_store1_3reg")]
5704 (define_insn "aarch64_st4<mode>_dreg"
5705   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5706 	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5707                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5710   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5711   [(set_attr "type" "neon_store4_4reg")]
5714 (define_insn "aarch64_st4<mode>_dreg"
5715   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5716 	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5717                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5720   "st1\\t{%S1.1d - %V1.1d}, %0"
5721   [(set_attr "type" "neon_store1_4reg")]
;; STn builtin expanders: wrap the address register in a sized BLK MEM
;; (D-register forms) or a MEM of the struct mode (Q-register forms) and
;; forward to the matching insn.
5724 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5725  [(match_operand:DI 0 "register_operand" "r")
5726   (match_operand:VSTRUCT 1 "register_operand" "w")
5727   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5730   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5731   set_mem_size (mem, <VSTRUCT:nregs> * 8);
5733   emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5737 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5738  [(match_operand:DI 0 "register_operand" "r")
5739   (match_operand:VSTRUCT 1 "register_operand" "w")
5740   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5743   machine_mode mode = <VSTRUCT:MODE>mode;
5744   rtx mem = gen_rtx_MEM (mode, operands[0]);
5746   emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; STn-lane: store one lane of each of n registers; operand 2 is the lane.
5750 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5751  [(match_operand:DI 0 "register_operand" "r")
5752   (match_operand:VSTRUCT 1 "register_operand" "w")
5753   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5754   (match_operand:SI 2 "immediate_operand")]
5757   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5758   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5761   emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5762 		mem, operands[1], operands[2]));
;; ST1: mirror of the ld1 expander above, with a big-endian special case.
5766 (define_expand "aarch64_st1<VALL_F16:mode>"
5767  [(match_operand:DI 0 "register_operand")
5768   (match_operand:VALL_F16 1 "register_operand")]
5771   machine_mode mode = <VALL_F16:MODE>mode;
5772   rtx mem = gen_rtx_MEM (mode, operands[0]);
5774   if (BYTES_BIG_ENDIAN)
5775     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5777     emit_move_insn (mem, operands[1]);
5781 ;; Expander for builtins to insert vector registers into large
5782 ;; opaque integer modes.
5784 ;; Q-register list.  We don't need a D-reg inserter as we zero
5785 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole struct value, then overwrite one 16-byte part via a subreg.
5787 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5788  [(match_operand:VSTRUCT 0 "register_operand" "+w")
5789   (match_operand:VSTRUCT 1 "register_operand" "0")
5790   (match_operand:VQ 2 "register_operand" "w")
5791   (match_operand:SI 3 "immediate_operand" "i")]
5794   int part = INTVAL (operands[3]);
5795   int offset = part * 16;
5797   emit_move_insn (operands[0], operands[1]);
5798   emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5803 ;; Standard pattern name vec_init<mode><Vel>.
5805 (define_expand "vec_init<mode><Vel>"
5806   [(match_operand:VALL_F16 0 "register_operand" "")
5807    (match_operand 1 "" "")]
5810   aarch64_expand_vector_init (operands[0], operands[1]);
;; Load-and-replicate: broadcast a memory scalar to all lanes.
5814 (define_insn "*aarch64_simd_ld1r<mode>"
5815   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5816 	(vec_duplicate:VALL_F16
5817 	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5819   "ld1r\\t{%0.<Vtype>}, %1"
5820   [(set_attr "type" "neon_load1_all_lanes")]
;; Two-register LD1 insns backing the ld1x2 expanders (VQ and VDC variants).
5823 (define_insn "aarch64_simd_ld1<mode>_x2"
5824   [(set (match_operand:OI 0 "register_operand" "=w")
5825 	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5826 		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5830   [(set_attr "type" "neon_load1_2reg<q>")]
5833 (define_insn "aarch64_simd_ld1<mode>_x2"
5834   [(set (match_operand:OI 0 "register_operand" "=w")
5835 	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5836 		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5839   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5840   [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal (step) estimate instructions used by the reciprocal and
;; reciprocal-sqrt approximation sequences.
5844 (define_insn "aarch64_frecpe<mode>"
5845   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5846 	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5849   "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5850   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECPE/FRECPX via the FRECP suffix iterator.
5853 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5854   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5855 	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5858   "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5859   [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5862 (define_insn "aarch64_frecps<mode>"
5863   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5865 	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5866 	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5869   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5870   [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate.
5873 (define_insn "aarch64_urecpe<mode>"
5874   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5875     (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5878   "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5879   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5881 ;; Standard pattern name vec_extract<mode><Vel>.
;; Forwards to get_lane, which handles both register and memory destinations.
5883 (define_expand "vec_extract<mode><Vel>"
5884   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5885    (match_operand:VALL_F16 1 "register_operand" "")
5886    (match_operand:SI 2 "immediate_operand" "")]
5890     (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES round instructions (AESE/AESD); operand 1 carries the round key and is
;; commutative with operand 2 ("%0").
5896 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5897   [(set (match_operand:V16QI 0 "register_operand" "=w")
5898         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5899 		       (match_operand:V16QI 2 "register_operand" "w")]
5901   "TARGET_SIMD && TARGET_AES"
5902   "aes<aes_op>\\t%0.16b, %2.16b"
5903   [(set_attr "type" "crypto_aese")]
;; Combine forms: AESE/AESD already XOR the state with the key, so an
;; explicit XOR with an all-zero third operand folds away.  Two variants
;; cover both orderings of the unspec arguments produced by combine.
5906 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5907   [(set (match_operand:V16QI 0 "register_operand" "=w")
5908 	(unspec:V16QI [(xor:V16QI
5909 			(match_operand:V16QI 1 "register_operand" "%0")
5910 			(match_operand:V16QI 2 "register_operand" "w"))
5911 		       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5913   "TARGET_SIMD && TARGET_AES"
5914   "aes<aes_op>\\t%0.16b, %2.16b"
5915   [(set_attr "type" "crypto_aese")]
5918 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5919   [(set (match_operand:V16QI 0 "register_operand" "=w")
5920 	(unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5921 		       (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5922 				  (match_operand:V16QI 2 "register_operand" "w"))]
5924   "TARGET_SIMD && TARGET_AES"
5925   "aes<aes_op>\\t%0.16b, %2.16b"
5926   [(set_attr "type" "crypto_aese")]
5929 ;; When AES/AESMC fusion is enabled we want the register allocation to
5933 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; Alternative 0 (tied "0") is only enabled when the CPU fuses AES+AESMC.
5935 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5936   [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5937 	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5939   "TARGET_SIMD && TARGET_AES"
5940   "aes<aesmc_op>\\t%0.16b, %1.16b"
5941   [(set_attr "type" "crypto_aesmc")
5942    (set_attr_alternative "enabled"
5943      [(if_then_else (match_test
5944 		       "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5945 		     (const_string "yes" )
5946 		     (const_string "no"))
5947       (const_string "yes")])]
5950 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5951 ;; and enforce the register dependency without scheduling or register
5952 ;; allocation messing up the order or introducing moves inbetween.
5953 ;;  Mash the two together during combine.
;; "=&w" (early clobber) keeps the destination distinct from inputs so the
;; back-to-back aese;aesmc pair in the template is safe.
5955 (define_insn "*aarch64_crypto_aese_fused"
5956   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5959 	    [(match_operand:V16QI 1 "register_operand" "0")
5960 	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5962   "TARGET_SIMD && TARGET_AES
5963    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5964   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5965   [(set_attr "type" "crypto_aese")
5966    (set_attr "length" "8")]
5969 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
5970 ;; and enforce the register dependency without scheduling or register
5971 ;; allocation messing up the order or introducing moves inbetween.
5972 ;;  Mash the two together during combine.
5974 (define_insn "*aarch64_crypto_aesd_fused"
5975   [(set (match_operand:V16QI 0 "register_operand" "=&w")
5978 	    [(match_operand:V16QI 1 "register_operand" "0")
5979 	     (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5981   "TARGET_SIMD && TARGET_AES
5982    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5983   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5984   [(set_attr "type" "crypto_aese")
5985    (set_attr "length" "8")]
;; SHA-1 / SHA-256 instructions.
;; NOTE(review): the SHA1H patterns below are missing their output templates
;; in this listing (original lines 5996/6006/6016 were dropped) -- confirm
;; against the full machine description.
5990 (define_insn "aarch64_crypto_sha1hsi"
5991   [(set (match_operand:SI 0 "register_operand" "=w")
5992         (unspec:SI [(match_operand:SI 1
5993                        "register_operand" "w")]
5995   "TARGET_SIMD && TARGET_SHA2"
5997   [(set_attr "type" "crypto_sha1_fast")]
;; Lane 0 on little-endian, lane 3 on big-endian: the same architectural
;; element of a V4SI in both cases.
6000 (define_insn "aarch64_crypto_sha1hv4si"
6001   [(set (match_operand:SI 0 "register_operand" "=w")
6002 	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6003 		     (parallel [(const_int 0)]))]
6005   "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6007   [(set_attr "type" "crypto_sha1_fast")]
6010 (define_insn "aarch64_be_crypto_sha1hv4si"
6011   [(set (match_operand:SI 0 "register_operand" "=w")
6012 	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6013 		     (parallel [(const_int 3)]))]
6015   "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6017   [(set_attr "type" "crypto_sha1_fast")]
6020 (define_insn "aarch64_crypto_sha1su1v4si"
6021   [(set (match_operand:V4SI 0 "register_operand" "=w")
6022         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6023                       (match_operand:V4SI 2 "register_operand" "w")]
6025   "TARGET_SIMD && TARGET_SHA2"
6026   "sha1su1\\t%0.4s, %2.4s"
6027   [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1M/SHA1P via the sha1_op iterator; %q0 is the full Q view.
6030 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6031   [(set (match_operand:V4SI 0 "register_operand" "=w")
6032         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6033                       (match_operand:SI 2 "register_operand" "w")
6034                       (match_operand:V4SI 3 "register_operand" "w")]
6036   "TARGET_SIMD && TARGET_SHA2"
6037   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6038   [(set_attr "type" "crypto_sha1_slow")]
6041 (define_insn "aarch64_crypto_sha1su0v4si"
6042   [(set (match_operand:V4SI 0 "register_operand" "=w")
6043         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6044                       (match_operand:V4SI 2 "register_operand" "w")
6045                       (match_operand:V4SI 3 "register_operand" "w")]
6047   "TARGET_SIMD && TARGET_SHA2"
6048   "sha1su0\\t%0.4s, %2.4s, %3.4s"
6049   [(set_attr "type" "crypto_sha1_xor")]
;; SHA-256 hash update and schedule update instructions.
6054 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6055   [(set (match_operand:V4SI 0 "register_operand" "=w")
6056         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6057                       (match_operand:V4SI 2 "register_operand" "w")
6058                       (match_operand:V4SI 3 "register_operand" "w")]
6060   "TARGET_SIMD && TARGET_SHA2"
6061   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6062   [(set_attr "type" "crypto_sha256_slow")]
6065 (define_insn "aarch64_crypto_sha256su0v4si"
6066   [(set (match_operand:V4SI 0 "register_operand" "=w")
6067         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6068                       (match_operand:V4SI 2 "register_operand" "w")]
6070   "TARGET_SIMD && TARGET_SHA2"
6071   "sha256su0\\t%0.4s, %2.4s"
6072   [(set_attr "type" "crypto_sha256_fast")]
6075 (define_insn "aarch64_crypto_sha256su1v4si"
6076   [(set (match_operand:V4SI 0 "register_operand" "=w")
6077         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6078                       (match_operand:V4SI 2 "register_operand" "w")
6079                       (match_operand:V4SI 3 "register_operand" "w")]
6081   "TARGET_SIMD && TARGET_SHA2"
6082   "sha256su1\\t%0.4s, %2.4s, %3.4s"
6083   [(set_attr "type" "crypto_sha256_slow")]
;; SHA-512 instructions (Armv8.2-A SHA3 extension, hence TARGET_SHA3).
6088 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6089   [(set (match_operand:V2DI 0 "register_operand" "=w")
6090         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6091                       (match_operand:V2DI 2 "register_operand" "w")
6092                       (match_operand:V2DI 3 "register_operand" "w")]
6094   "TARGET_SIMD && TARGET_SHA3"
6095   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6096   [(set_attr "type" "crypto_sha512")]
6099 (define_insn "aarch64_crypto_sha512su0qv2di"
6100   [(set (match_operand:V2DI 0 "register_operand" "=w")
6101         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6102                       (match_operand:V2DI 2 "register_operand" "w")]
6104   "TARGET_SIMD && TARGET_SHA3"
6105   "sha512su0\\t%0.2d, %2.2d"
6106   [(set_attr "type" "crypto_sha512")]
6109 (define_insn "aarch64_crypto_sha512su1qv2di"
6110   [(set (match_operand:V2DI 0 "register_operand" "=w")
6111         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6112                       (match_operand:V2DI 2 "register_operand" "w")
6113                       (match_operand:V2DI 3 "register_operand" "w")]
6115   "TARGET_SIMD && TARGET_SHA3"
6116   "sha512su1\\t%0.2d, %2.2d, %3.2d"
6117   [(set_attr "type" "crypto_sha512")]
;; SHA3 bit-manipulation instructions, expressed with native xor/rotate/and
;; RTL (no unspec) so combine can form them from generic code.
6122 (define_insn "eor3q<mode>4"
6123   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6126 	   (match_operand:VQ_I 2 "register_operand" "w")
6127 	   (match_operand:VQ_I 3 "register_operand" "w"))
6128 	  (match_operand:VQ_I 1 "register_operand" "w")))]
6129   "TARGET_SIMD && TARGET_SHA3"
6130   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6131   [(set_attr "type" "crypto_sha3")]
;; RAX1: xor with a left-rotate-by-one of the second source.
6134 (define_insn "aarch64_rax1qv2di"
6135   [(set (match_operand:V2DI 0 "register_operand" "=w")
6138 	   (match_operand:V2DI 2 "register_operand" "w")
6140 	  (match_operand:V2DI 1 "register_operand" "w")))]
6141   "TARGET_SIMD && TARGET_SHA3"
6142   "rax1\\t%0.2d, %1.2d, %2.2d"
6143   [(set_attr "type" "crypto_sha3")]
;; XAR: xor then rotate by an immediate (operand 3, "Usd" constraint).
6146 (define_insn "aarch64_xarqv2di"
6147   [(set (match_operand:V2DI 0 "register_operand" "=w")
6150 	   (match_operand:V2DI 1 "register_operand" "%w")
6151 	   (match_operand:V2DI 2 "register_operand" "w"))
6152 	  (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6153   "TARGET_SIMD && TARGET_SHA3"
6154   "xar\\t%0.2d, %1.2d, %2.2d, %3"
6155   [(set_attr "type" "crypto_sha3")]
;; BCAX: bit clear and xor -- op1 ^ (op2 & ~op3).
6158 (define_insn "bcaxq<mode>4"
6159   [(set (match_operand:VQ_I 0 "register_operand" "=w")
6162 	   (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6163 	   (match_operand:VQ_I 2 "register_operand" "w"))
6164 	  (match_operand:VQ_I 1 "register_operand" "w")))]
6165   "TARGET_SIMD && TARGET_SHA3"
6166   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6167   [(set_attr "type" "crypto_sha3")]
;; SM3 (Chinese standard hash) instructions.
6172 (define_insn "aarch64_sm3ss1qv4si"
6173   [(set (match_operand:V4SI 0 "register_operand" "=w")
6174 	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6175 		      (match_operand:V4SI 2 "register_operand" "w")
6176 		      (match_operand:V4SI 3 "register_operand" "w")]
6178   "TARGET_SIMD && TARGET_SM4"
6179   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6180   [(set_attr "type" "crypto_sm3")]
;; SM3TT1A/1B/2A/2B; operand 4 is a 2-bit immediate lane selector.
6184 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6185   [(set (match_operand:V4SI 0 "register_operand" "=w")
6186 	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187 		      (match_operand:V4SI 2 "register_operand" "w")
6188 		      (match_operand:V4SI 3 "register_operand" "w")
6189 		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6191   "TARGET_SIMD && TARGET_SM4"
6192   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6193   [(set_attr "type" "crypto_sm3")]
6196 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6197   [(set (match_operand:V4SI 0 "register_operand" "=w")
6198 	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6199 		      (match_operand:V4SI 2 "register_operand" "w")
6200 		      (match_operand:V4SI 3 "register_operand" "w")]
6202   "TARGET_SIMD && TARGET_SM4"
6203   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6204   [(set_attr "type" "crypto_sm3")]
;; SM4 (Chinese standard block cipher) instructions.
6209 (define_insn "aarch64_sm4eqv4si"
6210   [(set (match_operand:V4SI 0 "register_operand" "=w")
6211 	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6212 		      (match_operand:V4SI 2 "register_operand" "w")]
6214   "TARGET_SIMD && TARGET_SM4"
6215   "sm4e\\t%0.4s, %2.4s"
6216   [(set_attr "type" "crypto_sm4")]
6219 (define_insn "aarch64_sm4ekeyqv4si"
6220   [(set (match_operand:V4SI 0 "register_operand" "=w")
6221 	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6222 		      (match_operand:V4SI 2 "register_operand" "w")]
6224   "TARGET_SIMD && TARGET_SM4"
6225   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6226   [(set_attr "type" "crypto_sm4")]
;; FP16 FMLAL/FMLSL widening multiply-accumulate (Armv8.2-A FP16FML).
;; _low/_high select the bottom/top half of the half-precision sources via
;; vect_par_cnst parallels built in the expanders (false = low, true = high).
6231 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6232   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6234 	  [(match_operand:VDQSF 1 "register_operand" "0")
6235 	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
6236 	   (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6240   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6241 					    <nunits> * 2, false);
6242   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6243 					    <nunits> * 2, false);
6245   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6254 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6255   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6257 	  [(match_operand:VDQSF 1 "register_operand" "0")
6258 	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
6259 	   (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6263   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6264   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6266   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; Backing insns: fma/fnma of the selected half-vectors, accumulated into
;; operand 1 (tied to the destination).
6274 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6275   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6278 	    (vec_select:<VFMLA_SEL_W>
6279 	      (match_operand:<VFMLA_W> 2 "register_operand" "w")
6280 	      (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6282 	    (vec_select:<VFMLA_SEL_W>
6283 	      (match_operand:<VFMLA_W> 3 "register_operand" "w")
6284 	      (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6285 	  (match_operand:VDQSF 1 "register_operand" "0")))]
6287   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6288   [(set_attr "type" "neon_fp_mul_s")]
6291 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6292   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6296 	      (vec_select:<VFMLA_SEL_W>
6297 		(match_operand:<VFMLA_W> 2 "register_operand" "w")
6298 		(match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6300 	    (vec_select:<VFMLA_SEL_W>
6301 	      (match_operand:<VFMLA_W> 3 "register_operand" "w")
6302 	      (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6303 	  (match_operand:VDQSF 1 "register_operand" "0")))]
6305   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6306   [(set_attr "type" "neon_fp_mul_s")]
6309 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6310   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6313 	    (vec_select:<VFMLA_SEL_W>
6314 	      (match_operand:<VFMLA_W> 2 "register_operand" "w")
6315 	      (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6317 	    (vec_select:<VFMLA_SEL_W>
6318 	      (match_operand:<VFMLA_W> 3 "register_operand" "w")
6319 	      (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6320 	  (match_operand:VDQSF 1 "register_operand" "0")))]
6322   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6323   [(set_attr "type" "neon_fp_mul_s")]
6326 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6327   [(set (match_operand:VDQSF 0 "register_operand" "=w")
6331 	      (vec_select:<VFMLA_SEL_W>
6332 		(match_operand:<VFMLA_W> 2 "register_operand" "w")
6333 		(match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6335 	    (vec_select:<VFMLA_SEL_W>
6336 	      (match_operand:<VFMLA_W> 3 "register_operand" "w")
6337 	      (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6338 	  (match_operand:VDQSF 1 "register_operand" "0")))]
6340   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6341   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL for V2SF results: one multiplicand half-vector and one
;; broadcast lane (operand 4, 2-bit index, endian-adjusted in the expander).
6344 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6345   [(set (match_operand:V2SF 0 "register_operand" "")
6346 	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6347 			   (match_operand:V4HF 2 "register_operand" "")
6348 			   (match_operand:V4HF 3 "register_operand" "")
6349 			   (match_operand:SI 4 "aarch64_imm2" "")]
6353   rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6354   rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6356   emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6365 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6366   [(set (match_operand:V2SF 0 "register_operand" "")
6367 	(unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6368 			   (match_operand:V4HF 2 "register_operand" "")
6369 			   (match_operand:V4HF 3 "register_operand" "")
6370 			   (match_operand:SI 4 "aarch64_imm2" "")]
6374   rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6375   rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6377   emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; Backing insns; note the "x" constraint on the lane operand (registers
;; addressable by the by-element instruction encoding).
6385 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6386   [(set (match_operand:V2SF 0 "register_operand" "=w")
6390 	      (match_operand:V4HF 2 "register_operand" "w")
6391 	      (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6395 		(match_operand:V4HF 3 "register_operand" "x")
6396 		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6397 	  (match_operand:V2SF 1 "register_operand" "0")))]
6399   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6400   [(set_attr "type" "neon_fp_mul_s")]
6403 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6404   [(set (match_operand:V2SF 0 "register_operand" "=w")
6409 		(match_operand:V4HF 2 "register_operand" "w")
6410 		(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6414 		(match_operand:V4HF 3 "register_operand" "x")
6415 		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6416 	  (match_operand:V2SF 1 "register_operand" "0")))]
6418   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6419   [(set_attr "type" "neon_fp_mul_s")]
6422 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6423   [(set (match_operand:V2SF 0 "register_operand" "=w")
6427 	      (match_operand:V4HF 2 "register_operand" "w")
6428 	      (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6432 		(match_operand:V4HF 3 "register_operand" "x")
6433 		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6434 	  (match_operand:V2SF 1 "register_operand" "0")))]
6436   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6437   [(set_attr "type" "neon_fp_mul_s")]
6440 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6441   [(set (match_operand:V2SF 0 "register_operand" "=w")
6446 		(match_operand:V4HF 2 "register_operand" "w")
6447 		(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6451 		(match_operand:V4HF 3 "register_operand" "x")
6452 		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6453 	  (match_operand:V2SF 1 "register_operand" "0")))]
6455   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6456   [(set_attr "type" "neon_fp_mul_s")]
6459 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6460 [(set (match_operand:V4SF 0 "register_operand" "")
6461 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6462 (match_operand:V8HF 2 "register_operand" "")
6463 (match_operand:V8HF 3 "register_operand" "")
6464 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6468 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6469 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6471 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6479 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6480 [(set (match_operand:V4SF 0 "register_operand" "")
6481 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6482 (match_operand:V8HF 2 "register_operand" "")
6483 (match_operand:V8HF 3 "register_operand" "")
6484 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6488 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6489 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6491 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (vector, by element): multiply the low half of the V8HF
;; multiplicand (operand 2, "vect_par_cnst_lo_half") by lane %5 of
;; operand 3 (index 0-7 per "aarch64_lane_imm3"/"Ui7"), widening to
;; single precision, and add to the V4SF accumulator tied to the
;; destination ("0" on operand 1).
6499 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6500 [(set (match_operand:V4SF 0 "register_operand" "=w")
6504 (match_operand:V8HF 2 "register_operand" "w")
6505 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6509 (match_operand:V8HF 3 "register_operand" "x")
6510 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6511 (match_operand:V4SF 1 "register_operand" "0")))]
6513 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6514 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element): as the fmlalq_laneq_lowv4sf pattern but
;; subtracting the widened low-half-by-lane product from the V4SF
;; accumulator (operand 1, tied to the destination with "0").
6517 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6518 [(set (match_operand:V4SF 0 "register_operand" "=w")
6523 (match_operand:V8HF 2 "register_operand" "w")
6524 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6528 (match_operand:V8HF 3 "register_operand" "x")
6529 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6530 (match_operand:V4SF 1 "register_operand" "0")))]
6532 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6533 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element): high-half counterpart of
;; fmlalq_laneq_lowv4sf — the multiplicand half is selected with
;; "vect_par_cnst_hi_half"; lane index 0-7 ("aarch64_lane_imm3"/"Ui7");
;; accumulator tied to the destination.
6536 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6537 [(set (match_operand:V4SF 0 "register_operand" "=w")
6541 (match_operand:V8HF 2 "register_operand" "w")
6542 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6546 (match_operand:V8HF 3 "register_operand" "x")
6547 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6548 (match_operand:V4SF 1 "register_operand" "0")))]
6550 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6551 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element): high-half, subtracting variant of the
;; laneq V4SF family — widened product of the high half of operand 2
;; and lane %5 of operand 3 is subtracted from the tied accumulator.
6554 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6555 [(set (match_operand:V4SF 0 "register_operand" "=w")
6560 (match_operand:V8HF 2 "register_operand" "w")
6561 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6565 (match_operand:V8HF 3 "register_operand" "x")
6566 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6567 (match_operand:V4SF 1 "register_operand" "0")))]
6569 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6570 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 64-bit (V2SF) low-half "laneq" intrinsic: the half
;; selector is built for the V4HF multiplicand, while the lane rtx is
;; endian-corrected against V8HFmode because the lane operand (3) is a
;; full 128-bit V8HF register; lane index 0-7 ("aarch64_lane_imm3").
6573 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6574 [(set (match_operand:V2SF 0 "register_operand" "")
6575 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6576 (match_operand:V4HF 2 "register_operand" "")
6577 (match_operand:V8HF 3 "register_operand" "")
6578 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6582 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6583 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6585 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the 64-bit (V2SF) high-half "laneq" intrinsic:
;; identical to the low-half expander except the V4HF half selector is
;; built with `true' (high half).
6594 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6595 [(set (match_operand:V2SF 0 "register_operand" "")
6596 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6597 (match_operand:V4HF 2 "register_operand" "")
6598 (match_operand:V8HF 3 "register_operand" "")
6599 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6603 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6604 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6606 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (vector, by element), V2SF form: low half of the V4HF
;; multiplicand ("vect_par_cnst_lo_half") times lane %5 (0-7,
;; "aarch64_lane_imm3"/"Ui7") of the V8HF lane register, widened and
;; added to the V2SF accumulator tied to the destination.
6615 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6616 [(set (match_operand:V2SF 0 "register_operand" "=w")
6620 (match_operand:V4HF 2 "register_operand" "w")
6621 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6625 (match_operand:V8HF 3 "register_operand" "x")
6626 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6627 (match_operand:V2SF 1 "register_operand" "0")))]
6629 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6630 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element), V2SF form: as fmlal_laneq_lowv2sf but
;; the widened low-half-by-lane product is subtracted from the tied
;; accumulator.
6633 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6634 [(set (match_operand:V2SF 0 "register_operand" "=w")
6639 (match_operand:V4HF 2 "register_operand" "w")
6640 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6644 (match_operand:V8HF 3 "register_operand" "x")
6645 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6646 (match_operand:V2SF 1 "register_operand" "0")))]
6648 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6649 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element), V2SF form: high half of the V4HF
;; multiplicand ("vect_par_cnst_hi_half") times lane %5 (0-7) of the
;; V8HF lane register, widened and added to the tied accumulator.
6652 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6653 [(set (match_operand:V2SF 0 "register_operand" "=w")
6657 (match_operand:V4HF 2 "register_operand" "w")
6658 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6662 (match_operand:V8HF 3 "register_operand" "x")
6663 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664 (match_operand:V2SF 1 "register_operand" "0")))]
6666 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6667 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element), V2SF form: high-half, subtracting
;; variant of the laneq V2SF family.
6670 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6671 [(set (match_operand:V2SF 0 "register_operand" "=w")
6676 (match_operand:V4HF 2 "register_operand" "w")
6677 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6681 (match_operand:V8HF 3 "register_operand" "x")
6682 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683 (match_operand:V2SF 1 "register_operand" "0")))]
6685 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6686 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half "lane" (64-bit V4HF lane operand) intrinsic
;; on V4SF: half selector built for the V8HF multiplicand; lane rtx
;; endian-corrected against V4HFmode since operand 3 is V4HF; lane
;; index 0-3 ("aarch64_imm2").
6689 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6690 [(set (match_operand:V4SF 0 "register_operand" "")
6691 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6692 (match_operand:V8HF 2 "register_operand" "")
6693 (match_operand:V4HF 3 "register_operand" "")
6694 (match_operand:SI 4 "aarch64_imm2" "")]
6698 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6699 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6701 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the high-half "lane" intrinsic on V4SF: identical to
;; the low-half expander except the V8HF half selector is built with
;; `true' (high half).
6709 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6710 [(set (match_operand:V4SF 0 "register_operand" "")
6711 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6712 (match_operand:V8HF 2 "register_operand" "")
6713 (match_operand:V4HF 3 "register_operand" "")
6714 (match_operand:SI 4 "aarch64_imm2" "")]
6718 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6719 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6721 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (vector, by element), lane taken from a 64-bit V4HF register:
;; low half of the V8HF multiplicand times lane %5 (0-3,
;; "aarch64_imm2"/"Ui2"), widened and added to the V4SF accumulator
;; tied to the destination.
6729 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6730 [(set (match_operand:V4SF 0 "register_operand" "=w")
6734 (match_operand:V8HF 2 "register_operand" "w")
6735 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6739 (match_operand:V4HF 3 "register_operand" "x")
6740 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6741 (match_operand:V4SF 1 "register_operand" "0")))]
6743 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6744 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (vector, by element), V4HF lane register: as
;; fmlalq_lane_lowv4sf but subtracting the widened product from the
;; tied accumulator.
6747 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6748 [(set (match_operand:V4SF 0 "register_operand" "=w")
6753 (match_operand:V8HF 2 "register_operand" "w")
6754 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6758 (match_operand:V4HF 3 "register_operand" "x")
6759 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6760 (match_operand:V4SF 1 "register_operand" "0")))]
6762 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6763 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (vector, by element), V4HF lane register: high half of the
;; V8HF multiplicand ("vect_par_cnst_hi_half") times lane %5 (0-3),
;; widened and added to the tied accumulator.
6766 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6767 [(set (match_operand:V4SF 0 "register_operand" "=w")
6771 (match_operand:V8HF 2 "register_operand" "w")
6772 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6776 (match_operand:V4HF 3 "register_operand" "x")
6777 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778 (match_operand:V4SF 1 "register_operand" "0")))]
6780 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6781 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (vector, by element), V4HF lane register: high-half,
;; subtracting variant completing the lane V4SF family.
6784 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6785 [(set (match_operand:V4SF 0 "register_operand" "=w")
6790 (match_operand:V8HF 2 "register_operand" "w")
6791 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6795 (match_operand:V4HF 3 "register_operand" "x")
6796 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6797 (match_operand:V4SF 1 "register_operand" "0")))]
6799 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6800 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL: polynomial (carry-less) multiply of two 64-bit (DI) values
;; producing a 128-bit (TI) result.  Gated on both TARGET_SIMD and
;; TARGET_AES, since the 64x64->128 form belongs to the Crypto/AES
;; extension.
6805 (define_insn "aarch64_crypto_pmulldi"
6806 [(set (match_operand:TI 0 "register_operand" "=w")
6807 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6808 (match_operand:DI 2 "register_operand" "w")]
6810 "TARGET_SIMD && TARGET_AES"
6811 "pmull\\t%0.1q, %1.1d, %2.1d"
6812 [(set_attr "type" "crypto_pmull")]
;; PMULL2: polynomial (carry-less) multiply of the upper 64-bit
;; elements of two V2DI vectors (".2d" operands in the template),
;; producing a 128-bit (TI) result.  Same TARGET_SIMD && TARGET_AES
;; gating as the DImode pattern above.
6815 (define_insn "aarch64_crypto_pmullv2di"
6816 [(set (match_operand:TI 0 "register_operand" "=w")
6817 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6818 (match_operand:V2DI 2 "register_operand" "w")]
6820 "TARGET_SIMD && TARGET_AES"
6821 "pmull2\\t%0.1q, %1.2d, %2.2d"
6822 [(set_attr "type" "crypto_pmull")]