1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this listing is an excerpt — the original line numbers
;; embedded in each line are non-contiguous, so insn conditions (e.g.
;; "TARGET_SIMD") and closing parens are among the elided lines.
;; Standard mov<mode> expander for the VALL_F16 vector modes.  A store of
;; a constant is legalised by forcing operand 1 into a register, unless it
;; is a zero immediate that can be stored directly via xzr (str/stp).
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  It may not FAIL, so when neither
;; operand is a register (memory := memory/constant) operand 1 is forced
;; into a register to make the move representable.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Integer vector broadcast: alternative 0 duplicates element 0 of a SIMD
;; register, alternative 1 duplicates a general register ("?r" makes the
;; GP-source form less preferred).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point vector broadcast from element 0 of a SIMD register.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Broadcast a selected lane of a same-width vector.  The lane number in
;; operands[2] is remapped for big-endian via aarch64_endian_lane_rtx.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the "swapped width" mode (64-bit
;; source for a 128-bit destination and vice versa); the lane is endian-
;; remapped in the source's mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move with eight alternatives: load, store-zero via
;; xzr, store, SIMD reg-reg, SIMD->GP (umov), GP->SIMD (fmov), GP-GP,
;; and move-immediate (Dn).  The condition requires one side to be a
;; register (or the source to be zero) so no mem-to-mem form survives.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; 128-bit (VQ) vector move.  Zero stores use "stp xzr, xzr" (hence the
;; Umn pair-capable memory constraint); the GP<->SIMD alternatives expand
;; to multiple instructions (length 8, split elsewhere).
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store of lane 0 of a vector as a scalar STR.  Only matches when the
;; selected lane is the architectural lane 0 after endian correction.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers.  The condition requires the second
;; address to be exactly the first plus the size of the first mode, i.e.
;; the two loads are adjacent in memory.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
;; STP of two 64-bit vector registers to adjacent memory locations.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
;; LDP of two 128-bit (Q) vector registers; same adjacency condition.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
;; STP of two 128-bit (Q) vector registers; same adjacency condition.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit vector move involving general registers into two
;; 64-bit halves: GP->SIMD goes via move_lo_quad/move_hi_quad, SIMD->GP
;; goes via the mov_from_<mode>{low,high} extraction patterns below.
260 (define_expand "aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a Q vector into a general register.  Only valid
;; after reload (used by the split expander above).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
;; Move the high half of a Q vector into a general register.
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; ORN: %2 | ~%1.  Note the template swaps the operands because the
;; AArch64 ORN instruction inverts its *second* source.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
;; BIC: %2 & ~%1 — same operand swap as ORN.
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply — byte/half/single lanes only (VDQ_BHSI),
;; since AdvSIMD has no 64-bit element MUL.
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriately-sized REV instruction.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
;; RBIT: reverse the bits within each byte of a byte vector.
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros for 32-bit element vectors, synthesised as
;; bswap (element byte reverse) + rbit on the byte view + clz, which
;; together reverse all bits of each element before counting leading
;; zeros.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign (op1, op2): flip op1's sign by op2's sign.  Implemented in the
;; integer equivalent mode: isolate op2's sign bit with a
;; (HOST_WIDE_INT_M1U << (unit_bits - 1)) mask, then XOR it into op1.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: dot-product of byte vectors accumulated into the 32-bit
;; element accumulator tied to operand 0 ("0" constraint on operand 1).
423 (define_insn "aarch64_<sur>dot<vsi2qi>"
424 [(set (match_operand:VS 0 "register_operand" "=w")
425 (plus:VS (match_operand:VS 1 "register_operand" "0")
426 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
427 (match_operand:<VSI2QI> 3 "register_operand" "w")]
430 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
431 [(set_attr "type" "neon_dot")]
434 ;; These expands map to the Dot Product optab the vectorizer checks for.
435 ;; The auto-vectorizer expects a dot product builtin that also does an
436 ;; accumulation into the provided register.
437 ;; Given the following pattern
439 ;; for (i=0; i<len; i++) {
445 ;; This can be auto-vectorized to
446 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
448 ;; given enough iterations. However the vectorizer can keep unrolling the loop
449 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
450 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
453 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Expander: accumulate the dot product into the incoming accumulator
;; operands[3] and then copy it to the result operand.
454 (define_expand "<sur>dot_prod<vsi2qi>"
455 [(set (match_operand:VS 0 "register_operand")
456 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
457 (match_operand:<VSI2QI> 2 "register_operand")]
459 (match_operand:VS 3 "register_operand")))]
463 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
465 emit_insn (gen_rtx_SET (operands[0], operands[3]));
469 ;; These instructions map to the __builtins for the Dot Product
470 ;; indexed operations.
;; Lane-indexed dot product with a 64-bit (V8QI) index vector; the lane
;; number in operands[4] is endian-remapped in V8QImode.
471 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
472 [(set (match_operand:VS 0 "register_operand" "=w")
473 (plus:VS (match_operand:VS 1 "register_operand" "0")
474 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
475 (match_operand:V8QI 3 "register_operand" "<h_con>")
476 (match_operand:SI 4 "immediate_operand" "i")]
480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
483 [(set_attr "type" "neon_dot")]
;; As above with a 128-bit (V16QI) index vector (the "laneq" form).
486 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
487 [(set (match_operand:VS 0 "register_operand" "=w")
488 (plus:VS (match_operand:VS 1 "register_operand" "0")
489 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
490 (match_operand:V16QI 3 "register_operand" "<h_con>")
491 (match_operand:SI 4 "immediate_operand" "i")]
495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
498 [(set_attr "type" "neon_dot")]
;; copysign (op1, op2): take magnitude from op1 and sign from op2.
;; Implemented as a BSL with a sign-bit-only mask: mask-selected bits come
;; from operands[2] (the sign source), the rest from operands[1].
501 (define_expand "copysign<mode>3"
502 [(match_operand:VHSDF 0 "register_operand")
503 (match_operand:VHSDF 1 "register_operand")
504 (match_operand:VHSDF 2 "register_operand")]
505 "TARGET_FLOAT && TARGET_SIMD"
507 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
508 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
510 emit_move_insn (v_bitmask,
511 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
512 HOST_WIDE_INT_M1U << bits));
513 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
514 operands[2], operands[1]));
;; Multiply a vector by a broadcast lane of another same-width vector,
;; combiner pattern for MUL/FMUL (by element); lane is endian-remapped.
519 (define_insn "*aarch64_mul3_elt<mode>"
520 [(set (match_operand:VMUL 0 "register_operand" "=w")
524 (match_operand:VMUL 1 "register_operand" "<h_con>")
525 (parallel [(match_operand:SI 2 "immediate_operand")])))
526 (match_operand:VMUL 3 "register_operand" "w")))]
529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Same, but the lane comes from the swapped-width mode vector.
535 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
536 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
537 (mult:VMUL_CHANGE_NLANES
538 (vec_duplicate:VMUL_CHANGE_NLANES
540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
541 (parallel [(match_operand:SI 2 "immediate_operand")])))
542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a duplicated scalar register — always element [0].
;; NOTE(review): the trailing ";" after the template string below starts a
;; comment in md syntax, so it is harmless, but it looks unintentional.
551 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
552 [(set (match_operand:VMUL 0 "register_operand" "=w")
555 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
556 (match_operand:VMUL 2 "register_operand" "w")))]
558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; FRSQRTE: reciprocal square-root estimate (vector and scalar FP modes).
562 (define_insn "aarch64_rsqrte<mode>"
563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; FRSQRTS: Newton-series step used to refine the FRSQRTE estimate.
570 (define_insn "aarch64_rsqrts<mode>"
571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
576 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
577 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; rsqrt optab: expands to the approximate sequence via
;; aarch64_emit_approx_sqrt (recip = true).
579 (define_expand "rsqrt<mode>2"
580 [(set (match_operand:VALLF 0 "register_operand" "=w")
581 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
585 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Multiply a DF scalar by a selected lane of a V2DF vector (combiner
;; pattern); lane endian-remapped in V2DFmode.
589 (define_insn "*aarch64_mul3_elt_to_64v2df"
590 [(set (match_operand:DF 0 "register_operand" "=w")
593 (match_operand:V2DF 1 "register_operand" "w")
594 (parallel [(match_operand:SI 2 "immediate_operand")]))
595 (match_operand:DF 3 "register_operand" "w")))]
598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
604 (define_insn "neg<mode>2"
605 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
606 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
608 "neg\t%0.<Vtype>, %1.<Vtype>"
609 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (RTL abs form, combinable).
612 (define_insn "abs<mode>2"
613 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
614 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
616 "abs\t%0.<Vtype>, %1.<Vtype>"
617 [(set_attr "type" "neon_abs<q>")]
620 ;; The intrinsic version of integer ABS must not be allowed to
621 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept opaque as an unspec so combine cannot fold it.
623 (define_insn "aarch64_abs<mode>"
624 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
626 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
629 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
630 [(set_attr "type" "neon_abs<q>")]
;; SABD: signed absolute difference, matched from abs (a - b).
633 (define_insn "abd<mode>_3"
634 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
635 (abs:VDQ_BHSI (minus:VDQ_BHSI
636 (match_operand:VDQ_BHSI 1 "register_operand" "w")
637 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
640 [(set_attr "type" "neon_abd<q>")]
;; [SU]ABDL2: widening absolute difference of the high halves.
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
;; [SU]ABAL: widening absolute difference, accumulated into operand 3
;; (tied to the output via the "0" constraint).
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
;; [SU]ADALP: pairwise add-and-accumulate into the widened accumulator.
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
;; SAD optab expander implementing the sequence documented above via the
;; abdl2 / abal / adalp patterns defined earlier in this file.
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
699 emit_move_insn (operands[0], operands[3]);
;; SABA: absolute difference of operands 1 and 2 accumulated into the
;; tied accumulator operand 3.
704 (define_insn "aba<mode>_3"
705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
709 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
711 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
712 [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference (vector and scalar modes).
715 (define_insn "fabd<mode>3"
716 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
719 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
720 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
722 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
723 [(set_attr "type" "neon_fp_abd_<stype><q>")]
726 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 0 is a register AND; alternative 1 matches a "Db" BIC-style
;; immediate (operand 1 tied to output) printed via
;; aarch64_output_simd_mov_immediate.
727 (define_insn "and<mode>3"
728 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
729 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
730 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
733 switch (which_alternative)
736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
744 [(set_attr "type" "neon_logic<q>")]
747 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Same two-alternative structure as and<mode>3, with "Do" ORR immediates.
748 (define_insn "ior<mode>3"
749 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
750 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
751 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
754 switch (which_alternative)
757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
765 [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-or (EOR), register operands only.
768 (define_insn "xor<mode>3"
769 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
770 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
771 (match_operand:VDQ_I 2 "register_operand" "w")))]
773 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
774 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
777 (define_insn "one_cmpl<mode>2"
778 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
779 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
781 "not\t%0.<Vbtype>, %1.<Vbtype>"
782 [(set_attr "type" "neon_logic<q>")]
;; Insert into one lane of a vector (vec_merge with a one-hot mask in
;; operand 2).  Alternatives: INS from lane 0 of a SIMD reg, INS from a
;; GP reg, LD1 of a single lane from memory.  The mask is re-encoded as
;; the endian-corrected one-hot bit before printing.
785 (define_insn "aarch64_simd_vec_set<mode>"
786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
788 (vec_duplicate:VALL_F16
789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
796 switch (which_alternative)
799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of operand 3 into one lane of the tied operand 1
;; (element-to-element INS); both lane numbers are endian-corrected.
811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
814 (vec_duplicate:VALL_F16
816 (match_operand:VALL_F16 3 "register_operand" "w")
818 [(match_operand:SI 4 "immediate_operand" "i")])))
819 (match_operand:VALL_F16 1 "register_operand" "0")
820 (match_operand:SI 2 "immediate_operand" "i")))]
823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
829 [(set_attr "type" "neon_ins<q>")]
;; Same lane-to-lane INS, with the source lane taken from the
;; swapped-width mode vector (its lane remapped in <VSWAP_WIDTH>mode).
832 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
833 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
834 (vec_merge:VALL_F16_NO_V2Q
835 (vec_duplicate:VALL_F16_NO_V2Q
837 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
839 [(match_operand:SI 4 "immediate_operand" "i")])))
840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
841 (match_operand:SI 2 "immediate_operand" "i")))]
844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
847 INTVAL (operands[4]));
849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
851 [(set_attr "type" "neon_ins<q>")]
;; USHR: logical right shift by an immediate vector (all-equal lanes,
;; "Dr" constraint).
854 (define_insn "aarch64_simd_lshr<mode>"
855 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
856 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
857 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
859 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
860 [(set_attr "type" "neon_shift_imm<q>")]
;; SSHR: arithmetic right shift by an immediate vector.
863 (define_insn "aarch64_simd_ashr<mode>"
864 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
865 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
866 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
868 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
869 [(set_attr "type" "neon_shift_imm<q>")]
;; SHL: left shift by an immediate vector ("Dl" constraint).
872 (define_insn "aarch64_simd_imm_shl<mode>"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
877 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
878 [(set_attr "type" "neon_shift_imm<q>")]
;; SSHL with a register shift-count vector (left shift; RTL ashift).
881 (define_insn "aarch64_simd_reg_sshl<mode>"
882 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
883 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
884 (match_operand:VDQ_I 2 "register_operand" "w")))]
886 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
887 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by register, kept as an unspec: negative counts shift right
;; (logical), so this cannot be expressed as a plain RTL shift.
890 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
891 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
892 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
893 (match_operand:VDQ_I 2 "register_operand" "w")]
894 UNSPEC_ASHIFT_UNSIGNED))]
896 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
897 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by register as an unspec — signed counterpart of the above.
900 (define_insn "aarch64_simd_reg_shl<mode>_signed"
901 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
902 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
903 (match_operand:VDQ_I 2 "register_operand" "w")]
904 UNSPEC_ASHIFT_SIGNED))]
906 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
907 [(set_attr "type" "neon_shift_reg<q>")]
;; Left-shift optab.  In-range constant counts (0 <= n < element width)
;; use the immediate SHL pattern; otherwise the scalar count is broadcast
;; with dup and SSHL (register form) is used.
910 (define_expand "ashl<mode>3"
911 [(match_operand:VDQ_I 0 "register_operand" "")
912 (match_operand:VDQ_I 1 "register_operand" "")
913 (match_operand:SI 2 "general_operand" "")]
916 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
919 if (CONST_INT_P (operands[2]))
921 shift_amount = INTVAL (operands[2]);
922 if (shift_amount >= 0 && shift_amount < bit_width)
924 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
926 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
933 operands[2] = force_reg (SImode, operands[2]);
936 else if (MEM_P (operands[2]))
938 operands[2] = force_reg (SImode, operands[2]);
941 if (REG_P (operands[2]))
943 rtx tmp = gen_reg_rtx (<MODE>mode);
944 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
945 convert_to_mode (<VEL>mode,
948 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical right-shift optab.  In-range constants (0 < n <= element
;; width — USHR allows a count equal to the width) use the immediate
;; pattern; register counts are negated and broadcast, then shifted with
;; USHL (a negative USHL count shifts right).
957 (define_expand "lshr<mode>3"
958 [(match_operand:VDQ_I 0 "register_operand" "")
959 (match_operand:VDQ_I 1 "register_operand" "")
960 (match_operand:SI 2 "general_operand" "")]
963 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
966 if (CONST_INT_P (operands[2]))
968 shift_amount = INTVAL (operands[2]);
969 if (shift_amount > 0 && shift_amount <= bit_width)
971 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
973 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
979 operands[2] = force_reg (SImode, operands[2]);
981 else if (MEM_P (operands[2]))
983 operands[2] = force_reg (SImode, operands[2]);
986 if (REG_P (operands[2]))
988 rtx tmp = gen_reg_rtx (SImode);
989 rtx tmp1 = gen_reg_rtx (<MODE>mode);
990 emit_insn (gen_negsi2 (tmp, operands[2]))
991 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
992 convert_to_mode (<VEL>mode,
994 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic right-shift optab — same strategy as lshr<mode>3 but with
;; SSHR for constants and signed SSHL for negated register counts.
1004 (define_expand "ashr<mode>3"
1005 [(match_operand:VDQ_I 0 "register_operand" "")
1006 (match_operand:VDQ_I 1 "register_operand" "")
1007 (match_operand:SI 2 "general_operand" "")]
1010 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1013 if (CONST_INT_P (operands[2]))
1015 shift_amount = INTVAL (operands[2]);
1016 if (shift_amount > 0 && shift_amount <= bit_width)
1018 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1020 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1026 operands[2] = force_reg (SImode, operands[2]);
1028 else if (MEM_P (operands[2]))
1030 operands[2] = force_reg (SImode, operands[2]);
1033 if (REG_P (operands[2]))
1035 rtx tmp = gen_reg_rtx (SImode);
1036 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1037 emit_insn (gen_negsi2 (tmp, operands[2]));
1038 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1039 convert_to_mode (<VEL>mode,
1041 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1051 (define_expand "vashl<mode>3"
1052 [(match_operand:VDQ_I 0 "register_operand" "")
1053 (match_operand:VDQ_I 1 "register_operand" "")
1054 (match_operand:VDQ_I 2 "register_operand" "")]
1057 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1062 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1063 ;; Negating individual lanes most certainly offsets the
1064 ;; gain from vectorization.
1065 (define_expand "vashr<mode>3"
1066 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1067 (match_operand:VDQ_BHSI 1 "register_operand" "")
1068 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1071 rtx neg = gen_reg_rtx (<MODE>mode);
1072 emit (gen_neg<mode>2 (neg, operands[2]));
1073 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1079 (define_expand "aarch64_ashr_simddi"
1080 [(match_operand:DI 0 "register_operand" "=w")
1081 (match_operand:DI 1 "register_operand" "w")
1082 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1085 /* An arithmetic shift right by 64 fills the result with copies of the sign
1086 bit, just like asr by 63 - however the standard pattern does not handle
1088 if (INTVAL (operands[2]) == 64)
1089 operands[2] = GEN_INT (63);
1090 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector logical shift right by a vector of per-lane amounts: negate
;; the counts and use the unsigned register-shift (USHL) pattern, the
;; unsigned counterpart of vashr<mode>3 above.
1095 (define_expand "vlshr<mode>3"
1096 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1097 (match_operand:VDQ_BHSI 1 "register_operand" "")
1098 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1101 rtx neg = gen_reg_rtx (<MODE>mode);
1102 emit (gen_neg<mode>2 (neg, operands[2]));
1103 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DI-mode logical shift right whose immediate may be 64.  A logical
;; shift by 64 produces zero, so that case becomes a move of zero;
;; otherwise defer to the standard lshrdi3 pattern.
1108 (define_expand "aarch64_lshr_simddi"
1109 [(match_operand:DI 0 "register_operand" "=w")
1110 (match_operand:DI 1 "register_operand" "w")
1111 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1114 if (INTVAL (operands[2]) == 64)
1115 emit_move_insn (operands[0], const0_rtx);
1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (64-bit modes).  On big-endian the lowest
;; memory-order element lives in the high bits of the D register, so
;; SHL moves the vector the required direction; on little-endian USHR
;; does.  Neither needs a SIMD zero register.
1123 (define_insn "vec_shr_<mode>"
1124 [(set (match_operand:VD 0 "register_operand" "=w")
1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1126 (match_operand:SI 2 "immediate_operand" "i")]
1130 if (BYTES_BIG_ENDIAN)
1131 return "shl %d0, %d1, %2";
1133 return "ushr %d0, %d1, %2";
1135 [(set_attr "type" "neon_shift_imm")]
;; Insert scalar operand 1 into lane operand 2 of vector operand 0.
;; The lane index is converted to the one-hot bitmask form that the
;; aarch64_simd_vec_set<mode> pattern expects.
1138 (define_expand "vec_set<mode>"
1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1140 (match_operand:<VEL> 1 "register_operand" "w")
1141 (match_operand:SI 2 "immediate_operand" "")]
1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1145 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1146 GEN_INT (elem), operands[0]));
;; Vector multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; The accumulator is tied to the output register ("0" constraint).
1152 (define_insn "aarch64_mla<mode>"
1153 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1154 (plus:VDQ_BHSI (mult:VDQ_BHSI
1155 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1156 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1157 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1159 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1160 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from a lane of a same-width
;; vector.  The lane number is remapped for endianness before printing.
1163 (define_insn "*aarch64_mla_elt<mode>"
1164 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1167 (vec_duplicate:VDQHS
1169 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1171 (match_operand:VDQHS 3 "register_operand" "w"))
1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; As above, but the lane is taken from a vector of the opposite width
;; (<VSWAP_WIDTH>), so the lane remap uses that mode.
1181 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1182 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1185 (vec_duplicate:VDQHS
1187 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1189 (match_operand:VDQHS 3 "register_operand" "w"))
1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLA with a scalar (element-mode) multiplicand duplicated across all
;; lanes; maps to lane 0 of the scalar's register.
1199 (define_insn "*aarch64_mla_elt_merge<mode>"
1200 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1202 (mult:VDQHS (vec_duplicate:VDQHS
1203 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1204 (match_operand:VDQHS 2 "register_operand" "w"))
1205 (match_operand:VDQHS 3 "register_operand" "0")))]
1207 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Vector multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; The accumulator is tied to the output register ("0" constraint).
1211 (define_insn "aarch64_mls<mode>"
1212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1213 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1214 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1215 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1217 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1218 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand broadcast from a lane of a same-width
;; vector; mirrors *aarch64_mla_elt<mode> above.
1221 (define_insn "*aarch64_mls_elt<mode>"
1222 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1224 (match_operand:VDQHS 4 "register_operand" "0")
1226 (vec_duplicate:VDQHS
1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from an opposite-width vector.
1239 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1240 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1242 (match_operand:VDQHS 4 "register_operand" "0")
1244 (vec_duplicate:VDQHS
1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLS with a scalar multiplicand duplicated across all lanes (lane 0).
1257 (define_insn "*aarch64_mls_elt_merge<mode>"
1258 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1260 (match_operand:VDQHS 1 "register_operand" "0")
1261 (mult:VDQHS (vec_duplicate:VDQHS
1262 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1263 (match_operand:VDQHS 3 "register_operand" "w"))))]
1265 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1266 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1269 ;; Max/Min operations.
;; Signed/unsigned vector max/min for 8/16/32-bit element modes.
1270 (define_insn "<su><maxmin><mode>3"
1271 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1272 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1275 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1276 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min has no direct instruction; synthesize it as a compare
;; followed by a vector conditional select (vcond).
1279 (define_expand "<su><maxmin>v2di3"
1280 [(set (match_operand:V2DI 0 "register_operand" "")
1281 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1282 (match_operand:V2DI 2 "register_operand" "")))]
1285 enum rtx_code cmp_operator;
1306 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1307 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1308 operands[2], cmp_fmt, operands[1], operands[2]))\u003b
1312 ;; Pairwise Integer Max/Min operations.
;; Pairwise max/min on integer vectors: each result element is the
;; max/min of a pair of adjacent elements from the concatenated inputs.
1313 (define_insn "aarch64_<maxmin_uns>p<mode>"
1314 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1315 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1316 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1319 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1320 [(set_attr "type" "neon_minmax<q>")]
1323 ;; Pairwise FP Max/Min operations.
;; Pairwise max/min on floating-point vectors (HF/SF/DF element modes).
1324 (define_insn "aarch64_<maxmin_uns>p<mode>"
1325 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1326 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1327 (match_operand:VHSDF 2 "register_operand" "w")]
1330 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1331 [(set_attr "type" "neon_minmax<q>")]
1334 ;; vec_concat gives a new vector with the low elements from operand 1, and
1335 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1336 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1337 ;; What that means, is that the RTL descriptions of the below patterns
1338 ;; need to change depending on endianness.
1340 ;; Move to the low architectural bits of the register.
1341 ;; On little-endian this is { operand, zeroes }
1342 ;; On big-endian this is { zeroes, operand }
;; Little-endian move-to-low-quad for Q modes with more than two
;; elements: result is { operand 1, zeroes }.
1344 (define_insn "move_lo_quad_internal_<mode>"
1345 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1347 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1348 (vec_duplicate:<VHALF> (const_int 0))))]
1349 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1355 (set_attr "simd" "yes,*,yes")
1356 (set_attr "fp" "*,yes,*")
1357 (set_attr "length" "4")]

;; Little-endian variant for two-element Q modes (V2DI/V2DF).
1360 (define_insn "move_lo_quad_internal_<mode>"
1361 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1363 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1365 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1370 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1371 (set_attr "simd" "yes,*,yes")
1372 (set_attr "fp" "*,yes,*")
1373 (set_attr "length" "4")]

;; Big-endian counterpart of the VQ_NO2E pattern: the vec_concat
;; order is swapped, giving { zeroes, operand 1 }.
1376 (define_insn "move_lo_quad_internal_be_<mode>"
1377 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1379 (vec_duplicate:<VHALF> (const_int 0))
1380 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1381 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1386 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1387 (set_attr "simd" "yes,*,yes")
1388 (set_attr "fp" "*,yes,*")
1389 (set_attr "length" "4")]

;; Big-endian variant for two-element Q modes.
1392 (define_insn "move_lo_quad_internal_be_<mode>"
1393 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1396 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1397 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1402 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1403 (set_attr "simd" "yes,*,yes")
1404 (set_attr "fp" "*,yes,*")
1405 (set_attr "length" "4")]
;; Dispatch to the endian-specific move_lo_quad_internal pattern.
1408 (define_expand "move_lo_quad_<mode>"
1409 [(match_operand:VQ 0 "register_operand")
1410 (match_operand:VQ 1 "register_operand")]
1413 if (BYTES_BIG_ENDIAN)
1414 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1416 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1421 ;; Move operand1 to the high architectural bits of the register, keeping
1422 ;; the low architectural bits of operand2.
1423 ;; For little-endian this is { operand2, operand1 }
1424 ;; For big-endian this is { operand1, operand2 }
;; Little-endian move-to-high-quad: keep the low half of operand 0
;; (selected by the lo-half PARALLEL) and insert operand 1 above it.
1426 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1427 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1431 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1432 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1433 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1435 ins\\t%0.d[1], %1.d[0]
1437 [(set_attr "type" "neon_ins")]

;; Big-endian counterpart: vec_concat order is swapped but the same
;; INS into d[1] implements it.
1440 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1441 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1443 (match_operand:<VHALF> 1 "register_operand" "w,r")
1446 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1447 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1449 ins\\t%0.d[1], %1.d[0]
1451 [(set_attr "type" "neon_ins")]
;; Dispatch to the endian-specific move_hi_quad pattern.  The PARALLEL
;; built here selects the low half (false), i.e. the preserved lanes.
1454 (define_expand "move_hi_quad_<mode>"
1455 [(match_operand:VQ 0 "register_operand" "")
1456 (match_operand:<VHALF> 1 "register_operand" "")]
1459 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1460 if (BYTES_BIG_ENDIAN)
1461 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1464 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1469 ;; Narrowing operations.
;; Truncate each element of a Q-register vector to half width (XTN).
1472 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1473 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1474 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1476 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1477 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate two D-register vectors: concatenate them into a
;; double-width temporary via move_lo/move_hi_quad, then narrow with
;; XTN.  Which input goes in which half flips with endianness.
1480 (define_expand "vec_pack_trunc_<mode>"
1481 [(match_operand:<VNARROWD> 0 "register_operand" "")
1482 (match_operand:VDN 1 "register_operand" "")
1483 (match_operand:VDN 2 "register_operand" "")]
1486 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1487 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1488 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1490 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1491 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1492 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-truncate two Q-register vectors with an XTN/XTN2 pair; the
;; input order in the template is swapped for big-endian.  The output
;; is earlyclobber ("=&w") since it is written before both inputs are
;; fully read.
1498 (define_insn "vec_pack_trunc_<mode>"
1499 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1500 (vec_concat:<VNARROWQ2>
1501 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1502 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1505 if (BYTES_BIG_ENDIAN)
1506 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1508 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1510 [(set_attr "type" "multiple")
1511 (set_attr "length" "8")]
1514 ;; Widening operations.
;; Sign/zero-extend the low half of a Q-register vector (XTL).
1516 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1519 (match_operand:VQW 1 "register_operand" "w")
1520 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1523 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1524 [(set_attr "type" "neon_shift_imm_long")]

;; Sign/zero-extend the high half of a Q-register vector (XTL2).
1527 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1528 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1529 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1530 (match_operand:VQW 1 "register_operand" "w")
1531 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1534 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1535 [(set_attr "type" "neon_shift_imm_long")]
;; Standard vec_unpack hi expand: build the hi-half PARALLEL (true)
;; and emit the matching _hi_ insn.
1538 (define_expand "vec_unpack<su>_hi_<mode>"
1539 [(match_operand:<VWIDE> 0 "register_operand" "")
1540 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1543 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1544 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],

;; Standard vec_unpack lo expand: lo-half PARALLEL (false) + _lo_ insn.
1550 (define_expand "vec_unpack<su>_lo_<mode>"
1551 [(match_operand:<VWIDE> 0 "register_operand" "")
1552 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1555 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1556 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1562 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves of two Q vectors
;; (SMLAL/UMLAL); the wide accumulator is tied to the output.
1564 (define_insn "*aarch64_<su>mlal_lo<mode>"
1565 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1568 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1569 (match_operand:VQW 2 "register_operand" "w")
1570 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1571 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1572 (match_operand:VQW 4 "register_operand" "w")
1574 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1576 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1577 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; High-half variant (SMLAL2/UMLAL2).
1580 (define_insn "*aarch64_<su>mlal_hi<mode>"
1581 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1584 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1585 (match_operand:VQW 2 "register_operand" "w")
1586 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1587 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1588 (match_operand:VQW 4 "register_operand" "w")
1590 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1592 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1593 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract, low halves (SMLSL/UMLSL).
1596 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1599 (match_operand:<VWIDE> 1 "register_operand" "0")
1601 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1602 (match_operand:VQW 2 "register_operand" "w")
1603 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1604 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1605 (match_operand:VQW 4 "register_operand" "w")
1608 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1609 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; High-half variant (SMLSL2/UMLSL2).
1612 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1613 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1615 (match_operand:<VWIDE> 1 "register_operand" "0")
1617 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1618 (match_operand:VQW 2 "register_operand" "w")
1619 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1620 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1621 (match_operand:VQW 4 "register_operand" "w")
1624 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate of whole D-register vectors.
1628 (define_insn "*aarch64_<su>mlal<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1633 (match_operand:VD_BHSI 1 "register_operand" "w"))
1635 (match_operand:VD_BHSI 2 "register_operand" "w")))
1636 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1638 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1639 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract of whole D-register vectors.
1642 (define_insn "*aarch64_<su>mlsl<mode>"
1643 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1645 (match_operand:<VWIDE> 1 "register_operand" "0")
1648 (match_operand:VD_BHSI 2 "register_operand" "w"))
1650 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1652 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1653 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
1656 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1657 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1658 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1659 (match_operand:VQW 1 "register_operand" "w")
1660 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1661 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1662 (match_operand:VQW 2 "register_operand" "w")
1665 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1666 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Standard widen-mult lo expand: lo-half PARALLEL + _lo_ insn.
1669 (define_expand "vec_widen_<su>mult_lo_<mode>"
1670 [(match_operand:<VWIDE> 0 "register_operand" "")
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1672 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1675 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1676 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],

;; Widening multiply of the high halves (SMULL2/UMULL2).
1683 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1684 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1685 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686 (match_operand:VQW 1 "register_operand" "w")
1687 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1688 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689 (match_operand:VQW 2 "register_operand" "w")
1692 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1693 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Standard widen-mult hi expand: hi-half PARALLEL + _hi_ insn.
1696 (define_expand "vec_widen_<su>mult_hi_<mode>"
1697 [(match_operand:<VWIDE> 0 "register_operand" "")
1698 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1699 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1702 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1703 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1711 ;; FP vector operations.
1712 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1713 ;; double-precision (64-bit) floating-point data types and arithmetic as
1714 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1715 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1717 ;; Floating-point operations can raise an exception. Vectorizing such
1718 ;; operations is safe for the reasons explained below.
1720 ;; ARMv8 permits an extension to enable trapped floating-point
1721 ;; exception handling, however this is an optional feature. In the
1722 ;; event of a floating-point exception being raised by vectorised
1724 ;; 1. If trapped floating-point exceptions are available, then a trap
1725 ;; will be taken when any lane raises an enabled exception. A trap
1726 ;; handler may determine which lane raised the exception.
1727 ;; 2. Alternatively a sticky exception flag is set in the
1728 ;; floating-point status register (FPSR). Software may explicitly
1729 ;; test the exception flags, in which case the tests will either
1730 ;; prevent vectorisation, allowing precise identification of the
1731 ;; failing operation, or if tested outside of vectorisable regions
1732 ;; then the specific operation and lane are not of interest.
1734 ;; FP arithmetic operations.
;; Vector floating-point addition (FADD).
1736 (define_insn "add<mode>3"
1737 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1738 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1739 (match_operand:VHSDF 2 "register_operand" "w")))]
1741 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1742 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Vector floating-point subtraction (FSUB).
1745 (define_insn "sub<mode>3"
1746 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1747 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1748 (match_operand:VHSDF 2 "register_operand" "w")))]
1750 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1751 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Vector floating-point multiplication (FMUL).
1754 (define_insn "mul<mode>3"
1755 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1756 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1757 (match_operand:VHSDF 2 "register_operand" "w")))]
1759 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1760 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP division.  The expand first tries the approximate
;; reciprocal sequence (aarch64_emit_approx_div); if that declines,
;; operand 1 is forced to a register and the FDIV insn below matches.
1763 (define_expand "div<mode>3"
1764 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1765 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1766 (match_operand:VHSDF 2 "register_operand" "w")))]
1769 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1772 operands[1] = force_reg (<MODE>mode, operands[1]);

;; The actual hardware division instruction.
1775 (define_insn "*div<mode>3"
1776 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1777 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1778 (match_operand:VHSDF 2 "register_operand" "w")))]
1780 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1781 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector floating-point negation (FNEG).
1784 (define_insn "neg<mode>2"
1785 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1786 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1788 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1789 [(set_attr "type" "neon_fp_neg_<stype><q>")]

;; Vector floating-point absolute value (FABS).
1792 (define_insn "abs<mode>2"
1793 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1794 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1796 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1797 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3,
;; with the addend tied to the output register (FMLA).
1800 (define_insn "fma<mode>4"
1801 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1802 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1803 (match_operand:VHSDF 2 "register_operand" "w")
1804 (match_operand:VHSDF 3 "register_operand" "0")))]
1806 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1807 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a lane of a same-width
;; vector; lane number remapped for endianness before printing.
1810 (define_insn "*aarch64_fma4_elt<mode>"
1811 [(set (match_operand:VDQF 0 "register_operand" "=w")
1815 (match_operand:VDQF 1 "register_operand" "<h_con>")
1816 (parallel [(match_operand:SI 2 "immediate_operand")])))
1817 (match_operand:VDQF 3 "register_operand" "w")
1818 (match_operand:VDQF 4 "register_operand" "0")))]
1821 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1822 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1824 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; As above with the lane taken from an opposite-width SF vector.
1827 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1828 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1830 (vec_duplicate:VDQSF
1832 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1833 (parallel [(match_operand:SI 2 "immediate_operand")])))
1834 (match_operand:VDQSF 3 "register_operand" "w")
1835 (match_operand:VDQSF 4 "register_operand" "0")))]
1838 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1839 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1841 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLA with a scalar multiplicand duplicated across all lanes (lane 0).
1844 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1845 [(set (match_operand:VMUL 0 "register_operand" "=w")
1848 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1849 (match_operand:VMUL 2 "register_operand" "w")
1850 (match_operand:VMUL 3 "register_operand" "0")))]
1852 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1853 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF FMLA taking one multiplicand from a lane of a V2DF vector.
1856 (define_insn "*aarch64_fma4_elt_to_64v2df"
1857 [(set (match_operand:DF 0 "register_operand" "=w")
1860 (match_operand:V2DF 1 "register_operand" "w")
1861 (parallel [(match_operand:SI 2 "immediate_operand")]))
1862 (match_operand:DF 3 "register_operand" "w")
1863 (match_operand:DF 4 "register_operand" "0")))]
1866 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1867 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1869 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: operand 0 = -(operand 1) * operand 2
;; + operand 3, implemented with FMLS; addend tied to the output.
1872 (define_insn "fnma<mode>4"
1873 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1875 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1876 (match_operand:VHSDF 2 "register_operand" "w")
1877 (match_operand:VHSDF 3 "register_operand" "0")))]
1879 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1880 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS variants mirroring the *aarch64_fma4_elt* patterns above.
;; Lane-broadcast from a same-width vector.
1883 (define_insn "*aarch64_fnma4_elt<mode>"
1884 [(set (match_operand:VDQF 0 "register_operand" "=w")
1887 (match_operand:VDQF 3 "register_operand" "w"))
1890 (match_operand:VDQF 1 "register_operand" "<h_con>")
1891 (parallel [(match_operand:SI 2 "immediate_operand")])))
1892 (match_operand:VDQF 4 "register_operand" "0")))]
1895 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1896 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1898 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; Lane-broadcast from an opposite-width SF vector.
1901 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1902 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1905 (match_operand:VDQSF 3 "register_operand" "w"))
1906 (vec_duplicate:VDQSF
1908 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1909 (parallel [(match_operand:SI 2 "immediate_operand")])))
1910 (match_operand:VDQSF 4 "register_operand" "0")))]
1913 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1914 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1916 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; Scalar duplicated across all lanes (lane 0).
1919 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1920 [(set (match_operand:VMUL 0 "register_operand" "=w")
1923 (match_operand:VMUL 2 "register_operand" "w"))
1925 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1926 (match_operand:VMUL 3 "register_operand" "0")))]
1928 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1929 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF FMLS taking one multiplicand from a lane of a V2DF vector.
1932 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1933 [(set (match_operand:DF 0 "register_operand" "=w")
1936 (match_operand:V2DF 1 "register_operand" "w")
1937 (parallel [(match_operand:SI 2 "immediate_operand")]))
1939 (match_operand:DF 3 "register_operand" "w"))
1940 (match_operand:DF 4 "register_operand" "0")))]
1943 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1944 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1946 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1949 ;; Vector versions of the floating-point frint patterns.
1950 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format (FRINT* family).
1951 (define_insn "<frint_pattern><mode>2"
1952 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1953 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1956 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1957 [(set_attr "type" "neon_fp_round_<stype><q>")]

;; Vector FP-to-integer conversion with an explicit rounding mode
;; (FCVT[NMPZ][SU] family).
1962 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1963 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1964 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1965 [(match_operand:VHSDF 1 "register_operand" "w")]
1968 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1969 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1972 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF->HI conversion with explicit rounding; requires the
;; ARMv8.2 FP16 scalar instructions (TARGET_SIMD_F16INST).
1973 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1974 [(set (match_operand:HI 0 "register_operand" "=w")
1975 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1977 "TARGET_SIMD_F16INST"
1978 "fcvt<frint_suffix><su>\t%h0, %h1"
1979 [(set_attr "type" "neon_fp_to_int_s")]

;; Scalar HF->HI truncating (round-toward-zero) conversion.
1982 (define_insn "<optab>_trunchfhi2"
1983 [(set (match_operand:HI 0 "register_operand" "=w")
1984 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1985 "TARGET_SIMD_F16INST"
1986 "fcvtz<su>\t%h0, %h1"
1987 [(set_attr "type" "neon_fp_to_int_s")]

;; Scalar HI->HF integer-to-float conversion.
1990 (define_insn "<optab>hihf2"
1991 [(set (match_operand:HF 0 "register_operand" "=w")
1992 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1993 "TARGET_SIMD_F16INST"
1994 "<su_optab>cvtf\t%h0, %h1"
1995 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with a float->fixed conversion
;; into a single FCVTZ with an fbits operand; the fbits count is the
;; log2 of the constant, validated to fit the element width.
1998 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1999 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2000 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2002 (match_operand:VDQF 1 "register_operand" "w")
2003 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2006 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2007 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2009 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2011 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2012 output_asm_insn (buf, operands);
2015 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard float->int optab, mapped to the round-to-nearest variant.
2018 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2019 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2020 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2021 [(match_operand:VHSDF 1 "register_operand")]

;; Standard truncating float->int optab (round toward zero).
2026 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2027 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2028 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2029 [(match_operand:VHSDF 1 "register_operand")]

;; ftrunc: round to integral toward zero, result still FP.
2034 (define_expand "ftrunc<VHSDF:mode>2"
2035 [(set (match_operand:VHSDF 0 "register_operand")
2036 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]

;; Integer->float conversion (SCVTF/UCVTF).
2041 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2044 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2046 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2047 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2050 ;; Conversions between vectors of floats and doubles.
2051 ;; Contains a mix of patterns to match standard pattern names
2052 ;; and those for intrinsics.
2054 ;; Float widening operations.
;; Float-extend the low half of a HF/SF Q-register vector (FCVTL).
2056 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2058 (float_extend:<VWIDE> (vec_select:<VHALF>
2059 (match_operand:VQ_HSF 1 "register_operand" "w")
2060 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2063 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2064 [(set_attr "type" "neon_fp_cvt_widen_s")]
2067 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point conversion with #fbits immediate (operand 2).
2069 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2070 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2071 (unspec:<VHSDF:FCVT_TARGET>
2072 [(match_operand:VHSDF 1 "register_operand" "w")
2073 (match_operand:SI 2 "immediate_operand" "i")]
2076 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2077 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]

;; Fixed-point -> float conversion with #fbits immediate (operand 2).
2080 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2081 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2082 (unspec:<VDQ_HSDI:FCVT_TARGET>
2083 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2084 (match_operand:SI 2 "immediate_operand" "i")]
2087 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2088 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2091 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2092 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2093 ;; the meaning of HI and LO changes depending on the target endianness.
2094 ;; While elsewhere we map the higher numbered elements of a vector to
2095 ;; the lower architectural lanes of the vector, for these patterns we want
2096 ;; to always treat "hi" as referring to the higher architectural lanes.
2097 ;; Consequently, while the patterns below look inconsistent with our
2098 ;; other big-endian patterns their behavior is as required.
;; Standard vec_unpacks_lo: lo-half PARALLEL (false) + _lo_ insn.
;; See the endianness note in the comment block above.
2100 (define_expand "vec_unpacks_lo_<mode>"
2101 [(match_operand:<VWIDE> 0 "register_operand" "")
2102 (match_operand:VQ_HSF 1 "register_operand" "")]
2105 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2106 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],

;; Float-extend the high half of a HF/SF Q-register vector (FCVTL2).
2112 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2113 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2114 (float_extend:<VWIDE> (vec_select:<VHALF>
2115 (match_operand:VQ_HSF 1 "register_operand" "w")
2116 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2119 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2120 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen the high architectural half of operand 1.  We build the hi-half
;; lane selector (third argument "true") and must therefore emit the
;; _hi_ insn, whose lane-select operand is matched by
;; vect_par_cnst_hi_half; the _lo_ pattern only accepts a lo-half
;; parallel and would not be recognized here.
2123 (define_expand "vec_unpacks_hi_<mode>"
2124 [(match_operand:<VWIDE> 0 "register_operand" "")
2125 (match_operand:VQ_HSF 1 "register_operand" "")]
2128 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2129 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; fcvtl: widen a 64-bit vector of floats to the double-width mode.
2134 (define_insn "aarch64_float_extend_lo_<Vwide>"
2135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2136 (float_extend:<VWIDE>
2137 (match_operand:VDF 1 "register_operand" "w")))]
2139 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2140 [(set_attr "type" "neon_fp_cvt_widen_s")]
2143 ;; Float narrowing operations.
;; fcvtn: narrow a full-width vector into a 64-bit vector.
2145 (define_insn "aarch64_float_truncate_lo_<mode>"
2146 [(set (match_operand:VDF 0 "register_operand" "=w")
2148 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2150 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2151 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2 writes the narrowed result into the high half of the
;; destination while operand 1 (tied to the output, constraint "0")
;; supplies the low half.  Little-endian vec_concat ordering.
2154 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2155 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2157 (match_operand:VDF 1 "register_operand" "0")
2159 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2160 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2161 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2162 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian counterpart: same instruction, vec_concat operands swapped.
2165 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2166 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2169 (match_operand:<VWIDE> 2 "register_operand" "w"))
2170 (match_operand:VDF 1 "register_operand" "0")))]
2171 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2172 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2173 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le/_be variant at expand time based on endianness.
2176 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2177 [(match_operand:<VDBL> 0 "register_operand" "=w")
2178 (match_operand:VDF 1 "register_operand" "0")
2179 (match_operand:<VWIDE> 2 "register_operand" "w")]
2182 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2183 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2184 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2185 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Narrow two V2DF inputs into one V4SF: fcvtn for the low half into a
;; V2SF temporary, then fcvtn2 for the high half.  The lo/hi operand
;; indices are swapped for big-endian, matching the hi/lo convention
;; described above.
2190 (define_expand "vec_pack_trunc_v2df"
2191 [(set (match_operand:V4SF 0 "register_operand")
2193 (float_truncate:V2SF
2194 (match_operand:V2DF 1 "register_operand"))
2195 (float_truncate:V2SF
2196 (match_operand:V2DF 2 "register_operand"))
2200 rtx tmp = gen_reg_rtx (V2SFmode);
2201 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2202 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2204 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2205 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2206 tmp, operands[hi]));
;; Narrow two DF scalars into one V2SF.  The temporary gathers the two
;; double-precision inputs side by side before the narrowing fcvtn, so
;; it must have mode V2DF: it is the destination of
;; gen_move_lo/hi_quad_v2df and the <VWIDE> (= V2DF) source of
;; gen_aarch64_float_truncate_lo_v2sf — not the V2SF result mode.
2211 (define_expand "vec_pack_trunc_df"
2212 [(set (match_operand:V2SF 0 "register_operand")
2215 (match_operand:DF 1 "register_operand"))
2217 (match_operand:DF 2 "register_operand"))
2221 rtx tmp = gen_reg_rtx (V2DFmode);
2222 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2223 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2225 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2226 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2227 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2233 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2235 ;; a = (b < c) ? b : c;
2236 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2237 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2240 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2241 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2242 ;; operand will be returned when both operands are zero (i.e. they may not
2243 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2244 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names for FP vectors, implemented with the
;; NaN-propagation-defined fmaxnm/fminnm instructions.
2247 (define_insn "<su><maxmin><mode>3"
2248 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2249 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2250 (match_operand:VHSDF 2 "register_operand" "w")))]
2252 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2253 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2256 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2257 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2258 ;; which implement the IEEE fmax ()/fmin () functions.
2259 (define_insn "<maxmin_uns><mode>3"
2260 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2261 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2262 (match_operand:VHSDF 2 "register_operand" "w")]
2265 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2266 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2269 ;; 'across lanes' add.
;; Reduce-add to scalar: perform the across-lanes add into a scratch
;; vector, then extract architectural lane 0 (endian-corrected).
2271 (define_expand "reduc_plus_scal_<mode>"
2272 [(match_operand:<VEL> 0 "register_operand" "=w")
2273 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2277 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2278 rtx scratch = gen_reg_rtx (<MODE>mode);
2279 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2280 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add of two vectors.
2285 (define_insn "aarch64_faddp<mode>"
2286 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2287 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2288 (match_operand:VHSDF 2 "register_operand" "w")]
2291 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2292 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; addv/addp across-lanes integer add producing a scalar element.
2295 (define_insn "aarch64_reduc_plus_internal<mode>"
2296 [(set (match_operand:VDQV 0 "register_operand" "=w")
2297 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2300 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2301 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no addv form; use addp with the source repeated.
2304 (define_insn "aarch64_reduc_plus_internalv2si"
2305 [(set (match_operand:V2SI 0 "register_operand" "=w")
2306 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2309 "addp\\t%0.2s, %1.2s, %1.2s"
2310 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce directly with the scalar-output faddp.
2313 (define_insn "reduc_plus_scal_<mode>"
2314 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2315 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2318 "faddp\\t%<Vetype>0, %1.<Vtype>"
2319 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduce-add: two rounds of pairwise faddp, then extract lane 0.
2322 (define_expand "reduc_plus_scal_v4sf"
2323 [(set (match_operand:SF 0 "register_operand")
2324 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2328 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2329 rtx scratch = gen_reg_rtx (V4SFmode);
2330 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2331 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2332 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per element (cls).
2336 (define_insn "clrsb<mode>2"
2337 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2338 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2340 "cls\\t%0.<Vtype>, %1.<Vtype>"
2341 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (clz).
2344 (define_insn "clz<mode>2"
2345 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2346 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2348 "clz\\t%0.<Vtype>, %1.<Vtype>"
2349 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte (cnt); byte vectors only.
2352 (define_insn "popcount<mode>2"
2353 [(set (match_operand:VB 0 "register_operand" "=w")
2354 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2356 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2357 [(set_attr "type" "neon_cnt<q>")]
2360 ;; 'across lanes' max and min ops.
2362 ;; Template for outputting a scalar, so we can create __builtins which can be
2363 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP reduce-max/min: reduce into a scratch vector, then extract the
;; endian-corrected lane 0.
2364 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2365 [(match_operand:<VEL> 0 "register_operand")
2366 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2370 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2371 rtx scratch = gen_reg_rtx (<MODE>mode);
2372 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2374 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2379 ;; Likewise for integer cases, signed and unsigned.
2380 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2381 [(match_operand:<VEL> 0 "register_operand")
2382 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2386 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2387 rtx scratch = gen_reg_rtx (<MODE>mode);
2388 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2390 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min via the "v" reduction forms.
2395 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2396 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2397 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2400 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2401 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no "v" form; use the pairwise op with the source repeated.
2404 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2405 [(set (match_operand:V2SI 0 "register_operand" "=w")
2406 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2409 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2410 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min counterpart.
2413 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2414 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2415 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2418 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2419 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2422 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2424 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2427 ;; Thus our BSL is of the form:
2428 ;; op0 = bsl (mask, op2, op3)
2429 ;; We can use any of:
2432 ;; bsl mask, op1, op2
2433 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2434 ;; bit op0, op2, mask
2435 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2436 ;; bif op0, op1, mask
2438 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2439 ;; Some forms of straight-line code may generate the equivalent form
2440 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical XOR/AND/XOR form of BSL; the three alternatives tie a
;; different input to the destination so any of bsl/bit/bif can be used.
2442 (define_insn "aarch64_simd_bsl<mode>_internal"
2443 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2447 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2448 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2449 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2450 (match_dup:<V_INT_EQUIV> 3)
2454 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2455 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2456 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2457 [(set_attr "type" "neon_bsl<q>")]
2460 ;; We need this form in addition to the above pattern to match the case
2461 ;; when combine tries merging three insns such that the second operand of
2462 ;; the outer XOR matches the second operand of the inner XOR rather than
2463 ;; the first. The two are equivalent but since recog doesn't try all
2464 ;; permutations of commutative operations, we have to have a separate pattern.
2466 (define_insn "*aarch64_simd_bsl<mode>_alt"
2467 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2471 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2472 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2473 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2474 (match_dup:<V_INT_EQUIV> 2)))]
2477 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2478 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2479 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2480 [(set_attr "type" "neon_bsl<q>")]
2483 ;; DImode is special, we want to avoid computing operations which are
2484 ;; more naturally computed in general purpose registers in the vector
2485 ;; registers. If we do that, we need to move all three operands from general
2486 ;; purpose registers to vector registers, then back again. However, we
2487 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2488 ;; optimizations based on the component operations of a BSL.
2490 ;; That means we need a splitter back to the individual operations, if they
2491 ;; would be better calculated on the integer side.
;; DImode BSL: alternatives 1-3 stay in vector registers; the fourth
;; (GP-register) alternative is split back to xor/and/xor below.
2493 (define_insn_and_split "aarch64_simd_bsldi_internal"
2494 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2498 (match_operand:DI 3 "register_operand" "w,0,w,r")
2499 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2500 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2505 bsl\\t%0.8b, %2.8b, %3.8b
2506 bit\\t%0.8b, %2.8b, %1.8b
2507 bif\\t%0.8b, %3.8b, %1.8b
2509 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2510 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2512 /* Split back to individual operations. If we're before reload, and
2513 able to create a temporary register, do so. If we're after reload,
2514 we've got an early-clobber destination register, so use that.
2515 Otherwise, we can't create pseudos and we can't yet guarantee that
2516 operands[0] is safe to write, so FAIL to split. */
2519 if (reload_completed)
2520 scratch = operands[0];
2521 else if (can_create_pseudo_p ())
2522 scratch = gen_reg_rtx (DImode);
2526 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2527 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2528 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2531 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2532 (set_attr "length" "4,4,4,12")]
;; Commuted-XOR DImode variant, mirroring *aarch64_simd_bsl<mode>_alt;
;; note the final xor here uses operands[2] rather than operands[3].
2535 (define_insn_and_split "aarch64_simd_bsldi_alt"
2536 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2540 (match_operand:DI 3 "register_operand" "w,w,0,r")
2541 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2542 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2547 bsl\\t%0.8b, %3.8b, %2.8b
2548 bit\\t%0.8b, %3.8b, %1.8b
2549 bif\\t%0.8b, %2.8b, %1.8b
2551 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2552 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2554 /* Split back to individual operations. If we're before reload, and
2555 able to create a temporary register, do so. If we're after reload,
2556 we've got an early-clobber destination register, so use that.
2557 Otherwise, we can't create pseudos and we can't yet guarantee that
2558 operands[0] is safe to write, so FAIL to split. */
2561 if (reload_completed)
2562 scratch = operands[0];
2563 else if (can_create_pseudo_p ())
2564 scratch = gen_reg_rtx (DImode);
2568 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2569 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2570 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2573 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2574 (set_attr "length" "4,4,4,12")]
;; Public BSL expander: for FP modes, lower everything to the integer
;; equivalent mode (the _internal pattern is integer-only), emit into a
;; temporary, and copy back with a mode-punning lowpart move.
2577 (define_expand "aarch64_simd_bsl<mode>"
2578 [(match_operand:VALLDIF 0 "register_operand")
2579 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2580 (match_operand:VALLDIF 2 "register_operand")
2581 (match_operand:VALLDIF 3 "register_operand")]
2584 /* We can't alias operands together if they have different modes. */
2585 rtx tmp = operands[0];
2586 if (FLOAT_MODE_P (<MODE>mode))
2588 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2589 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2590 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2592 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2593 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2597 if (tmp != operands[0])
2598 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 where mask bit set, else operand 2.
;; Special-cases the all-ones/all-zeros selections (plain move or
;; bitwise-not of the mask); otherwise forces values to registers and
;; emits a BSL with the mask as the selector.
2603 (define_expand "vcond_mask_<mode><v_int_equiv>"
2604 [(match_operand:VALLDI 0 "register_operand")
2605 (match_operand:VALLDI 1 "nonmemory_operand")
2606 (match_operand:VALLDI 2 "nonmemory_operand")
2607 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2610 /* If we have (a = (P) ? -1 : 0);
2611 Then we can simply move the generated mask (result must be int). */
2612 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2613 && operands[2] == CONST0_RTX (<MODE>mode))
2614 emit_move_insn (operands[0], operands[3]);
2615 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2616 else if (operands[1] == CONST0_RTX (<MODE>mode)
2617 && operands[2] == CONSTM1_RTX (<MODE>mode))
2618 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]))
2621 if (!REG_P (operands[1]))
2622 operands[1] = force_reg (<MODE>mode, operands[1]);
2623 if (!REG_P (operands[2]))
2624 operands[2] = force_reg (<MODE>mode, operands[2]);
2625 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2626 operands[1], operands[2]));
2632 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: emit the cm<cc> instruction matching the
;; rtx code, swapping operands for the codes with no direct form
;; (LTU/LEU use the reversed GTU/GEU) and synthesizing NE as ~EQ.
2634 (define_expand "vec_cmp<mode><mode>"
2635 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2636 (match_operator 1 "comparison_operator"
2637 [(match_operand:VSDQ_I_DI 2 "register_operand")
2638 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2641 rtx mask = operands[0];
2642 enum rtx_code code = GET_CODE (operands[1]);
2652 if (operands[3] == CONST0_RTX (<MODE>mode))
2657 if (!REG_P (operands[3]))
2658 operands[3] = force_reg (<MODE>mode, operands[3]);
2666 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2670 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2674 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2678 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2682 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2686 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2690 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2694 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2698 /* Handle NE as !EQ. */
2699 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2700 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2704 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Ordered comparisons map
;; to fcmge/fcmgt/fcmeq (with operand swaps for LT/LE); unordered forms
;; are built by first masking out NaN elements so no FP exception is
;; raised; LTGT is (a > b) | (b > a).
2714 (define_expand "vec_cmp<mode><v_int_equiv>"
2715 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2716 (match_operator 1 "comparison_operator"
2717 [(match_operand:VDQF 2 "register_operand")
2718 (match_operand:VDQF 3 "nonmemory_operand")]))]
2721 int use_zero_form = 0;
2722 enum rtx_code code = GET_CODE (operands[1]);
2723 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2725 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2734 if (operands[3] == CONST0_RTX (<MODE>mode))
2741 if (!REG_P (operands[3]))
2742 operands[3] = force_reg (<MODE>mode, operands[3]);
2752 comparison = gen_aarch64_cmlt<mode>;
2757 std::swap (operands[2], operands[3]);
2761 comparison = gen_aarch64_cmgt<mode>;
2766 comparison = gen_aarch64_cmle<mode>;
2771 std::swap (operands[2], operands[3]);
2775 comparison = gen_aarch64_cmge<mode>;
2779 comparison = gen_aarch64_cmeq<mode>;
2797 /* All of the above must not raise any FP exceptions. Thus we first
2798 check each operand for NaNs and force any elements containing NaN to
2799 zero before using them in the compare.
2800 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2801 (cm<cc> (isnan (a) ? 0.0 : a,
2802 isnan (b) ? 0.0 : b))
2803 We use the following transformations for doing the comparisons:
2807 a UNLT b -> b GT a. */
2809 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2810 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2811 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2812 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2813 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2814 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2819 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2820 lowpart_subreg (<V_INT_EQUIV>mode,
2823 gcc_assert (comparison != NULL);
2824 emit_insn (comparison (operands[0],
2825 lowpart_subreg (<MODE>mode,
2826 tmp0, <V_INT_EQUIV>mode),
2827 lowpart_subreg (<MODE>mode,
2828 tmp1, <V_INT_EQUIV>mode)));
2829 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2839 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2840 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2846 a NE b -> ~(a EQ b) */
2847 gcc_assert (comparison != NULL);
2848 emit_insn (comparison (operands[0], operands[2], operands[3]));
2850 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2854 /* LTGT is not guaranteed to not generate a FP exception. So let's
2855 go the faster way : ((a > b) || (b > a)). */
2856 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2857 operands[2], operands[3]));
2858 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2859 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2865 /* cmeq (a, a) & cmeq (b, b). */
2866 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2867 operands[2], operands[2]));
2868 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2869 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2871 if (code == UNORDERED)
2872 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2873 else if (code == UNEQ)
2875 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2876 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned vector compare: delegates to vec_cmp, which already handles
;; the unsigned rtx codes (GTU/GEU/LTU/LEU).
2887 (define_expand "vec_cmpu<mode><mode>"
2888 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2889 (match_operator 1 "comparison_operator"
2890 [(match_operand:VSDQ_I_DI 2 "register_operand")
2891 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2894 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2895 operands[2], operands[3]));
;; vcond: compute the comparison mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped.
2899 (define_expand "vcond<mode><mode>"
2900 [(set (match_operand:VALLDI 0 "register_operand")
2901 (if_then_else:VALLDI
2902 (match_operator 3 "comparison_operator"
2903 [(match_operand:VALLDI 4 "register_operand")
2904 (match_operand:VALLDI 5 "nonmemory_operand")])
2905 (match_operand:VALLDI 1 "nonmemory_operand")
2906 (match_operand:VALLDI 2 "nonmemory_operand")))]
2909 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2910 enum rtx_code code = GET_CODE (operands[3]);
2912 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2913 it as well as switch operands 1/2 in order to avoid the additional
2917 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2918 operands[4], operands[5]);
2919 std::swap (operands[1], operands[2]);
2921 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2922 operands[4], operands[5]));
2923 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2924 operands[2], mask));
;; Mixed-mode vcond: FP comparison selects between vectors of the
;; same-size integer mode (<V_cmp_mixed>).
2929 (define_expand "vcond<v_cmp_mixed><mode>"
2930 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2931 (if_then_else:<V_cmp_mixed>
2932 (match_operator 3 "comparison_operator"
2933 [(match_operand:VDQF_COND 4 "register_operand")
2934 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2935 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2936 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2939 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2940 enum rtx_code code = GET_CODE (operands[3]);
2942 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2943 it as well as switch operands 1/2 in order to avoid the additional
2947 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2948 operands[4], operands[5]);
2949 std::swap (operands[1], operands[2]);
2951 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2952 operands[4], operands[5]));
2953 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2954 operands[0], operands[1],
2955 operands[2], mask));
;; Unsigned vcond on integer vectors; same NE-as-swapped-EQ trick.
2960 (define_expand "vcondu<mode><mode>"
2961 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2962 (if_then_else:VSDQ_I_DI
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:VSDQ_I_DI 4 "register_operand")
2965 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2966 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2967 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2970 rtx mask = gen_reg_rtx (<MODE>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2973 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2974 it as well as switch operands 1/2 in order to avoid the additional
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2982 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2983 operands[4], operands[5]));
2984 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2985 operands[2], mask));
;; Unsigned integer comparison selecting between FP vectors.
2989 (define_expand "vcondu<mode><v_cmp_mixed>"
2990 [(set (match_operand:VDQF 0 "register_operand")
2992 (match_operator 3 "comparison_operator"
2993 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2994 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2995 (match_operand:VDQF 1 "nonmemory_operand")
2996 (match_operand:VDQF 2 "nonmemory_operand")))]
2999 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3000 enum rtx_code code = GET_CODE (operands[3]);
3002 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3003 it as well as switch operands 1/2 in order to avoid the additional
3007 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3008 operands[4], operands[5]);
3009 std::swap (operands[1], operands[2]);
3011 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3013 operands[4], operands[5]));
3014 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3015 operands[2], mask));
3019 ;; Patterns for AArch64 SIMD Intrinsics.
3021 ;; Lane extraction with sign extension to general purpose register.
;; Two mode iterators (GPI and VDQQH) are active in this pattern, so
;; mode attributes must be iterator-qualified; the endian lane flip
;; needs the *vector* mode, exactly as in the zero-extend twin below
;; which already writes <VDQQH:MODE>mode.  A bare <MODE> here is
;; ambiguous between GPI and VDQQH.
3022 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3023 [(set (match_operand:GPI 0 "register_operand" "=r")
3026 (match_operand:VDQQH 1 "register_operand" "w")
3027 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3030 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3031 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3033 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension (umov) to a general register.
;; NOTE(review): the template uses unqualified <Vetype> while two mode
;; iterators (GPI, VDQQH) are active — confirm this resolves to the
;; vector element type; the smov twin qualifies it as <VDQQH:Vetype>.
3036 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3037 [(set (match_operand:GPI 0 "register_operand" "=r")
3040 (match_operand:VDQQH 1 "register_operand" "w")
3041 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3044 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3045 INTVAL (operands[2]));
3046 return "umov\\t%w0, %1.<Vetype>[%2]";
3048 [(set_attr "type" "neon_to_gp<q>")]
3051 ;; Lane extraction of a value, neither sign nor zero extension
3052 ;; is guaranteed so upper bits should be considered undefined.
3053 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: umov to a GP register, dup to a SIMD scalar, or
;; st1 of a single lane straight to memory.
3054 (define_insn "aarch64_get_lane<mode>"
3055 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3057 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3058 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3061 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3062 switch (which_alternative)
3065 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3067 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3069 return "st1\\t{%1.<Vetype>}[%2], %0";
3074 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent 64-bit values into one 128-bit register.  The
;; condition requires operand 2's address to be exactly operand 1's
;; address plus the mode size, i.e. a contiguous pair.
3077 (define_insn "load_pair_lanes<mode>"
3078 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3080 (match_operand:VDC 1 "memory_operand" "Utq")
3081 (match_operand:VDC 2 "memory_operand" "m")))]
3082 "TARGET_SIMD && !STRICT_ALIGNMENT
3083 && rtx_equal_p (XEXP (operands[2], 0),
3084 plus_constant (Pmode,
3085 XEXP (operands[1], 0),
3086 GET_MODE_SIZE (<MODE>mode)))"
3088 [(set_attr "type" "neon_load1_1reg_q")]
;; Store two 64-bit values as a pair (stp), from either SIMD or GP regs.
3091 (define_insn "store_pair_lanes<mode>"
3092 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3094 (match_operand:VDC 1 "register_operand" "w, r")
3095 (match_operand:VDC 2 "register_operand" "w, r")))]
3099 stp\\t%x1, %x2, %y0"
3100 [(set_attr "type" "neon_stp, store_16")]
3103 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine with a zero high half (little-endian): the zero needs no
;; instruction, so this is just a move/load of the low half.
3106 (define_insn "*aarch64_combinez<mode>"
3107 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3109 (match_operand:VDC 1 "general_operand" "w,?r,m")
3110 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3111 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3116 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3117 (set_attr "simd" "yes,*,yes")
3118 (set_attr "fp" "*,yes,*")]
;; Big-endian counterpart: vec_concat operand order is reversed.
3121 (define_insn "*aarch64_combinez_be<mode>"
3122 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3124 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3125 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3126 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3131 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3132 (set_attr "simd" "yes,*,yes")
3133 (set_attr "fp" "*,yes,*")]
;; General combine of two 64-bit values; lowering is delegated to
;; aarch64_split_simd_combine in aarch64.c.
3136 (define_expand "aarch64_combine<mode>"
3137 [(match_operand:<VDBL> 0 "register_operand")
3138 (match_operand:VDC 1 "register_operand")
3139 (match_operand:VDC 2 "register_operand")]
3142 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Combine via explicit moves into the low then high quadrant.
3148 (define_expand "aarch64_simd_combine<mode>"
3149 [(match_operand:<VDBL> 0 "register_operand")
3150 (match_operand:VDC 1 "register_operand")
3151 (match_operand:VDC 2 "register_operand")]
3154 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3155 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3158 [(set_attr "type" "multiple")]
3161 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves: saddl2/uaddl2/ssubl2/usubl2.
3163 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3164 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3165 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3166 (match_operand:VQW 1 "register_operand" "w")
3167 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3168 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3169 (match_operand:VQW 2 "register_operand" "w")
3172 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3173 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the low halves: saddl/uaddl/ssubl/usubl.
3176 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3177 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3178 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3179 (match_operand:VQW 1 "register_operand" "w")
3180 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3181 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3182 (match_operand:VQW 2 "register_operand" "w")
3185 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3186 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four intrinsic expanders below each build the hi-half selector
;; ("true") and defer to the corresponding _hi_internal pattern.
3190 (define_expand "aarch64_saddl2<mode>"
3191 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3192 (match_operand:VQW 1 "register_operand" "w")
3193 (match_operand:VQW 2 "register_operand" "w")]
3196 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3197 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3202 (define_expand "aarch64_uaddl2<mode>"
3203 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3204 (match_operand:VQW 1 "register_operand" "w")
3205 (match_operand:VQW 2 "register_operand" "w")]
3208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3209 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3214 (define_expand "aarch64_ssubl2<mode>"
3215 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3216 (match_operand:VQW 1 "register_operand" "w")
3217 (match_operand:VQW 2 "register_operand" "w")]
3220 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3221 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3226 (define_expand "aarch64_usubl2<mode>"
3227 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3228 (match_operand:VQW 1 "register_operand" "w")
3229 (match_operand:VQW 2 "register_operand" "w")]
3232 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3233 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of whole 64-bit vectors.
3238 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3239 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3240 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3241 (match_operand:VD_BHSI 1 "register_operand" "w"))
3243 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3245 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3246 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3249 ;; <su><addsub>w<q>.
;; widen_ssum for 128-bit sources: accumulate the low half with saddw,
;; then the high half with saddw2.
3251 (define_expand "widen_ssum<mode>3"
3252 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3253 (plus:<VDBLW> (sign_extend:<VDBLW>
3254 (match_operand:VQW 1 "register_operand" ""))
3255 (match_operand:<VDBLW> 2 "register_operand" "")))]
3258 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3259 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3261 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3263 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit source: a single saddw suffices.
3268 (define_expand "widen_ssum<mode>3"
3269 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3270 (plus:<VWIDE> (sign_extend:<VWIDE>
3271 (match_operand:VD_BHSI 1 "register_operand" ""))
3272 (match_operand:<VWIDE> 2 "register_operand" "")))]
3275 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two expanders above (uaddw/uaddw2).
3279 (define_expand "widen_usum<mode>3"
3280 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3281 (plus:<VDBLW> (zero_extend:<VDBLW>
3282 (match_operand:VQW 1 "register_operand" ""))
3283 (match_operand:<VDBLW> 2 "register_operand" "")))]
3286 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3287 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3289 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3291 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3296 (define_expand "widen_usum<mode>3"
3297 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3298 (plus:<VWIDE> (zero_extend:<VWIDE>
3299 (match_operand:VD_BHSI 1 "register_operand" ""))
3300 (match_operand:<VWIDE> 2 "register_operand" "")))]
3303 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3307 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3308 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3309 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3311 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3313 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3314 [(set_attr "type" "neon_sub_widen")]
3317 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3319 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3322 (match_operand:VQW 2 "register_operand" "w")
3323 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3325 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3326 [(set_attr "type" "neon_sub_widen")]
3329 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3330 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3331 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3334 (match_operand:VQW 2 "register_operand" "w")
3335 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3337 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3338 [(set_attr "type" "neon_sub_widen")]
3341 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3342 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3344 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3345 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3347 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3348 [(set_attr "type" "neon_add_widen")]
3351 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3352 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3356 (match_operand:VQW 2 "register_operand" "w")
3357 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3358 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3360 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3361 [(set_attr "type" "neon_add_widen")]
3364 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3365 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3369 (match_operand:VQW 2 "register_operand" "w")
3370 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3371 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3373 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3374 [(set_attr "type" "neon_add_widen")]
3377 (define_expand "aarch64_saddw2<mode>"
3378 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3379 (match_operand:<VWIDE> 1 "register_operand" "w")
3380 (match_operand:VQW 2 "register_operand" "w")]
3383 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3389 (define_expand "aarch64_uaddw2<mode>"
3390 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3391 (match_operand:<VWIDE> 1 "register_operand" "w")
3392 (match_operand:VQW 2 "register_operand" "w")]
3395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3396 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3402 (define_expand "aarch64_ssubw2<mode>"
3403 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3404 (match_operand:<VWIDE> 1 "register_operand" "w")
3405 (match_operand:VQW 2 "register_operand" "w")]
3408 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3409 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3414 (define_expand "aarch64_usubw2<mode>"
3415 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3416 (match_operand:<VWIDE> 1 "register_operand" "w")
3417 (match_operand:VQW 2 "register_operand" "w")]
3420 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3421 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3426 ;; <su><r>h<addsub>.
3428 (define_expand "<u>avg<mode>3_floor"
3429 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3430 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3431 (match_operand:VDQ_BHSI 2 "register_operand")]
3436 (define_expand "<u>avg<mode>3_ceil"
3437 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3438 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3439 (match_operand:VDQ_BHSI 2 "register_operand")]
3444 (define_insn "aarch64_<sur>h<addsub><mode>"
3445 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3446 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3447 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3450 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3451 [(set_attr "type" "neon_<addsub>_halve<q>")]
3454 ;; <r><addsub>hn<q>.
3456 (define_insn "aarch64_<sur><addsub>hn<mode>"
3457 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3458 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3459 (match_operand:VQN 2 "register_operand" "w")]
3462 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3463 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3466 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3467 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3468 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3469 (match_operand:VQN 2 "register_operand" "w")
3470 (match_operand:VQN 3 "register_operand" "w")]
3473 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3474 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3479 (define_insn "aarch64_pmul<mode>"
3480 [(set (match_operand:VB 0 "register_operand" "=w")
3481 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3482 (match_operand:VB 2 "register_operand" "w")]
3485 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3486 [(set_attr "type" "neon_mul_<Vetype><q>")]
3491 (define_insn "aarch64_fmulx<mode>"
3492 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3494 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3495 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3498 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499 [(set_attr "type" "neon_fp_mul_<stype>")]
3502 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3504 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3505 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3507 [(match_operand:VDQSF 1 "register_operand" "w")
3508 (vec_duplicate:VDQSF
3510 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3511 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3515 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3516 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3518 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3521 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3523 (define_insn "*aarch64_mulx_elt<mode>"
3524 [(set (match_operand:VDQF 0 "register_operand" "=w")
3526 [(match_operand:VDQF 1 "register_operand" "w")
3529 (match_operand:VDQF 2 "register_operand" "w")
3530 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3534 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3535 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3537 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3542 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3543 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3545 [(match_operand:VHSDF 1 "register_operand" "w")
3546 (vec_duplicate:VHSDF
3547 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3550 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3551 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3554 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3555 ;; vmulxd_lane_f64 == vmulx_lane_f64
3556 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3558 (define_insn "*aarch64_vgetfmulx<mode>"
3559 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3561 [(match_operand:<VEL> 1 "register_operand" "w")
3563 (match_operand:VDQF 2 "register_operand" "w")
3564 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3568 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3569 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3571 [(set_attr "type" "fmul<Vetype>")]
3575 (define_insn "aarch64_<su_optab><optab><mode>"
3576 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3577 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3578 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3580 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3581 [(set_attr "type" "neon_<optab><q>")]
3584 ;; suqadd and usqadd
3586 (define_insn "aarch64_<sur>qadd<mode>"
3587 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3588 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3589 (match_operand:VSDQ_I 2 "register_operand" "w")]
3592 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3593 [(set_attr "type" "neon_qadd<q>")]
3598 (define_insn "aarch64_sqmovun<mode>"
3599 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3600 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3603 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3604 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3607 ;; sqmovn and uqmovn
3609 (define_insn "aarch64_<sur>qmovn<mode>"
3610 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3611 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3614 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3615 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3620 (define_insn "aarch64_s<optab><mode>"
3621 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3623 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3625 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3626 [(set_attr "type" "neon_<optab><q>")]
3631 (define_insn "aarch64_sq<r>dmulh<mode>"
3632 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3634 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3635 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3638 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3639 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3644 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3645 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3647 [(match_operand:VDQHS 1 "register_operand" "w")
3649 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3650 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3654 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3655 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3656 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3659 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3660 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3662 [(match_operand:VDQHS 1 "register_operand" "w")
3664 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3665 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3669 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3670 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3671 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3674 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3675 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3677 [(match_operand:SD_HSI 1 "register_operand" "w")
3679 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3680 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3684 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3685 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3686 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3689 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3690 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3692 [(match_operand:SD_HSI 1 "register_operand" "w")
3694 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3695 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3699 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3700 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3701 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3706 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3707 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3709 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3710 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3711 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3714 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3715 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3718 ;; sqrdml[as]h_lane.
3720 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3721 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3723 [(match_operand:VDQHS 1 "register_operand" "0")
3724 (match_operand:VDQHS 2 "register_operand" "w")
3726 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3727 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3731 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3733 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3735 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3738 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3739 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3741 [(match_operand:SD_HSI 1 "register_operand" "0")
3742 (match_operand:SD_HSI 2 "register_operand" "w")
3744 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3745 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3751 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3756 ;; sqrdml[as]h_laneq.
3758 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3759 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3761 [(match_operand:VDQHS 1 "register_operand" "0")
3762 (match_operand:VDQHS 2 "register_operand" "w")
3764 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3765 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3769 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3771 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3773 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3776 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3777 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3779 [(match_operand:SD_HSI 1 "register_operand" "0")
3780 (match_operand:SD_HSI 2 "register_operand" "w")
3782 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3783 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3787 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3789 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3791 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3796 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3799 (match_operand:<VWIDE> 1 "register_operand" "0")
3802 (sign_extend:<VWIDE>
3803 (match_operand:VSD_HSI 2 "register_operand" "w"))
3804 (sign_extend:<VWIDE>
3805 (match_operand:VSD_HSI 3 "register_operand" "w")))
3808 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3809 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3814 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3815 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3817 (match_operand:<VWIDE> 1 "register_operand" "0")
3820 (sign_extend:<VWIDE>
3821 (match_operand:VD_HSI 2 "register_operand" "w"))
3822 (sign_extend:<VWIDE>
3823 (vec_duplicate:VD_HSI
3825 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3826 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3831 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3833 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3835 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3838 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3839 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3841 (match_operand:<VWIDE> 1 "register_operand" "0")
3844 (sign_extend:<VWIDE>
3845 (match_operand:VD_HSI 2 "register_operand" "w"))
3846 (sign_extend:<VWIDE>
3847 (vec_duplicate:VD_HSI
3849 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3850 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3855 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3857 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3859 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3862 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3863 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3865 (match_operand:<VWIDE> 1 "register_operand" "0")
3868 (sign_extend:<VWIDE>
3869 (match_operand:SD_HSI 2 "register_operand" "w"))
3870 (sign_extend:<VWIDE>
3872 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3873 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3878 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3880 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3882 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3885 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3886 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3888 (match_operand:<VWIDE> 1 "register_operand" "0")
3891 (sign_extend:<VWIDE>
3892 (match_operand:SD_HSI 2 "register_operand" "w"))
3893 (sign_extend:<VWIDE>
3895 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3896 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3901 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3903 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3905 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3910 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3911 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3913 (match_operand:<VWIDE> 1 "register_operand" "0")
3916 (sign_extend:<VWIDE>
3917 (match_operand:VD_HSI 2 "register_operand" "w"))
3918 (sign_extend:<VWIDE>
3919 (vec_duplicate:VD_HSI
3920 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3923 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3924 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3929 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3930 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3932 (match_operand:<VWIDE> 1 "register_operand" "0")
3935 (sign_extend:<VWIDE>
3937 (match_operand:VQ_HSI 2 "register_operand" "w")
3938 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3939 (sign_extend:<VWIDE>
3941 (match_operand:VQ_HSI 3 "register_operand" "w")
3945 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3946 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3949 (define_expand "aarch64_sqdmlal2<mode>"
3950 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3951 (match_operand:<VWIDE> 1 "register_operand" "w")
3952 (match_operand:VQ_HSI 2 "register_operand" "w")
3953 (match_operand:VQ_HSI 3 "register_operand" "w")]
3956 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3957 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3958 operands[2], operands[3], p));
3962 (define_expand "aarch64_sqdmlsl2<mode>"
3963 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (match_operand:<VWIDE> 1 "register_operand" "w")
3965 (match_operand:VQ_HSI 2 "register_operand" "w")
3966 (match_operand:VQ_HSI 3 "register_operand" "w")]
3969 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3970 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3971 operands[2], operands[3], p));
3977 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3978 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3980 (match_operand:<VWIDE> 1 "register_operand" "0")
3983 (sign_extend:<VWIDE>
3985 (match_operand:VQ_HSI 2 "register_operand" "w")
3986 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3987 (sign_extend:<VWIDE>
3988 (vec_duplicate:<VHALF>
3990 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3991 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3996 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3998 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4000 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4003 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4004 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4006 (match_operand:<VWIDE> 1 "register_operand" "0")
4009 (sign_extend:<VWIDE>
4011 (match_operand:VQ_HSI 2 "register_operand" "w")
4012 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4013 (sign_extend:<VWIDE>
4014 (vec_duplicate:<VHALF>
4016 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4017 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4022 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4024 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4026 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4029 (define_expand "aarch64_sqdmlal2_lane<mode>"
4030 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4031 (match_operand:<VWIDE> 1 "register_operand" "w")
4032 (match_operand:VQ_HSI 2 "register_operand" "w")
4033 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4034 (match_operand:SI 4 "immediate_operand" "i")]
4037 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4038 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4039 operands[2], operands[3],
4044 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4045 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4046 (match_operand:<VWIDE> 1 "register_operand" "w")
4047 (match_operand:VQ_HSI 2 "register_operand" "w")
4048 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4049 (match_operand:SI 4 "immediate_operand" "i")]
4052 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4053 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4054 operands[2], operands[3],
4059 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4060 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4061 (match_operand:<VWIDE> 1 "register_operand" "w")
4062 (match_operand:VQ_HSI 2 "register_operand" "w")
4063 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4064 (match_operand:SI 4 "immediate_operand" "i")]
4067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4068 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4069 operands[2], operands[3],
4074 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4075 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4076 (match_operand:<VWIDE> 1 "register_operand" "w")
4077 (match_operand:VQ_HSI 2 "register_operand" "w")
4078 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4079 (match_operand:SI 4 "immediate_operand" "i")]
4082 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4083 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4084 operands[2], operands[3],
4089 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4090 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4092 (match_operand:<VWIDE> 1 "register_operand" "0")
4095 (sign_extend:<VWIDE>
4097 (match_operand:VQ_HSI 2 "register_operand" "w")
4098 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:<VHALF>
4101 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4104 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4105 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4108 (define_expand "aarch64_sqdmlal2_n<mode>"
4109 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4110 (match_operand:<VWIDE> 1 "register_operand" "w")
4111 (match_operand:VQ_HSI 2 "register_operand" "w")
4112 (match_operand:<VEL> 3 "register_operand" "w")]
4115 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4116 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4117 operands[2], operands[3],
4122 (define_expand "aarch64_sqdmlsl2_n<mode>"
4123 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4124 (match_operand:<VWIDE> 1 "register_operand" "w")
4125 (match_operand:VQ_HSI 2 "register_operand" "w")
4126 (match_operand:<VEL> 3 "register_operand" "w")]
4129 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4130 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4131 operands[2], operands[3],
4138 (define_insn "aarch64_sqdmull<mode>"
4139 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (sign_extend:<VWIDE>
4143 (match_operand:VSD_HSI 1 "register_operand" "w"))
4144 (sign_extend:<VWIDE>
4145 (match_operand:VSD_HSI 2 "register_operand" "w")))
4148 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4149 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4154 (define_insn "aarch64_sqdmull_lane<mode>"
4155 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158 (sign_extend:<VWIDE>
4159 (match_operand:VD_HSI 1 "register_operand" "w"))
4160 (sign_extend:<VWIDE>
4161 (vec_duplicate:VD_HSI
4163 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4164 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4169 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4170 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4172 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4175 (define_insn "aarch64_sqdmull_laneq<mode>"
4176 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4179 (sign_extend:<VWIDE>
4180 (match_operand:VD_HSI 1 "register_operand" "w"))
4181 (sign_extend:<VWIDE>
4182 (vec_duplicate:VD_HSI
4184 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4185 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4190 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4191 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4193 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4196 (define_insn "aarch64_sqdmull_lane<mode>"
4197 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4200 (sign_extend:<VWIDE>
4201 (match_operand:SD_HSI 1 "register_operand" "w"))
4202 (sign_extend:<VWIDE>
4204 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4205 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4210 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4211 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4213 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4216 (define_insn "aarch64_sqdmull_laneq<mode>"
4217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4220 (sign_extend:<VWIDE>
4221 (match_operand:SD_HSI 1 "register_operand" "w"))
4222 (sign_extend:<VWIDE>
4224 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4225 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4230 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4231 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4233 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4238 (define_insn "aarch64_sqdmull_n<mode>"
4239 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4242 (sign_extend:<VWIDE>
4243 (match_operand:VD_HSI 1 "register_operand" "w"))
4244 (sign_extend:<VWIDE>
4245 (vec_duplicate:VD_HSI
4246 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4250 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4251 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4258 (define_insn "aarch64_sqdmull2<mode>_internal"
4259 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4262 (sign_extend:<VWIDE>
4264 (match_operand:VQ_HSI 1 "register_operand" "w")
4265 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4266 (sign_extend:<VWIDE>
4268 (match_operand:VQ_HSI 2 "register_operand" "w")
4273 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4274 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4277 (define_expand "aarch64_sqdmull2<mode>"
4278 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4279 (match_operand:VQ_HSI 1 "register_operand" "w")
4280 (match_operand:VQ_HSI 2 "register_operand" "w")]
4283 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4284 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4291 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4292 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4295 (sign_extend:<VWIDE>
4297 (match_operand:VQ_HSI 1 "register_operand" "w")
4298 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4299 (sign_extend:<VWIDE>
4300 (vec_duplicate:<VHALF>
4302 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4303 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4308 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4309 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4311 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4314 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4315 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4318 (sign_extend:<VWIDE>
4320 (match_operand:VQ_HSI 1 "register_operand" "w")
4321 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4322 (sign_extend:<VWIDE>
4323 (vec_duplicate:<VHALF>
4325 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4326 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4331 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4332 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4334 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4337 (define_expand "aarch64_sqdmull2_lane<mode>"
4338 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4339 (match_operand:VQ_HSI 1 "register_operand" "w")
4340 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4341 (match_operand:SI 3 "immediate_operand" "i")]
4344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4345 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4346 operands[2], operands[3],
4351 (define_expand "aarch64_sqdmull2_laneq<mode>"
4352 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4353 (match_operand:VQ_HSI 1 "register_operand" "w")
4354 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4355 (match_operand:SI 3 "immediate_operand" "i")]
4358 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4359 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4360 operands[2], operands[3],
4367 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4368 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4371 (sign_extend:<VWIDE>
4373 (match_operand:VQ_HSI 1 "register_operand" "w")
4374 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4375 (sign_extend:<VWIDE>
4376 (vec_duplicate:<VHALF>
4377 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4381 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4382 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4385 (define_expand "aarch64_sqdmull2_n<mode>"
4386 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4387 (match_operand:VQ_HSI 1 "register_operand" "w")
4388 (match_operand:<VEL> 2 "register_operand" "w")]
4391 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4392 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4399 (define_insn "aarch64_<sur>shl<mode>"
4400 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4402 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4403 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4406 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4407 [(set_attr "type" "neon_shift_reg<q>")]
4413 (define_insn "aarch64_<sur>q<r>shl<mode>"
4414 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4416 [(match_operand:VSDQ_I 1 "register_operand" "w")
4417 (match_operand:VSDQ_I 2 "register_operand" "w")]
4420 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4421 [(set_attr "type" "neon_sat_shift_reg<q>")]
4426 (define_insn "aarch64_<sur>shll_n<mode>"
4427 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4428 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4430 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4434 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4435 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4437 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4439 [(set_attr "type" "neon_shift_imm_long")]
4444 (define_insn "aarch64_<sur>shll2_n<mode>"
4445 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4446 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4447 (match_operand:SI 2 "immediate_operand" "i")]
4451 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4452 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4454 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4456 [(set_attr "type" "neon_shift_imm_long")]
4461 (define_insn "aarch64_<sur>shr_n<mode>"
4462 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4463 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4465 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4468 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4469 [(set_attr "type" "neon_sat_shift_imm<q>")]
4474 (define_insn "aarch64_<sur>sra_n<mode>"
4475 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4476 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4477 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4479 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4482 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4483 [(set_attr "type" "neon_shift_acc<q>")]
4488 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4489 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4490 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4491 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4493 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4496 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4497 [(set_attr "type" "neon_shift_imm<q>")]
4502 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4503 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4504 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4506 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4509 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4510 [(set_attr "type" "neon_sat_shift_imm<q>")]
4516 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4517 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4518 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4520 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4523 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4524 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4528 ;; cm(eq|ge|gt|lt|le)
4529 ;; Note, we have constraints for Dz and Z as different expanders
4530 ;; have different ideas of what should be passed to this pattern.
4532 (define_insn "aarch64_cm<optab><mode>"
4533 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4535 (COMPARISONS:<V_INT_EQUIV>
4536 (match_operand:VDQ_I 1 "register_operand" "w,w")
4537 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4541 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4542 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4543 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4546 (define_insn_and_split "aarch64_cm<optab>di"
4547 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4550 (match_operand:DI 1 "register_operand" "w,w,r")
4551 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4553 (clobber (reg:CC CC_REGNUM))]
4556 "&& reload_completed"
4557 [(set (match_operand:DI 0 "register_operand")
4560 (match_operand:DI 1 "register_operand")
4561 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4564 /* If we are in the general purpose register file,
4565 we split to a sequence of comparison and store. */
4566 if (GP_REGNUM_P (REGNO (operands[0]))
4567 && GP_REGNUM_P (REGNO (operands[1])))
4569 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4570 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4571 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4572 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4575 /* Otherwise, we expand to a similar pattern which does not
4576 clobber CC_REGNUM. */
4578 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4581 (define_insn "*aarch64_cm<optab>di"
4582 [(set (match_operand:DI 0 "register_operand" "=w,w")
4585 (match_operand:DI 1 "register_operand" "w,w")
4586 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4588 "TARGET_SIMD && reload_completed"
4590 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4591 cm<optab>\t%d0, %d1, #0"
4592 [(set_attr "type" "neon_compare, neon_compare_zero")]
4597 (define_insn "aarch64_cm<optab><mode>"
4598 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4600 (UCOMPARISONS:<V_INT_EQUIV>
4601 (match_operand:VDQ_I 1 "register_operand" "w")
4602 (match_operand:VDQ_I 2 "register_operand" "w")
4605 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4606 [(set_attr "type" "neon_compare<q>")]
4609 (define_insn_and_split "aarch64_cm<optab>di"
4610 [(set (match_operand:DI 0 "register_operand" "=w,r")
4613 (match_operand:DI 1 "register_operand" "w,r")
4614 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4616 (clobber (reg:CC CC_REGNUM))]
4619 "&& reload_completed"
4620 [(set (match_operand:DI 0 "register_operand")
4623 (match_operand:DI 1 "register_operand")
4624 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4627 /* If we are in the general purpose register file,
4628 we split to a sequence of comparison and store. */
4629 if (GP_REGNUM_P (REGNO (operands[0]))
4630 && GP_REGNUM_P (REGNO (operands[1])))
4632 machine_mode mode = CCmode;
4633 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4634 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4635 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4638 /* Otherwise, we expand to a similar pattern which does not
4639 clobber CC_REGNUM. */
4641 [(set_attr "type" "neon_compare,multiple")]
4644 (define_insn "*aarch64_cm<optab>di"
4645 [(set (match_operand:DI 0 "register_operand" "=w")
4648 (match_operand:DI 1 "register_operand" "w")
4649 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4651 "TARGET_SIMD && reload_completed"
4652 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4653 [(set_attr "type" "neon_compare")]
4658 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4659 ;; we don't have any insns using ne, and aarch64_vcond outputs
4660 ;; not (neg (eq (and x y) 0))
4661 ;; which is rewritten by simplify_rtx as
4662 ;; plus (eq (and x y) 0) -1.
4664 (define_insn "aarch64_cmtst<mode>"
4665 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4669 (match_operand:VDQ_I 1 "register_operand" "w")
4670 (match_operand:VDQ_I 2 "register_operand" "w"))
4671 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4672 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4675 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4676 [(set_attr "type" "neon_tst<q>")]
4679 (define_insn_and_split "aarch64_cmtstdi"
4680 [(set (match_operand:DI 0 "register_operand" "=w,r")
4684 (match_operand:DI 1 "register_operand" "w,r")
4685 (match_operand:DI 2 "register_operand" "w,r"))
4687 (clobber (reg:CC CC_REGNUM))]
4690 "&& reload_completed"
4691 [(set (match_operand:DI 0 "register_operand")
4695 (match_operand:DI 1 "register_operand")
4696 (match_operand:DI 2 "register_operand"))
4699 /* If we are in the general purpose register file,
4700 we split to a sequence of comparison and store. */
4701 if (GP_REGNUM_P (REGNO (operands[0]))
4702 && GP_REGNUM_P (REGNO (operands[1])))
4704 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4705 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4706 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4707 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4708 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4711 /* Otherwise, we expand to a similar pattern which does not
4712 clobber CC_REGNUM. */
4714 [(set_attr "type" "neon_tst,multiple")]
4717 (define_insn "*aarch64_cmtstdi"
4718 [(set (match_operand:DI 0 "register_operand" "=w")
4722 (match_operand:DI 1 "register_operand" "w")
4723 (match_operand:DI 2 "register_operand" "w"))
4726 "cmtst\t%d0, %d1, %d2"
4727 [(set_attr "type" "neon_tst")]
4730 ;; fcm(eq|ge|gt|le|lt)
4732 (define_insn "aarch64_cm<optab><mode>"
4733 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4735 (COMPARISONS:<V_INT_EQUIV>
4736 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4737 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4741 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4742 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4743 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4747 ;; Note we can also handle what would be fac(le|lt) by
4748 ;; generating fac(ge|gt).
4750 (define_insn "aarch64_fac<optab><mode>"
4751 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4753 (FAC_COMPARISONS:<V_INT_EQUIV>
4755 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4757 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4760 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4761 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4766 (define_insn "aarch64_addp<mode>"
4767 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4769 [(match_operand:VD_BHSI 1 "register_operand" "w")
4770 (match_operand:VD_BHSI 2 "register_operand" "w")]
4773 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4774 [(set_attr "type" "neon_reduc_add<q>")]
4777 (define_insn "aarch64_addpdi"
4778 [(set (match_operand:DI 0 "register_operand" "=w")
4780 [(match_operand:V2DI 1 "register_operand" "w")]
4784 [(set_attr "type" "neon_reduc_add")]
4789 (define_expand "sqrt<mode>2"
4790 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4791 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4794 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4798 (define_insn "*sqrt<mode>2"
4799 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4800 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4802 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4803 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4806 ;; Patterns for vector struct loads and stores.
4808 (define_insn "aarch64_simd_ld2<mode>"
4809 [(set (match_operand:OI 0 "register_operand" "=w")
4810 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4811 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4814 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4815 [(set_attr "type" "neon_load2_2reg<q>")]
4818 (define_insn "aarch64_simd_ld2r<mode>"
4819 [(set (match_operand:OI 0 "register_operand" "=w")
4820 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4821 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4824 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4825 [(set_attr "type" "neon_load2_all_lanes<q>")]
4828 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4829 [(set (match_operand:OI 0 "register_operand" "=w")
4830 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4831 (match_operand:OI 2 "register_operand" "0")
4832 (match_operand:SI 3 "immediate_operand" "i")
4833 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4837 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4838 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4840 [(set_attr "type" "neon_load2_one_lane")]
4843 (define_expand "vec_load_lanesoi<mode>"
4844 [(set (match_operand:OI 0 "register_operand" "=w")
4845 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4846 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850 if (BYTES_BIG_ENDIAN)
4852 rtx tmp = gen_reg_rtx (OImode);
4853 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4854 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4855 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4858 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4862 (define_insn "aarch64_simd_st2<mode>"
4863 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4864 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4865 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4868 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4869 [(set_attr "type" "neon_store2_2reg<q>")]
4872 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4873 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4874 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4875 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4876 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4877 (match_operand:SI 2 "immediate_operand" "i")]
4881 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4882 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4884 [(set_attr "type" "neon_store2_one_lane<q>")]
4887 (define_expand "vec_store_lanesoi<mode>"
4888 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4889 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4890 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4894 if (BYTES_BIG_ENDIAN)
4896 rtx tmp = gen_reg_rtx (OImode);
4897 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4898 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4899 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4902 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4906 (define_insn "aarch64_simd_ld3<mode>"
4907 [(set (match_operand:CI 0 "register_operand" "=w")
4908 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4909 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4912 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4913 [(set_attr "type" "neon_load3_3reg<q>")]
4916 (define_insn "aarch64_simd_ld3r<mode>"
4917 [(set (match_operand:CI 0 "register_operand" "=w")
4918 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4919 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4922 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4923 [(set_attr "type" "neon_load3_all_lanes<q>")]
4926 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4927 [(set (match_operand:CI 0 "register_operand" "=w")
4928 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4929 (match_operand:CI 2 "register_operand" "0")
4930 (match_operand:SI 3 "immediate_operand" "i")
4931 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4935 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4936 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4938 [(set_attr "type" "neon_load3_one_lane")]
4941 (define_expand "vec_load_lanesci<mode>"
4942 [(set (match_operand:CI 0 "register_operand" "=w")
4943 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4944 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948 if (BYTES_BIG_ENDIAN)
4950 rtx tmp = gen_reg_rtx (CImode);
4951 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4952 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4953 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4956 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4960 (define_insn "aarch64_simd_st3<mode>"
4961 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4962 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4963 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4966 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4967 [(set_attr "type" "neon_store3_3reg<q>")]
4970 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4971 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4972 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4973 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4974 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4975 (match_operand:SI 2 "immediate_operand" "i")]
4979 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4980 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4982 [(set_attr "type" "neon_store3_one_lane<q>")]
4985 (define_expand "vec_store_lanesci<mode>"
4986 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4987 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4988 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4992 if (BYTES_BIG_ENDIAN)
4994 rtx tmp = gen_reg_rtx (CImode);
4995 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4996 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4997 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5000 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5004 (define_insn "aarch64_simd_ld4<mode>"
5005 [(set (match_operand:XI 0 "register_operand" "=w")
5006 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5007 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5010 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5011 [(set_attr "type" "neon_load4_4reg<q>")]
5014 (define_insn "aarch64_simd_ld4r<mode>"
5015 [(set (match_operand:XI 0 "register_operand" "=w")
5016 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5017 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5020 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5021 [(set_attr "type" "neon_load4_all_lanes<q>")]
5024 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5025 [(set (match_operand:XI 0 "register_operand" "=w")
5026 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5027 (match_operand:XI 2 "register_operand" "0")
5028 (match_operand:SI 3 "immediate_operand" "i")
5029 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5033 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5034 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5036 [(set_attr "type" "neon_load4_one_lane")]
5039 (define_expand "vec_load_lanesxi<mode>"
5040 [(set (match_operand:XI 0 "register_operand" "=w")
5041 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5042 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5046 if (BYTES_BIG_ENDIAN)
5048 rtx tmp = gen_reg_rtx (XImode);
5049 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5050 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5051 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5054 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5058 (define_insn "aarch64_simd_st4<mode>"
5059 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5060 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5061 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5064 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5065 [(set_attr "type" "neon_store4_4reg<q>")]
5068 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5069 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5070 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5071 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5072 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5073 (match_operand:SI 2 "immediate_operand" "i")]
5077 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5078 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5080 [(set_attr "type" "neon_store4_one_lane<q>")]
5083 (define_expand "vec_store_lanesxi<mode>"
5084 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5085 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5086 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5090 if (BYTES_BIG_ENDIAN)
5092 rtx tmp = gen_reg_rtx (XImode);
5093 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5094 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5095 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5098 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5102 (define_insn_and_split "aarch64_rev_reglist<mode>"
5103 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5105 [(match_operand:VSTRUCT 1 "register_operand" "w")
5106 (match_operand:V16QI 2 "register_operand" "w")]
5107 UNSPEC_REV_REGLIST))]
5110 "&& reload_completed"
5114 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5115 for (i = 0; i < nregs; i++)
5117 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5118 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5119 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5123 [(set_attr "type" "neon_tbl1_q")
5124 (set_attr "length" "<insn_count>")]
5127 ;; Reload patterns for AdvSIMD register list operands.
5129 (define_expand "mov<mode>"
5130 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5131 (match_operand:VSTRUCT 1 "general_operand" ""))]
5134 if (can_create_pseudo_p ())
5136 if (GET_CODE (operands[0]) != REG)
5137 operands[1] = force_reg (<MODE>mode, operands[1]);
5142 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5143 [(match_operand:CI 0 "register_operand" "=w")
5144 (match_operand:DI 1 "register_operand" "r")
5145 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5148 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5149 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5153 (define_insn "aarch64_ld1_x3_<mode>"
5154 [(set (match_operand:CI 0 "register_operand" "=w")
5156 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5157 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5159 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5160 [(set_attr "type" "neon_load1_3reg<q>")]
5163 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5164 [(match_operand:DI 0 "register_operand" "")
5165 (match_operand:OI 1 "register_operand" "")
5166 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5169 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5170 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5174 (define_insn "aarch64_st1_x2_<mode>"
5175 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5177 [(match_operand:OI 1 "register_operand" "w")
5178 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5180 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5181 [(set_attr "type" "neon_store1_2reg<q>")]
5184 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5185 [(match_operand:DI 0 "register_operand" "")
5186 (match_operand:CI 1 "register_operand" "")
5187 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5190 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5191 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5195 (define_insn "aarch64_st1_x3_<mode>"
5196 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5198 [(match_operand:CI 1 "register_operand" "w")
5199 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5201 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5202 [(set_attr "type" "neon_store1_3reg<q>")]
5205 (define_insn "*aarch64_mov<mode>"
5206 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5207 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5208 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5209 && (register_operand (operands[0], <MODE>mode)
5210 || register_operand (operands[1], <MODE>mode))"
5213 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5214 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5215 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5216 neon_load<nregs>_<nregs>reg_q")
5217 (set_attr "length" "<insn_count>,4,4")]
5220 (define_insn "aarch64_be_ld1<mode>"
5221 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5222 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5223 "aarch64_simd_struct_operand" "Utv")]
5226 "ld1\\t{%0<Vmtype>}, %1"
5227 [(set_attr "type" "neon_load1_1reg<q>")]
5230 (define_insn "aarch64_be_st1<mode>"
5231 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5232 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5235 "st1\\t{%1<Vmtype>}, %0"
5236 [(set_attr "type" "neon_store1_1reg<q>")]
5239 (define_insn "*aarch64_be_movoi"
5240 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5241 (match_operand:OI 1 "general_operand" " w,w,m"))]
5242 "TARGET_SIMD && BYTES_BIG_ENDIAN
5243 && (register_operand (operands[0], OImode)
5244 || register_operand (operands[1], OImode))"
5249 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5250 (set_attr "length" "8,4,4")]
5253 (define_insn "*aarch64_be_movci"
5254 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5255 (match_operand:CI 1 "general_operand" " w,w,o"))]
5256 "TARGET_SIMD && BYTES_BIG_ENDIAN
5257 && (register_operand (operands[0], CImode)
5258 || register_operand (operands[1], CImode))"
5260 [(set_attr "type" "multiple")
5261 (set_attr "length" "12,4,4")]
5264 (define_insn "*aarch64_be_movxi"
5265 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5266 (match_operand:XI 1 "general_operand" " w,w,o"))]
5267 "TARGET_SIMD && BYTES_BIG_ENDIAN
5268 && (register_operand (operands[0], XImode)
5269 || register_operand (operands[1], XImode))"
5271 [(set_attr "type" "multiple")
5272 (set_attr "length" "16,4,4")]
5276 [(set (match_operand:OI 0 "register_operand")
5277 (match_operand:OI 1 "register_operand"))]
5278 "TARGET_SIMD && reload_completed"
5281 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5286 [(set (match_operand:CI 0 "nonimmediate_operand")
5287 (match_operand:CI 1 "general_operand"))]
5288 "TARGET_SIMD && reload_completed"
5291 if (register_operand (operands[0], CImode)
5292 && register_operand (operands[1], CImode))
5294 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5297 else if (BYTES_BIG_ENDIAN)
5299 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5300 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5301 emit_move_insn (gen_lowpart (V16QImode,
5302 simplify_gen_subreg (TImode, operands[0],
5304 gen_lowpart (V16QImode,
5305 simplify_gen_subreg (TImode, operands[1],
5314 [(set (match_operand:XI 0 "nonimmediate_operand")
5315 (match_operand:XI 1 "general_operand"))]
5316 "TARGET_SIMD && reload_completed"
5319 if (register_operand (operands[0], XImode)
5320 && register_operand (operands[1], XImode))
5322 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5325 else if (BYTES_BIG_ENDIAN)
5327 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5328 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5329 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5330 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5337 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5338 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5339 (match_operand:DI 1 "register_operand" "w")
5340 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5343 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5344 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5347 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5352 (define_insn "aarch64_ld2<mode>_dreg"
5353 [(set (match_operand:OI 0 "register_operand" "=w")
5354 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5355 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5358 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5359 [(set_attr "type" "neon_load2_2reg<q>")]
5362 (define_insn "aarch64_ld2<mode>_dreg"
5363 [(set (match_operand:OI 0 "register_operand" "=w")
5364 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5365 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5368 "ld1\\t{%S0.1d - %T0.1d}, %1"
5369 [(set_attr "type" "neon_load1_2reg<q>")]
5372 (define_insn "aarch64_ld3<mode>_dreg"
5373 [(set (match_operand:CI 0 "register_operand" "=w")
5374 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5375 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5378 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5379 [(set_attr "type" "neon_load3_3reg<q>")]
5382 (define_insn "aarch64_ld3<mode>_dreg"
5383 [(set (match_operand:CI 0 "register_operand" "=w")
5384 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5385 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5388 "ld1\\t{%S0.1d - %U0.1d}, %1"
5389 [(set_attr "type" "neon_load1_3reg<q>")]
5392 (define_insn "aarch64_ld4<mode>_dreg"
5393 [(set (match_operand:XI 0 "register_operand" "=w")
5394 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5395 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5398 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5399 [(set_attr "type" "neon_load4_4reg<q>")]
5402 (define_insn "aarch64_ld4<mode>_dreg"
5403 [(set (match_operand:XI 0 "register_operand" "=w")
5404 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5405 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5408 "ld1\\t{%S0.1d - %V0.1d}, %1"
5409 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expanders for the ldN intrinsics.  Operand 1 is the base address
;; in a DI register; a BLK MEM is wrapped around it and sized explicitly
;; (nregs D-registers of 8 bytes each) before emitting the _dreg insn.
5412 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5413 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5414 (match_operand:DI 1 "register_operand" "r")
5415 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5418 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5419 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5421 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));

;; ld1 intrinsic: a plain vector load; big-endian needs the dedicated be_ld1
;; pattern to keep the architectural lane numbering.
5425 (define_expand "aarch64_ld1<VALL_F16:mode>"
5426 [(match_operand:VALL_F16 0 "register_operand")
5427 (match_operand:DI 1 "register_operand")]
5430 machine_mode mode = <VALL_F16:MODE>mode;
5431 rtx mem = gen_rtx_MEM (mode, operands[1]);
5433 if (BYTES_BIG_ENDIAN)
5434 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5436 emit_move_insn (operands[0], mem);

;; ldN for Q-register modes: the MEM keeps the struct mode, no explicit size.
5440 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5441 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5442 (match_operand:DI 1 "register_operand" "r")
5443 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5446 machine_mode mode = <VSTRUCT:MODE>mode;
5447 rtx mem = gen_rtx_MEM (mode, operands[1]);
5449 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));

;; ld1x2: load two consecutive vectors with a single LD1 into an OI pair.
5453 (define_expand "aarch64_ld1x2<VQ:mode>"
5454 [(match_operand:OI 0 "register_operand" "=w")
5455 (match_operand:DI 1 "register_operand" "r")
5456 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5459 machine_mode mode = OImode;
5460 rtx mem = gen_rtx_MEM (mode, operands[1]);
5462 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));

;; D-register variant of ld1x2.  NOTE(review): mode is OImode here too,
;; matching the companion insn below — presumably intentional; confirm.
5466 (define_expand "aarch64_ld1x2<VDC:mode>"
5467 [(match_operand:OI 0 "register_operand" "=w")
5468 (match_operand:DI 1 "register_operand" "r")
5469 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5472 machine_mode mode = OImode;
5473 rtx mem = gen_rtx_MEM (mode, operands[1]);
5475 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));

;; ldN_lane: operand 3 is a lane index, range-checked against the element
;; count before the load-lanes insn is emitted; operand 2 is the input
;; register list that supplies the untouched lanes (tied via "0").
5480 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5481 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5482 (match_operand:DI 1 "register_operand" "w")
5483 (match_operand:VSTRUCT 2 "register_operand" "0")
5484 (match_operand:SI 3 "immediate_operand" "i")
5485 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5489 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5492 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5493 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5494 operands[0], mem, operands[2], operands[3]));
5498 ;; Expanders for builtins to extract vector registers from large
5499 ;; opaque integer modes.

;; Extract D-register number <part> from a struct-mode value: take a
;; Q-sized (VDBL) subreg at a 16-byte offset, then its low half.
5503 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5504 [(match_operand:VDC 0 "register_operand" "=w")
5505 (match_operand:VSTRUCT 1 "register_operand" "w")
5506 (match_operand:SI 2 "immediate_operand" "i")]
5509 int part = INTVAL (operands[2]);
5510 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5511 int offset = part * 16;
5513 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5514 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; Extract Q-register number <part>: a direct subreg move, 16 bytes apart.
5520 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5521 [(match_operand:VQ 0 "register_operand" "=w")
5522 (match_operand:VSTRUCT 1 "register_operand" "w")
5523 (match_operand:SI 2 "immediate_operand" "i")]
5526 int part = INTVAL (operands[2]);
5527 int offset = part * 16;
5529 emit_move_insn (operands[0],
5530 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5534 ;; Permuted-store expanders for neon intrinsics.

5536 ;; Permute instructions

;; Standard vec_perm pattern for byte vectors with a variable selector;
;; the heavy lifting (TBL sequence selection) is done in aarch64.c.
5540 (define_expand "vec_perm<mode>"
5541 [(match_operand:VB 0 "register_operand")
5542 (match_operand:VB 1 "register_operand")
5543 (match_operand:VB 2 "register_operand")
5544 (match_operand:VB 3 "register_operand")]
5547 aarch64_expand_vec_perm (operands[0], operands[1],
5548 operands[2], operands[3], <nunits>);
;; Table lookups.  TBL writes zero for out-of-range indices; TBX leaves the
;; destination element unchanged (hence operand 1 tied with "0" in the tbx
;; patterns).  The table always consists of full .16b registers.

;; One-register table.
5552 (define_insn "aarch64_tbl1<mode>"
5553 [(set (match_operand:VB 0 "register_operand" "=w")
5554 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5555 (match_operand:VB 2 "register_operand" "w")]
5558 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5559 [(set_attr "type" "neon_tbl1<q>")]

5562 ;; Two source registers.

5564 (define_insn "aarch64_tbl2v16qi"
5565 [(set (match_operand:V16QI 0 "register_operand" "=w")
5566 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5567 (match_operand:V16QI 2 "register_operand" "w")]
5570 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5571 [(set_attr "type" "neon_tbl2_q")]

;; Two-register table, any byte-vector result width.
5574 (define_insn "aarch64_tbl3<mode>"
5575 [(set (match_operand:VB 0 "register_operand" "=w")
5576 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5577 (match_operand:VB 2 "register_operand" "w")]
5580 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5581 [(set_attr "type" "neon_tbl3")]

;; Two-register table extension (destination merged, not zeroed).
5584 (define_insn "aarch64_tbx4<mode>"
5585 [(set (match_operand:VB 0 "register_operand" "=w")
5586 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5587 (match_operand:OI 2 "register_operand" "w")
5588 (match_operand:VB 3 "register_operand" "w")]
5591 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5592 [(set_attr "type" "neon_tbl4")]

5595 ;; Three source registers.

5597 (define_insn "aarch64_qtbl3<mode>"
5598 [(set (match_operand:VB 0 "register_operand" "=w")
5599 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5600 (match_operand:VB 2 "register_operand" "w")]
5603 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5604 [(set_attr "type" "neon_tbl3")]

5607 (define_insn "aarch64_qtbx3<mode>"
5608 [(set (match_operand:VB 0 "register_operand" "=w")
5609 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5610 (match_operand:CI 2 "register_operand" "w")
5611 (match_operand:VB 3 "register_operand" "w")]
5614 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5615 [(set_attr "type" "neon_tbl3")]

5618 ;; Four source registers.

5620 (define_insn "aarch64_qtbl4<mode>"
5621 [(set (match_operand:VB 0 "register_operand" "=w")
5622 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5623 (match_operand:VB 2 "register_operand" "w")]
5626 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5627 [(set_attr "type" "neon_tbl4")]

5630 (define_insn "aarch64_qtbx4<mode>"
5631 [(set (match_operand:VB 0 "register_operand" "=w")
5632 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5633 (match_operand:XI 2 "register_operand" "w")
5634 (match_operand:VB 3 "register_operand" "w")]
5637 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5638 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OI pair; kept as a single insn until
;; after reload, then split into the individual register moves.
5641 (define_insn_and_split "aarch64_combinev16qi"
5642 [(set (match_operand:OI 0 "register_operand" "=w")
5643 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5644 (match_operand:V16QI 2 "register_operand" "w")]
5648 "&& reload_completed"
5651 aarch64_split_combinev16qi (operands);
5654 [(set_attr "type" "multiple")]
5657 ;; This instruction's pattern is generated directly by
5658 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5659 ;; need corresponding changes there.
5660 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5661 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5662 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5663 (match_operand:VALL_F16 2 "register_operand" "w")]
5666 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5667 [(set_attr "type" "neon_permute<q>")]

5670 ;; This instruction's pattern is generated directly by
5671 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5672 ;; need corresponding changes there.  Note that the immediate (third)
5673 ;; operand is a lane index not a byte index.
5674 (define_insn "aarch64_ext<mode>"
5675 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5676 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5677 (match_operand:VALL_F16 2 "register_operand" "w")
5678 (match_operand:SI 3 "immediate_operand" "i")]
;; Scale the lane index up to the byte offset EXT actually takes.
5682 operands[3] = GEN_INT (INTVAL (operands[3])
5683 * GET_MODE_UNIT_SIZE (<MODE>mode));
5684 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5686 [(set_attr "type" "neon_ext<q>")]

5689 ;; This instruction's pattern is generated directly by
5690 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5691 ;; need corresponding changes there.
5692 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5693 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5694 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5697 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5698 [(set_attr "type" "neon_rev<q>")]
;; STN store-interleave insns for D-register modes.  As with the loads, the
;; DX variants degrade to ST1 of .1d registers since no interleaving of
;; elements is required for 64-bit scalar element modes.
5701 (define_insn "aarch64_st2<mode>_dreg"
5702 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5703 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5704 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5707 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5708 [(set_attr "type" "neon_store2_2reg")]

5711 (define_insn "aarch64_st2<mode>_dreg"
5712 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5713 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5714 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5717 "st1\\t{%S1.1d - %T1.1d}, %0"
5718 [(set_attr "type" "neon_store1_2reg")]

5721 (define_insn "aarch64_st3<mode>_dreg"
5722 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5723 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5724 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5727 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5728 [(set_attr "type" "neon_store3_3reg")]

5731 (define_insn "aarch64_st3<mode>_dreg"
5732 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5733 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5734 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5737 "st1\\t{%S1.1d - %U1.1d}, %0"
5738 [(set_attr "type" "neon_store1_3reg")]

5741 (define_insn "aarch64_st4<mode>_dreg"
5742 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5743 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5744 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5747 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5748 [(set_attr "type" "neon_store4_4reg")]

5751 (define_insn "aarch64_st4<mode>_dreg"
5752 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5753 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5754 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5757 "st1\\t{%S1.1d - %V1.1d}, %0"
5758 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expanders for the stN intrinsics — mirror images of the load
;; expanders above: wrap a MEM around the DI base address (operand 0) and
;; emit the corresponding store insn.
5761 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5762 [(match_operand:DI 0 "register_operand" "r")
5763 (match_operand:VSTRUCT 1 "register_operand" "w")
5764 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5767 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5768 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5770 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));

5774 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5775 [(match_operand:DI 0 "register_operand" "r")
5776 (match_operand:VSTRUCT 1 "register_operand" "w")
5777 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5780 machine_mode mode = <VSTRUCT:MODE>mode;
5781 rtx mem = gen_rtx_MEM (mode, operands[0]);
5783 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; stN_lane: store one lane of each register of the list; operand 2 is the
;; lane index.  NOTE(review): unlike the ld_lane expander, no visible
;; aarch64_simd_lane_bounds call — it may be on the missing lines; confirm.
5787 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5788 [(match_operand:DI 0 "register_operand" "r")
5789 (match_operand:VSTRUCT 1 "register_operand" "w")
5790 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5791 (match_operand:SI 2 "immediate_operand")]
5794 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5795 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5798 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5799 mem, operands[1], operands[2]));

;; st1 intrinsic: plain store, with a dedicated big-endian path.
5803 (define_expand "aarch64_st1<VALL_F16:mode>"
5804 [(match_operand:DI 0 "register_operand")
5805 (match_operand:VALL_F16 1 "register_operand")]
5808 machine_mode mode = <VALL_F16:MODE>mode;
5809 rtx mem = gen_rtx_MEM (mode, operands[0]);
5811 if (BYTES_BIG_ENDIAN)
5812 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5814 emit_move_insn (mem, operands[1]);
5818 ;; Expander for builtins to insert vector registers into large
5819 ;; opaque integer modes.

5821 ;; Q-register list.  We don't need a D-reg inserter as we zero
5822 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole list, then overwrite Q-register <part> via a subreg store.
5824 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5825 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5826 (match_operand:VSTRUCT 1 "register_operand" "0")
5827 (match_operand:VQ 2 "register_operand" "w")
5828 (match_operand:SI 3 "immediate_operand" "i")]
5831 int part = INTVAL (operands[3]);
5832 int offset = part * 16;
5834 emit_move_insn (operands[0], operands[1]);
5835 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),

5840 ;; Standard pattern name vec_init<mode><Vel>.

5842 (define_expand "vec_init<mode><Vel>"
5843 [(match_operand:VALL_F16 0 "register_operand" "")
5844 (match_operand 1 "" "")]
5847 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load-and-replicate: one element broadcast to every lane of the result.
5851 (define_insn "*aarch64_simd_ld1r<mode>"
5852 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5853 (vec_duplicate:VALL_F16
5854 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5856 "ld1r\\t{%0.<Vtype>}, %1"
5857 [(set_attr "type" "neon_load1_all_lanes")]

;; LD1 of two consecutive vectors into an OI register pair (Q variant).
5860 (define_insn "aarch64_simd_ld1<mode>_x2"
5861 [(set (match_operand:OI 0 "register_operand" "=w")
5862 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5863 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5866 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5867 [(set_attr "type" "neon_load1_2reg<q>")]

;; Same for D-register modes.
5870 (define_insn "aarch64_simd_ld1<mode>_x2"
5871 [(set (match_operand:OI 0 "register_operand" "=w")
5872 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5873 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5876 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5877 [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal-estimate / reciprocal-step instructions.

;; FP reciprocal estimate, vector forms.
5881 (define_insn "aarch64_frecpe<mode>"
5882 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5883 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5886 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5887 [(set_attr "type" "neon_fp_recpe_<stype><q>")]

;; Scalar FRECPE/FRECPX, selected by the FRECP iterator's suffix.
5890 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5891 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5892 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5895 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5896 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]

;; Newton-Raphson reciprocal step, vector and scalar forms.
5899 (define_insn "aarch64_frecps<mode>"
5900 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5902 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5903 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5906 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5907 [(set_attr "type" "neon_fp_recps_<stype><q>")]

;; Unsigned integer reciprocal estimate.
5910 (define_insn "aarch64_urecpe<mode>"
5911 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5912 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5915 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5916 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5918 ;; Standard pattern name vec_extract<mode><Vel>.

;; Delegates to the get_lane pattern; operand 2 is the lane index.
5920 (define_expand "vec_extract<mode><Vel>"
5921 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5922 (match_operand:VALL_F16 1 "register_operand" "")
5923 (match_operand:SI 2 "immediate_operand" "")]
5927 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]))¬
;; AES round instructions.  The hardware AESE/AESD first XORs the two inputs
;; (state ^ round key), which is why operands 1 and 2 are commutative ("%0")
;; and why the xor-combine variants below can absorb an explicit XOR whose
;; other unspec input is zero.
5933 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5934 [(set (match_operand:V16QI 0 "register_operand" "=w")
5935 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5936 (match_operand:V16QI 2 "register_operand" "w")]
5938 "TARGET_SIMD && TARGET_AES"
5939 "aes<aes_op>\\t%0.16b, %2.16b"
5940 [(set_attr "type" "crypto_aese")]

;; Combine (aes (xor a b) 0) into a single AES round — XOR folded first form.
5943 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5944 [(set (match_operand:V16QI 0 "register_operand" "=w")
5945 (unspec:V16QI [(xor:V16QI
5946 (match_operand:V16QI 1 "register_operand" "%0")
5947 (match_operand:V16QI 2 "register_operand" "w"))
5948 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5950 "TARGET_SIMD && TARGET_AES"
5951 "aes<aes_op>\\t%0.16b, %2.16b"
5952 [(set_attr "type" "crypto_aese")]

;; Same combine with the zero and the XOR in the opposite unspec positions.
5955 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5956 [(set (match_operand:V16QI 0 "register_operand" "=w")
5957 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5958 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5959 (match_operand:V16QI 2 "register_operand" "w"))]
5961 "TARGET_SIMD && TARGET_AES"
5962 "aes<aes_op>\\t%0.16b, %2.16b"
5963 [(set_attr "type" "crypto_aese")]

5966 ;; When AES/AESMC fusion is enabled we want the register allocation to
5970 ;; So prefer to tie operand 1 to operand 0 when fusing.

;; The second alternative (untied) is disabled when fusion is on, via the
;; set_attr_alternative below.
5972 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5973 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5974 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5976 "TARGET_SIMD && TARGET_AES"
5977 "aes<aesmc_op>\\t%0.16b, %1.16b"
5978 [(set_attr "type" "crypto_aesmc")
5979 (set_attr_alternative "enabled"
5980 [(if_then_else (match_test
5981 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5982 (const_string "yes" )
5983 (const_string "no"))
5984 (const_string "yes")])]

5987 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5988 ;; and enforce the register dependency without scheduling or register
5989 ;; allocation messing up the order or introducing moves in between.
5990 ;; Mash the two together during combine.

5992 (define_insn "*aarch64_crypto_aese_fused"
5993 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5996 [(match_operand:V16QI 1 "register_operand" "0")
5997 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5999 "TARGET_SIMD && TARGET_AES
6000 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6001 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6002 [(set_attr "type" "crypto_aese")
6003 (set_attr "length" "8")]

6006 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6007 ;; and enforce the register dependency without scheduling or register
6008 ;; allocation messing up the order or introducing moves in between.
6009 ;; Mash the two together during combine.

6011 (define_insn "*aarch64_crypto_aesd_fused"
6012 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6015 [(match_operand:V16QI 1 "register_operand" "0")
6016 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6018 "TARGET_SIMD && TARGET_AES
6019 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6020 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6021 [(set_attr "type" "crypto_aese")
6022 (set_attr "length" "8")]
;; SHA1 instructions.

;; SHA1H on a scalar SI value.
6027 (define_insn "aarch64_crypto_sha1hsi"
6028 [(set (match_operand:SI 0 "register_operand" "=w")
6029 (unspec:SI [(match_operand:SI 1
6030 "register_operand" "w")]
6032 "TARGET_SIMD && TARGET_SHA2"
6034 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1H reading lane 0 of a V4SI — little-endian lane numbering.
6037 (define_insn "aarch64_crypto_sha1hv4si"
6038 [(set (match_operand:SI 0 "register_operand" "=w")
6039 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6040 (parallel [(const_int 0)]))]
6042 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6044 [(set_attr "type" "crypto_sha1_fast")]

;; Big-endian twin: the architectural lane 0 is RTL lane 3.
6047 (define_insn "aarch64_be_crypto_sha1hv4si"
6048 [(set (match_operand:SI 0 "register_operand" "=w")
6049 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6050 (parallel [(const_int 3)]))]
6052 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6054 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1SU1 schedule update; accumulator tied to operand 0.
6057 (define_insn "aarch64_crypto_sha1su1v4si"
6058 [(set (match_operand:V4SI 0 "register_operand" "=w")
6059 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6060 (match_operand:V4SI 2 "register_operand" "w")]
6062 "TARGET_SIMD && TARGET_SHA2"
6063 "sha1su1\\t%0.4s, %2.4s"
6064 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1C/SHA1P/SHA1M round, selected by the sha1_op attribute.
6067 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6068 [(set (match_operand:V4SI 0 "register_operand" "=w")
6069 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6070 (match_operand:SI 2 "register_operand" "w")
6071 (match_operand:V4SI 3 "register_operand" "w")]
6073 "TARGET_SIMD && TARGET_SHA2"
6074 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6075 [(set_attr "type" "crypto_sha1_slow")]

;; SHA1SU0 schedule update.
6078 (define_insn "aarch64_crypto_sha1su0v4si"
6079 [(set (match_operand:V4SI 0 "register_operand" "=w")
6080 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6081 (match_operand:V4SI 2 "register_operand" "w")
6082 (match_operand:V4SI 3 "register_operand" "w")]
6084 "TARGET_SIMD && TARGET_SHA2"
6085 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6086 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 instructions.

;; SHA256H/SHA256H2 hash update round.
6091 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6092 [(set (match_operand:V4SI 0 "register_operand" "=w")
6093 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6094 (match_operand:V4SI 2 "register_operand" "w")
6095 (match_operand:V4SI 3 "register_operand" "w")]
6097 "TARGET_SIMD && TARGET_SHA2"
6098 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6099 [(set_attr "type" "crypto_sha256_slow")]

;; SHA256SU0 schedule update.
6102 (define_insn "aarch64_crypto_sha256su0v4si"
6103 [(set (match_operand:V4SI 0 "register_operand" "=w")
6104 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6105 (match_operand:V4SI 2 "register_operand" "w")]
6107 "TARGET_SIMD && TARGET_SHA2"
6108 "sha256su0\\t%0.4s, %2.4s"
6109 [(set_attr "type" "crypto_sha256_fast")]

;; SHA256SU1 schedule update.
6112 (define_insn "aarch64_crypto_sha256su1v4si"
6113 [(set (match_operand:V4SI 0 "register_operand" "=w")
6114 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6115 (match_operand:V4SI 2 "register_operand" "w")
6116 (match_operand:V4SI 3 "register_operand" "w")]
6118 "TARGET_SIMD && TARGET_SHA2"
6119 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6120 [(set_attr "type" "crypto_sha256_slow")]
;; SHA512 instructions (gated on TARGET_SHA3, which implies the SHA512 ops).

6125 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6126 [(set (match_operand:V2DI 0 "register_operand" "=w")
6127 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6128 (match_operand:V2DI 2 "register_operand" "w")
6129 (match_operand:V2DI 3 "register_operand" "w")]
6131 "TARGET_SIMD && TARGET_SHA3"
6132 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6133 [(set_attr "type" "crypto_sha512")]

6136 (define_insn "aarch64_crypto_sha512su0qv2di"
6137 [(set (match_operand:V2DI 0 "register_operand" "=w")
6138 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6139 (match_operand:V2DI 2 "register_operand" "w")]
6141 "TARGET_SIMD && TARGET_SHA3"
6142 "sha512su0\\t%0.2d, %2.2d"
6143 [(set_attr "type" "crypto_sha512")]

6146 (define_insn "aarch64_crypto_sha512su1qv2di"
6147 [(set (match_operand:V2DI 0 "register_operand" "=w")
6148 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6149 (match_operand:V2DI 2 "register_operand" "w")
6150 (match_operand:V2DI 3 "register_operand" "w")]
6152 "TARGET_SIMD && TARGET_SHA3"
6153 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6154 [(set_attr "type" "crypto_sha512")]
;; SHA3 bit-manipulation helpers, expressed with real RTL operations (xor,
;; rotate, and-not) rather than unspecs so combine can form them.

;; EOR3: three-way exclusive OR.
6159 (define_insn "eor3q<mode>4"
6160 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6163 (match_operand:VQ_I 2 "register_operand" "w")
6164 (match_operand:VQ_I 3 "register_operand" "w"))
6165 (match_operand:VQ_I 1 "register_operand" "w")))]
6166 "TARGET_SIMD && TARGET_SHA3"
6167 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6168 [(set_attr "type" "crypto_sha3")]

;; RAX1: rotate operand 2 left by one, XOR with operand 1.
6171 (define_insn "aarch64_rax1qv2di"
6172 [(set (match_operand:V2DI 0 "register_operand" "=w")
6175 (match_operand:V2DI 2 "register_operand" "w")
6177 (match_operand:V2DI 1 "register_operand" "w")))]
6178 "TARGET_SIMD && TARGET_SHA3"
6179 "rax1\\t%0.2d, %1.2d, %2.2d"
6180 [(set_attr "type" "crypto_sha3")]

;; XAR: XOR then rotate right by the immediate.
6183 (define_insn "aarch64_xarqv2di"
6184 [(set (match_operand:V2DI 0 "register_operand" "=w")
6187 (match_operand:V2DI 1 "register_operand" "%w")
6188 (match_operand:V2DI 2 "register_operand" "w"))
6189 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6190 "TARGET_SIMD && TARGET_SHA3"
6191 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6192 [(set_attr "type" "crypto_sha3")]

;; BCAX: bit clear and XOR — op1 ^ (op2 & ~op3).
6195 (define_insn "bcaxq<mode>4"
6196 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6199 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6200 (match_operand:VQ_I 2 "register_operand" "w"))
6201 (match_operand:VQ_I 1 "register_operand" "w")))]
6202 "TARGET_SIMD && TARGET_SHA3"
6203 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6204 [(set_attr "type" "crypto_sha3")]
;; SM3 instructions.

6209 (define_insn "aarch64_sm3ss1qv4si"
6210 [(set (match_operand:V4SI 0 "register_operand" "=w")
6211 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6212 (match_operand:V4SI 2 "register_operand" "w")
6213 (match_operand:V4SI 3 "register_operand" "w")]
6215 "TARGET_SIMD && TARGET_SM4"
6216 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6217 [(set_attr "type" "crypto_sm3")]

;; SM3TT1A/1B/2A/2B; operand 4 is a 2-bit lane immediate.
6221 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6222 [(set (match_operand:V4SI 0 "register_operand" "=w")
6223 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6224 (match_operand:V4SI 2 "register_operand" "w")
6225 (match_operand:V4SI 3 "register_operand" "w")
6226 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6228 "TARGET_SIMD && TARGET_SM4"
6229 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6230 [(set_attr "type" "crypto_sm3")]

6233 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6234 [(set (match_operand:V4SI 0 "register_operand" "=w")
6235 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6236 (match_operand:V4SI 2 "register_operand" "w")
6237 (match_operand:V4SI 3 "register_operand" "w")]
6239 "TARGET_SIMD && TARGET_SM4"
6240 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6241 [(set_attr "type" "crypto_sm3")]

;; SM4 instructions.

6246 (define_insn "aarch64_sm4eqv4si"
6247 [(set (match_operand:V4SI 0 "register_operand" "=w")
6248 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6249 (match_operand:V4SI 2 "register_operand" "w")]
6251 "TARGET_SIMD && TARGET_SM4"
6252 "sm4e\\t%0.4s, %2.4s"
6253 [(set_attr "type" "crypto_sm4")]

6256 (define_insn "aarch64_sm4ekeyqv4si"
6257 [(set (match_operand:V4SI 0 "register_operand" "=w")
6258 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6259 (match_operand:V4SI 2 "register_operand" "w")]
6261 "TARGET_SIMD && TARGET_SM4"
6262 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6263 [(set_attr "type" "crypto_sm4")]
;; FP16 FMLAL/FMLSL (widening multiply-accumulate into SF) expanders.
;; They build the lo/hi half-selection parallels and hand off to the
;; aarch64_simd_fml* insns below.

;; Low-half variant: both selection masks pick the low half (false).
6268 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6269 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6271 [(match_operand:VDQSF 1 "register_operand" "0")
6272 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6273 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6277 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6278 <nunits> * 2, false);
6279 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6280 <nunits> * 2, false);
6282 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],

;; High-half variant: selection masks pick the high half (true).
6291 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6292 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6294 [(match_operand:VDQSF 1 "register_operand" "0")
6295 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6296 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6300 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6301 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6303 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; The FMLAL/FMLSL insns proper: fma of float-extended half-vector selects
;; accumulated into operand 1 (tied to the destination).  The fmlsl forms
;; negate the first multiplicand; the _high forms use the hi-half parallels
;; and the "2" instruction spelling.

6311 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6312 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6315 (vec_select:<VFMLA_SEL_W>
6316 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6317 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6319 (vec_select:<VFMLA_SEL_W>
6320 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6321 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6322 (match_operand:VDQSF 1 "register_operand" "0")))]
6324 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6325 [(set_attr "type" "neon_fp_mul_s")]

6328 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6329 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6333 (vec_select:<VFMLA_SEL_W>
6334 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6335 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6337 (vec_select:<VFMLA_SEL_W>
6338 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6339 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6340 (match_operand:VDQSF 1 "register_operand" "0")))]
6342 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6343 [(set_attr "type" "neon_fp_mul_s")]

6346 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6347 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6350 (vec_select:<VFMLA_SEL_W>
6351 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6352 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6354 (vec_select:<VFMLA_SEL_W>
6355 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6356 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6357 (match_operand:VDQSF 1 "register_operand" "0")))]
6359 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6360 [(set_attr "type" "neon_fp_mul_s")]

6363 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6364 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6368 (vec_select:<VFMLA_SEL_W>
6369 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6370 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6372 (vec_select:<VFMLA_SEL_W>
6373 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6374 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6375 (match_operand:VDQSF 1 "register_operand" "0")))]
6377 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6378 [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL/FMLSL expanders for the V2SF result: operand 4 is the
;; 2-bit lane number, converted to endian-correct form before emission.

6381 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6382 [(set (match_operand:V2SF 0 "register_operand" "")
6383 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6384 (match_operand:V4HF 2 "register_operand" "")
6385 (match_operand:V4HF 3 "register_operand" "")
6386 (match_operand:SI 4 "aarch64_imm2" "")]
6390 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6391 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6393 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],

6402 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6403 [(set (match_operand:V2SF 0 "register_operand" "")
6404 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6405 (match_operand:V4HF 2 "register_operand" "")
6406 (match_operand:V4HF 3 "register_operand" "")
6407 (match_operand:SI 4 "aarch64_imm2" "")]
6411 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6412 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6414 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; By-lane FMLAL/FMLSL insns for V2SF: one multiplicand is a duplicated
;; lane of operand 3 (constraint "x": restricted register range for the
;; by-element form), accumulated into operand 1.

6422 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6423 [(set (match_operand:V2SF 0 "register_operand" "=w")
6427 (match_operand:V4HF 2 "register_operand" "w")
6428 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6432 (match_operand:V4HF 3 "register_operand" "x")
6433 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6434 (match_operand:V2SF 1 "register_operand" "0")))]
6436 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6437 [(set_attr "type" "neon_fp_mul_s")]

6440 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6441 [(set (match_operand:V2SF 0 "register_operand" "=w")
6446 (match_operand:V4HF 2 "register_operand" "w")
6447 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6451 (match_operand:V4HF 3 "register_operand" "x")
6452 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6453 (match_operand:V2SF 1 "register_operand" "0")))]
6455 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6456 [(set_attr "type" "neon_fp_mul_s")]

6459 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6460 [(set (match_operand:V2SF 0 "register_operand" "=w")
6464 (match_operand:V4HF 2 "register_operand" "w")
6465 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6469 (match_operand:V4HF 3 "register_operand" "x")
6470 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6471 (match_operand:V2SF 1 "register_operand" "0")))]
6473 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6474 [(set_attr "type" "neon_fp_mul_s")]

6477 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6478 [(set (match_operand:V2SF 0 "register_operand" "=w")
6483 (match_operand:V4HF 2 "register_operand" "w")
6484 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6488 (match_operand:V4HF 3 "register_operand" "x")
6489 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6490 (match_operand:V2SF 1 "register_operand" "0")))]
6492 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6493 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half FMLAL/FMLSL-by-laneq V4SF forms (<f16mac1>
;; iterates over add/subtract).  Builds the low-half selector
;; (aarch64_simd_vect_par_cnst_half with high=false) and an
;; endianness-corrected lane rtx, then emits the matching define_insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6496 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6497 [(set (match_operand:V4SF 0 "register_operand" "")
6498 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6499 (match_operand:V8HF 2 "register_operand" "")
6500 (match_operand:V8HF 3 "register_operand" "")
6501 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6505 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6506 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6508 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; Expander for the high-half FMLAL2/FMLSL2-by-laneq V4SF forms.  Builds
;; the high-half selector (aarch64_simd_vect_par_cnst_half with high=true)
;; and an endianness-corrected lane rtx, then emits the matching insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6516 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6517 [(set (match_operand:V4SF 0 "register_operand" "")
6518 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6519 (match_operand:V8HF 2 "register_operand" "")
6520 (match_operand:V8HF 3 "register_operand" "")
6521 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6525 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6526 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6528 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (by element, low half, quad): widening fp16 multiply-add into V4SF.
;; Operand 4 selects the low half of the V8HF operand 2; operand 5 is a
;; 3-bit lane index (laneq) into the V8HF operand 3.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6536 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6537 [(set (match_operand:V4SF 0 "register_operand" "=w")
6541 (match_operand:V8HF 2 "register_operand" "w")
6542 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6546 (match_operand:V8HF 3 "register_operand" "x")
6547 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6548 (match_operand:V4SF 1 "register_operand" "0")))]
6550 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6551 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half, quad): widening fp16 multiply-subtract into
;; V4SF.  Operand 4 selects the low half of operand 2; operand 5 is a 3-bit
;; lane index (laneq) into operand 3.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6554 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6555 [(set (match_operand:V4SF 0 "register_operand" "=w")
6560 (match_operand:V8HF 2 "register_operand" "w")
6561 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6565 (match_operand:V8HF 3 "register_operand" "x")
6566 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6567 (match_operand:V4SF 1 "register_operand" "0")))]
6569 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6570 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half, quad): widening fp16 multiply-add into
;; V4SF.  Operand 4 selects the high half of operand 2; operand 5 is a
;; 3-bit lane index (laneq) into operand 3.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6573 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6574 [(set (match_operand:V4SF 0 "register_operand" "=w")
6578 (match_operand:V8HF 2 "register_operand" "w")
6579 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6583 (match_operand:V8HF 3 "register_operand" "x")
6584 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6585 (match_operand:V4SF 1 "register_operand" "0")))]
6587 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6588 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half, quad): widening fp16 multiply-subtract
;; into V4SF.  Operand 4 selects the high half of operand 2; operand 5 is
;; a 3-bit lane index (laneq) into operand 3.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6591 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6592 [(set (match_operand:V4SF 0 "register_operand" "=w")
6597 (match_operand:V8HF 2 "register_operand" "w")
6598 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6602 (match_operand:V8HF 3 "register_operand" "x")
6603 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6604 (match_operand:V4SF 1 "register_operand" "0")))]
6606 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6607 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half FMLAL/FMLSL-by-laneq V2SF forms: V4HF
;; multiplicand, V8HF lane source.  Builds the low-half selector over
;; V4HF and an endianness-corrected lane rtx over V8HF, then emits the
;; matching define_insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6610 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6611 [(set (match_operand:V2SF 0 "register_operand" "")
6612 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6613 (match_operand:V4HF 2 "register_operand" "")
6614 (match_operand:V8HF 3 "register_operand" "")
6615 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6619 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6620 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6622 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the high-half FMLAL2/FMLSL2-by-laneq V2SF forms: V4HF
;; multiplicand, V8HF lane source.  Builds the high-half selector over
;; V4HF and an endianness-corrected lane rtx over V8HF, then emits the
;; matching define_insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6631 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6632 [(set (match_operand:V2SF 0 "register_operand" "")
6633 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6634 (match_operand:V4HF 2 "register_operand" "")
6635 (match_operand:V8HF 3 "register_operand" "")
6636 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6640 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6641 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6643 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (by element, low half): widening fp16 multiply-add into V2SF with
;; the lane taken from a full V8HF register (operand 3, 3-bit index in
;; operand 5).  Operand 4 selects the low half of the V4HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6652 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6653 [(set (match_operand:V2SF 0 "register_operand" "=w")
6657 (match_operand:V4HF 2 "register_operand" "w")
6658 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6662 (match_operand:V8HF 3 "register_operand" "x")
6663 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664 (match_operand:V2SF 1 "register_operand" "0")))]
6666 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6667 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half): widening fp16 multiply-subtract into V2SF
;; with the lane taken from a full V8HF register (3-bit index, operand 5).
;; Operand 4 selects the low half of the V4HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6670 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6671 [(set (match_operand:V2SF 0 "register_operand" "=w")
6676 (match_operand:V4HF 2 "register_operand" "w")
6677 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6681 (match_operand:V8HF 3 "register_operand" "x")
6682 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683 (match_operand:V2SF 1 "register_operand" "0")))]
6685 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6686 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half): widening fp16 multiply-add into V2SF
;; with the lane taken from a full V8HF register (3-bit index, operand 5).
;; Operand 4 selects the high half of the V4HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6689 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6690 [(set (match_operand:V2SF 0 "register_operand" "=w")
6694 (match_operand:V4HF 2 "register_operand" "w")
6695 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6699 (match_operand:V8HF 3 "register_operand" "x")
6700 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6701 (match_operand:V2SF 1 "register_operand" "0")))]
6703 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6704 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half): widening fp16 multiply-subtract into
;; V2SF with the lane taken from a full V8HF register (3-bit index,
;; operand 5).  Operand 4 selects the high half of the V4HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6707 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6708 [(set (match_operand:V2SF 0 "register_operand" "=w")
6713 (match_operand:V4HF 2 "register_operand" "w")
6714 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6718 (match_operand:V8HF 3 "register_operand" "x")
6719 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6720 (match_operand:V2SF 1 "register_operand" "0")))]
6722 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6723 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the low-half FMLAL/FMLSL-by-lane V4SF forms: V8HF
;; multiplicand, V4HF lane source (2-bit index).  Builds the low-half
;; selector over V8HF and an endianness-corrected lane rtx over V4HF,
;; then emits the matching define_insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6726 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6727 [(set (match_operand:V4SF 0 "register_operand" "")
6728 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6729 (match_operand:V8HF 2 "register_operand" "")
6730 (match_operand:V4HF 3 "register_operand" "")
6731 (match_operand:SI 4 "aarch64_imm2" "")]
6735 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6736 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6738 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the high-half FMLAL2/FMLSL2-by-lane V4SF forms: V8HF
;; multiplicand, V4HF lane source (2-bit index).  Builds the high-half
;; selector over V8HF and an endianness-corrected lane rtx over V4HF,
;; then emits the matching define_insn.
;; NOTE(review): non-contiguous listing; the expander's condition and the
;; tail of the emit_insn argument list are missing here -- verify upstream.
6746 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6747 [(set (match_operand:V4SF 0 "register_operand" "")
6748 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6749 (match_operand:V8HF 2 "register_operand" "")
6750 (match_operand:V4HF 3 "register_operand" "")
6751 (match_operand:SI 4 "aarch64_imm2" "")]
6755 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6756 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6758 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (by element, low half, quad): widening fp16 multiply-add into
;; V4SF; lane source is a V4HF register (2-bit index, operand 5).
;; Operand 4 selects the low half of the V8HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6766 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6767 [(set (match_operand:V4SF 0 "register_operand" "=w")
6771 (match_operand:V8HF 2 "register_operand" "w")
6772 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6776 (match_operand:V4HF 3 "register_operand" "x")
6777 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6778 (match_operand:V4SF 1 "register_operand" "0")))]
6780 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6781 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half, quad): widening fp16 multiply-subtract into
;; V4SF; lane source is a V4HF register (2-bit index, operand 5).
;; Operand 4 selects the low half of the V8HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6784 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6785 [(set (match_operand:V4SF 0 "register_operand" "=w")
6790 (match_operand:V8HF 2 "register_operand" "w")
6791 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6795 (match_operand:V4HF 3 "register_operand" "x")
6796 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6797 (match_operand:V4SF 1 "register_operand" "0")))]
6799 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6800 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half, quad): widening fp16 multiply-add into
;; V4SF; lane source is a V4HF register (2-bit index, operand 5).
;; Operand 4 selects the high half of the V8HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6803 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6804 [(set (match_operand:V4SF 0 "register_operand" "=w")
6808 (match_operand:V8HF 2 "register_operand" "w")
6809 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6813 (match_operand:V4HF 3 "register_operand" "x")
6814 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6815 (match_operand:V4SF 1 "register_operand" "0")))]
6817 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6818 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half, quad): widening fp16 multiply-subtract
;; into V4SF; lane source is a V4HF register (2-bit index, operand 5).
;; Operand 4 selects the high half of the V8HF operand 2.
;; NOTE(review): non-contiguous listing (embedded line numbers skip);
;; interior lines of this pattern are missing here -- verify upstream.
6821 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6822 [(set (match_operand:V4SF 0 "register_operand" "=w")
6827 (match_operand:V8HF 2 "register_operand" "w")
6828 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6832 (match_operand:V4HF 3 "register_operand" "x")
6833 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6834 (match_operand:V4SF 1 "register_operand" "0")))]
6836 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6837 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL: carry-less (polynomial) multiply of two 64-bit operands giving a
;; 128-bit (TI) result.  Guarded by TARGET_SIMD && TARGET_AES, i.e. the
;; Crypto/AES extension.
;; NOTE(review): non-contiguous listing -- the line carrying the unspec
;; constant name appears to be missing here; verify upstream.
6842 (define_insn "aarch64_crypto_pmulldi"
6843 [(set (match_operand:TI 0 "register_operand" "=w")
6844 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6845 (match_operand:DI 2 "register_operand" "w")]
6847 "TARGET_SIMD && TARGET_AES"
6848 "pmull\\t%0.1q, %1.1d, %2.1d"
6849 [(set_attr "type" "crypto_pmull")]
;; PMULL2: carry-less (polynomial) multiply using the upper 64-bit
;; elements of two V2DI operands, giving a 128-bit (TI) result.  Guarded
;; by TARGET_SIMD && TARGET_AES, i.e. the Crypto/AES extension.
;; NOTE(review): non-contiguous listing -- the line carrying the unspec
;; constant name appears to be missing here; verify upstream.
6852 (define_insn "aarch64_crypto_pmullv2di"
6853 [(set (match_operand:TI 0 "register_operand" "=w")
6854 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6855 (match_operand:V2DI 2 "register_operand" "w")]
6857 "TARGET_SIMD && TARGET_AES"
6858 "pmull2\\t%0.1q, %1.2d, %2.2d"
6859 [(set_attr "type" "crypto_pmull")]