1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this dump is missing scattered interior lines in each
;; pattern (see gaps in the embedded line numbers); only comments are
;; added below, code text is untouched.

;; Standard mov<mode> expander for all 64/128-bit vector modes incl.
;; FP16 (VALL_F16).  Keeps a zero immediate store as-is (storable via
;; xzr / stp xzr,xzr); otherwise forces a non-register source into a
;; register before a memory destination.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
23 (match_operand:VALL_F16 1 "general_operand"))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Misaligned vector move expander.  This optab is not allowed to FAIL,
;; so when neither operand is a register, operand 1 is forced into one.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand")
43 (match_operand:VALL 1 "general_operand"))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast a scalar into every lane of an integer vector: either
;; DUP from lane 0 of a SIMD register, or DUP from a GP register.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]

;; Floating-point variant: broadcast lane 0 of a SIMD register only
;; (no GP-register alternative).
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]

;; Broadcast a selected lane across the whole vector.  The lane index
;; is remapped for big-endian by aarch64_endian_lane_rtx, which also
;; bounds-checks it.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]

;; As above, but the source vector has the swapped (D<->Q) width, so the
;; lane index is interpreted in <VSWAP_WIDTH>mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move.  Alternatives: load, store-zero (via xzr), store,
;; SIMD reg-reg copy, SIMD->GP (umov), GP->SIMD (fmov), GP-GP move, and
;; immediate materialisation.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]

;; 128-bit vector move.  Zero is stored with "stp xzr, xzr"; the
;; alternatives involving GP registers are emitted as two instructions
;; (length 8) and split after reload by the define_splits below.
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes (NOTE(review): continuation line missing from this
;; dump — wording assumed, confirm against upstream).
;; Store lane 0 of a vector with a plain scalar STR; only valid when the
;; selected lane maps to lane 0 after endian correction.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]

;; LDP of two 64-bit vector registers; the second address must be
;; exactly the first plus the mode size (consecutive memory).
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]

;; STP of two 64-bit vector registers to consecutive memory.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]

;; LDP of two 128-bit (Q) vector registers from consecutive memory.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]

;; STP of two 128-bit (Q) vector registers to consecutive memory.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; Post-reload split of a Q-register-sized move held entirely in GP
;; registers: emit two DImode reg-reg moves.
;; NOTE(review): the define_split header line is missing from this dump.
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; Post-reload split of a cross-file (FP<->GP) Q-register move.
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);

;; Helper expander used by the split above: move a 128-bit value in
;; two 64-bit halves, choosing direction by which side is in GP regs.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));

;; Move the low 64-bit half of a Q register into a GP register
;; (post-reload only).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")

;; Move the high 64-bit half of a Q register into a GP register
;; (post-reload only).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; ORN: vector OR with complemented first operand.  Note the operand
;; order in the template: ORN computes op2 | ~op1 here.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]

;; BIC: vector AND with complemented first operand (op2 & ~op1).
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]

;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]

;; Vector integer multiply (byte/half/single element sizes only —
;; there is no 64-bit lane MUL instruction).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; Byte-swap lanes via REV16/REV32/REV64 (<Vrevsuff> selects which).
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]

;; Bit-reverse each byte lane (RBIT).
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]

;; ctz as bswap + per-byte rbit (together a full 32-bit bit-reverse)
;; followed by clz on the result.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(x, y) = x with the sign bit replaced by y's sign bit XORed
;; in: mask y down to its sign bits, then XOR into x.  All arithmetic
;; is done in the equal-width integer vector mode.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; The fcadd and fcmla patterns are deliberately made UNSPEC due to the
423 ;; fact that their usage needs to guarantee that the source vectors are
424 ;; contiguous.  It would be wrong to describe the operation without being able
425 ;; to describe the permute that is also required, but even if that is done
426 ;; the permute would have been created as a LOAD_LANES which means the values
427 ;; in the registers are in the wrong order.
;; Floating-point complex add with rotation <rot> (FCADD, Armv8.3-A).
428 (define_insn "aarch64_fcadd<rot><mode>"
429 [(set (match_operand:VHSDF 0 "register_operand" "=w")
430 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431 (match_operand:VHSDF 2 "register_operand" "w")]
434 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435 [(set_attr "type" "neon_fcadd")]

;; Floating-point complex multiply-accumulate with rotation <rot>
;; (FCMLA); operand 1 is the accumulator (tied to the output).
438 (define_insn "aarch64_fcmla<rot><mode>"
439 [(set (match_operand:VHSDF 0 "register_operand" "=w")
440 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442 (match_operand:VHSDF 3 "register_operand" "w")]
445 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446 [(set_attr "type" "neon_fcmla")]

;; FCMLA by indexed complex element; the complex-pair lane index is
;; endian-corrected in <VHALF>mode.
450 (define_insn "aarch64_fcmla_lane<rot><mode>"
451 [(set (match_operand:VHSDF 0 "register_operand" "=w")
452 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454 (match_operand:VHSDF 3 "register_operand" "w")
455 (match_operand:SI 4 "const_int_operand" "n")]
459 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
462 [(set_attr "type" "neon_fcmla")]

;; FCMLA for a 4xHF destination indexing into a 8xHF (Q-reg) source.
465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
466 [(set (match_operand:V4HF 0 "register_operand" "=w")
467 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469 (match_operand:V8HF 3 "register_operand" "w")
470 (match_operand:SI 4 "const_int_operand" "n")]
474 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
477 [(set_attr "type" "neon_fcmla")]

;; Q-register FCMLA indexing into a D-register source; the complex-pair
;; lane count is half the element count, hence "nunits / 2".
480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
481 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484 (match_operand:<VHALF> 3 "register_operand" "w")
485 (match_operand:SI 4 "const_int_operand" "n")]
489 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
491 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
494 [(set_attr "type" "neon_fcmla")]
497 ;; These instructions map to the __builtins for the Dot Product operations.
;; SDOT/UDOT: multiply-accumulate of groups of four 8-bit elements into
;; 32-bit lanes; operand 1 is the accumulator (tied to the output).
498 (define_insn "aarch64_<sur>dot<vsi2qi>"
499 [(set (match_operand:VS 0 "register_operand" "=w")
500 (plus:VS (match_operand:VS 1 "register_operand" "0")
501 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502 (match_operand:<VSI2QI> 3 "register_operand" "w")]
505 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506 [(set_attr "type" "neon_dot<q>")]

509 ;; These expands map to the Dot Product optab the vectorizer checks for.
510 ;; The auto-vectorizer expects a dot product builtin that also does an
511 ;; accumulation into the provided register.
512 ;; Given the following pattern
514 ;; for (i=0; i<len; i++) {
520 ;; This can be auto-vectorized to
521 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
523 ;; given enough iterations. However the vectorizer can keep unrolling the loop
524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
529 (define_expand "<sur>dot_prod<vsi2qi>"
530 [(set (match_operand:VS 0 "register_operand")
531 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532 (match_operand:<VSI2QI> 2 "register_operand")]
534 (match_operand:VS 3 "register_operand")))]
538 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
540 emit_insn (gen_rtx_SET (operands[0], operands[3]));

544 ;; These instructions map to the __builtins for the Dot Product
545 ;; indexed operations.
;; Dot product by indexed 32-bit group of a D register; the group index
;; is endian-corrected in V8QImode.
546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547 [(set (match_operand:VS 0 "register_operand" "=w")
548 (plus:VS (match_operand:VS 1 "register_operand" "0")
549 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550 (match_operand:V8QI 3 "register_operand" "<h_con>")
551 (match_operand:SI 4 "immediate_operand" "i")]
555 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
558 [(set_attr "type" "neon_dot<q>")]

;; As above but indexing into a Q register (V16QI source).
561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562 [(set (match_operand:VS 0 "register_operand" "=w")
563 (plus:VS (match_operand:VS 1 "register_operand" "0")
564 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565 (match_operand:V16QI 3 "register_operand" "<h_con>")
566 (match_operand:SI 4 "immediate_operand" "i")]
570 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
573 [(set_attr "type" "neon_dot<q>")]
;; copysign(x, y): bit-select the sign bit from y and the remaining
;; bits from x, implemented with BSL on a sign-bit mask.
576 (define_expand "copysign<mode>3"
577 [(match_operand:VHSDF 0 "register_operand")
578 (match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 "TARGET_FLOAT && TARGET_SIMD"
582 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
585 emit_move_insn (v_bitmask,
586 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587 HOST_WIDE_INT_M1U << bits));
588 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589 operands[2], operands[1]));
;; Multiply a vector by a duplicated lane of another vector
;; ((F)MUL by element); lane index is endian-corrected.
594 (define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
599 (match_operand:VMUL 1 "register_operand" "<h_con>")
600 (parallel [(match_operand:SI 2 "immediate_operand")])))
601 (match_operand:VMUL 3 "register_operand" "w")))]
604 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
607 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; As above with the lane taken from the swapped-width (D<->Q) vector.
610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612 (mult:VMUL_CHANGE_NLANES
613 (vec_duplicate:VMUL_CHANGE_NLANES
615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616 (parallel [(match_operand:SI 2 "immediate_operand")])))
617 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
620 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
623 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; Multiply by a scalar broadcast from a register: uses the by-element
;; form with lane 0.
626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
630 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631 (match_operand:VMUL 2 "register_operand" "w")))]
633 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; Reciprocal square-root estimate (FRSQRTE), vector and scalar forms.
637 (define_insn "@aarch64_rsqrte<mode>"
638 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
642 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton step (FRSQRTS).
645 (define_insn "@aarch64_rsqrts<mode>"
646 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
651 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; rsqrt optab: expand to the approximate-sqrt sequence (recip = true).
654 (define_expand "rsqrt<mode>2"
655 [(set (match_operand:VALLF 0 "register_operand")
656 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
660 aarch64_emit_approx_sqrt (operands[0], operands[1], true);

;; DF multiply by a selected lane of a V2DF register.
664 (define_insn "*aarch64_mul3_elt_to_64v2df"
665 [(set (match_operand:DF 0 "register_operand" "=w")
668 (match_operand:V2DF 1 "register_operand" "w")
669 (parallel [(match_operand:SI 2 "immediate_operand")]))
670 (match_operand:DF 3 "register_operand" "w")))]
673 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
676 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation.
679 (define_insn "neg<mode>2"
680 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
683 "neg\t%0.<Vtype>, %1.<Vtype>"
684 [(set_attr "type" "neon_neg<q>")]

;; Vector integer absolute value.
687 (define_insn "abs<mode>2"
688 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
691 "abs\t%0.<Vtype>, %1.<Vtype>"
692 [(set_attr "type" "neon_abs<q>")]

695 ;; The intrinsic version of integer ABS must not be allowed to
696 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept as an UNSPEC so combine cannot merge it (see
;; comment above; its continuation line is missing from this dump).
698 (define_insn "aarch64_abs<mode>"
699 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
701 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
704 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705 [(set_attr "type" "neon_abs<q>")]

708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709 ;; This isn't accurate as ABS always treats its input as a signed value.
710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
713 (define_insn "aarch64_<su>abd<mode>_3"
714 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
717 (match_operand:VDQ_BHSI 1 "register_operand" "w")
718 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
723 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724 [(set_attr "type" "neon_abd<q>")]

;; Widening absolute difference of the high halves ([SU]ABDL2).
727 (define_insn "aarch64_<sur>abdl2<mode>_3"
728 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730 (match_operand:VDQV_S 2 "register_operand" "w")]
733 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734 [(set_attr "type" "neon_abd<q>")]

;; Widening absolute-difference-and-accumulate ([SU]ABAL); operand 3
;; is the accumulator, tied to the output.
737 (define_insn "aarch64_<sur>abal<mode>_4"
738 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740 (match_operand:VDQV_S 2 "register_operand" "w")
741 (match_operand:<VDBLW> 3 "register_operand" "0")]
744 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745 [(set_attr "type" "neon_arith_acc<q>")]

;; Pairwise add-and-accumulate-long ([SU]ADALP); operand 2 is the
;; accumulator, tied to the output.
748 (define_insn "aarch64_<sur>adalp<mode>_3"
749 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751 (match_operand:<VDBLW> 2 "register_operand" "0")]
754 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755 [(set_attr "type" "neon_reduc_add<q>")]
758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
760 ;; reduction of the difference into a V4SI vector and accumulate that into
761 ;; operand 3 before copying that into the result operand 0.
762 ;; Perform that with a sequence of:
763 ;; UABDL2 tmp.8h, op1.16b, op2.16b
764 ;; UABAL tmp.8h, op1.16b, op2.16b
765 ;; UADALP op3.4s, tmp.8h
766 ;; MOV op0, op3 // should be eliminated in later passes.
768 ;; For TARGET_DOTPROD we do:
769 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
770 ;; UABD tmp2.16b, op1.16b, op2.16b
771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
772 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
774 ;; The signed version just uses the signed variants of the above instructions
775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; NOTE(review): the continuation of the comment above is missing from
;; this dump.
;; The expander itself: dot-product path first (abd + udot against a
;; vector of ones), then the generic abdl2/abal/adalp fallback.
778 (define_expand "<sur>sadv16qi"
779 [(use (match_operand:V4SI 0 "register_operand"))
780 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
781 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
782 (use (match_operand:V4SI 3 "register_operand"))]
787 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
788 rtx abd = gen_reg_rtx (V16QImode);
789 emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
790 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
794 rtx reduc = gen_reg_rtx (V8HImode);
795 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
797 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
798 operands[2], reduc));
799 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
801 emit_move_insn (operands[0], operands[3]);
;; Signed absolute-difference-and-accumulate (SABA); operand 3 is the
;; accumulator, tied to the output.
806 (define_insn "aba<mode>_3"
807 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
808 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
809 (match_operand:VDQ_BHSI 1 "register_operand" "w")
810 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
811 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
813 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
814 [(set_attr "type" "neon_arith_acc<q>")]

;; Floating-point absolute difference (FABD), vector and scalar forms.
817 (define_insn "fabd<mode>3"
818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
821 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
822 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
824 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
825 [(set_attr "type" "neon_fp_abd_<stype><q>")]
828 ;; For AND (vector, register) and BIC (vector, immediate)
;; Second alternative folds an inverted-bitmask immediate into BIC.
829 (define_insn "and<mode>3"
830 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
831 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
832 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
835 switch (which_alternative)
838 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
840 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
846 [(set_attr "type" "neon_logic<q>")]

849 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Second alternative folds a bitmask immediate into ORR-immediate.
850 (define_insn "ior<mode>3"
851 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
852 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
853 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
856 switch (which_alternative)
859 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
861 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
867 [(set_attr "type" "neon_logic<q>")]

;; Vector exclusive-or (EOR).
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
872 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
873 (match_operand:VDQ_I 2 "register_operand" "w")))]
875 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
876 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise complement (NOT).
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
881 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
883 "not\t%0.<Vbtype>, %1.<Vbtype>"
884 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector.  Operand 2 is a one-hot
;; lane mask; it is converted to a lane number (endian-corrected via
;; ENDIAN_LANE_N) and back to a mask for the %p2 output modifier.
;; Alternatives: INS from SIMD lane 0, INS from GP reg, LD1 from memory.
887 (define_insn "aarch64_simd_vec_set<mode>"
888 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
890 (vec_duplicate:VALL_F16
891 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
892 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
893 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
896 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
897 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
898 switch (which_alternative)
901 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
903 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
905 return "ld1\\t{%0.<Vetype>}[%p2], %1";
910 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]

;; Copy lane 4 of operand 3 into the lane of operand 0 selected by the
;; one-hot mask in operand 2 (INS element-to-element).
913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
914 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
916 (vec_duplicate:VALL_F16
918 (match_operand:VALL_F16 3 "register_operand" "w")
920 [(match_operand:SI 4 "immediate_operand" "i")])))
921 (match_operand:VALL_F16 1 "register_operand" "0")
922 (match_operand:SI 2 "immediate_operand" "i")))]
925 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
926 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
927 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
929 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
931 [(set_attr "type" "neon_ins<q>")]

;; As above, with the source lane taken from the swapped-width vector.
934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
935 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
936 (vec_merge:VALL_F16_NO_V2Q
937 (vec_duplicate:VALL_F16_NO_V2Q
939 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
941 [(match_operand:SI 4 "immediate_operand" "i")])))
942 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
943 (match_operand:SI 2 "immediate_operand" "i")))]
946 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
947 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
948 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
949 INTVAL (operands[4]));
951 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
953 [(set_attr "type" "neon_ins<q>")]
;; signbit(x): logical shift right by (element width - 1), leaving 0 or
;; 1 in each integer lane.
956 (define_expand "signbit<mode>2"
957 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
958 (use (match_operand:VDQSF 1 "register_operand"))]
961 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
962 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
964 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
966 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
971 (define_insn "aarch64_simd_lshr<mode>"
;; Logical shift right by immediate (USHR).
972 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
973 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
974 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
976 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
977 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic shift right by immediate (SSHR).
980 (define_insn "aarch64_simd_ashr<mode>"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
982 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
983 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
985 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
986 [(set_attr "type" "neon_shift_imm<q>")]

;; Shift right and accumulate ([US]SRA); operand 3 is the accumulator,
;; tied to the output.
989 (define_insn "*aarch64_simd_sra<mode>"
990 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
993 (match_operand:VDQ_I 1 "register_operand" "w")
994 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
995 (match_operand:VDQ_I 3 "register_operand" "0")))]
997 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
998 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift left by immediate (SHL).
1001 (define_insn "aarch64_simd_imm_shl<mode>"
1002 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1003 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1004 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
1006 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
1007 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector shift-by-register patterns.
;; Left shift with per-lane register shift amounts -> SSHL.
1010 (define_insn "aarch64_simd_reg_sshl<mode>"
1011 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1012 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1013 (match_operand:VDQ_I 2 "register_operand" "w")))]
1015 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1016 [(set_attr "type" "neon_shift_reg<q>")]
;; UNSPEC wrapper for USHL: used by the lshr expanders below, which
;; pass a negated shift count to obtain a right shift.
1019 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1020 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1021 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1022 (match_operand:VDQ_I 2 "register_operand" "w")]
1023 UNSPEC_ASHIFT_UNSIGNED))]
1025 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1026 [(set_attr "type" "neon_shift_reg<q>")]
;; UNSPEC wrapper for SSHL: signed counterpart, used by the ashr
;; expanders below with a negated shift count.
1029 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1030 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1031 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1032 (match_operand:VDQ_I 2 "register_operand" "w")]
1033 UNSPEC_ASHIFT_SIGNED))]
1035 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1036 [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name left-shift expander.  Dispatches on the form of the
;; shift amount: in-range constant -> immediate SHL; otherwise force to
;; a register and broadcast it for the register-shift SSHL form.
1039 (define_expand "ashl<mode>3"
1040 [(match_operand:VDQ_I 0 "register_operand")
1041 (match_operand:VDQ_I 1 "register_operand")
1042 (match_operand:SI 2 "general_operand")]
1045 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1048 if (CONST_INT_P (operands[2]))
1050 shift_amount = INTVAL (operands[2]);
;; Left shifts accept 0..bit_width-1 (contrast the right-shift
;; expanders, which accept 1..bit_width).
1051 if (shift_amount >= 0 && shift_amount < bit_width)
1053 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1055 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
;; Out-of-range constant: fall back to the register form.
1062 operands[2] = force_reg (SImode, operands[2]);
1065 else if (MEM_P (operands[2]))
1067 operands[2] = force_reg (SImode, operands[2]);
1070 if (REG_P (operands[2]))
1072 rtx tmp = gen_reg_rtx (<MODE>mode)
1073 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1074 convert_to_mode (<VEL>mode,
1077 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Standard-name logical right-shift expander.  In-range constant ->
;; immediate USHR; otherwise negate the scalar count, broadcast it, and
;; use USHL (a negative count in USHL shifts right).
1086 (define_expand "lshr<mode>3"
1087 [(match_operand:VDQ_I 0 "register_operand")
1088 (match_operand:VDQ_I 1 "register_operand")
1089 (match_operand:SI 2 "general_operand")]
1092 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1095 if (CONST_INT_P (operands[2]))
1097 shift_amount = INTVAL (operands[2]);
;; Right shifts accept 1..bit_width (USHR encodes #bit_width).
1098 if (shift_amount > 0 && shift_amount <= bit_width)
1100 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1102 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1108 operands[2] = force_reg (SImode, operands[2]);
1110 else if (MEM_P (operands[2]))
1112 operands[2] = force_reg (SImode, operands[2]);
1114 if (REG_P (operands[2]))
1117 rtx tmp = gen_reg_rtx (SImode);
1118 rtx tmp1 = gen_reg_rtx (<MODE>mode);
;; Negate the count so USHL performs a right shift.
1119 emit_insn (gen_negsi2 (tmp, operands[2]));
1120 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1121 convert_to_mode (<VEL>mode,
1123 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Standard-name arithmetic right-shift expander.  Mirrors lshr<mode>3
;; above, but uses SSHR for the immediate form and the signed SSHL
;; wrapper (with negated count) for the register form.
1133 (define_expand "ashr<mode>3"
1134 [(match_operand:VDQ_I 0 "register_operand")
1135 (match_operand:VDQ_I 1 "register_operand")
1136 (match_operand:SI 2 "general_operand")]
1139 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1142 if (CONST_INT_P (operands[2]))
1144 shift_amount = INTVAL (operands[2]);
;; Right shifts accept 1..bit_width (SSHR encodes #bit_width).
1145 if (shift_amount > 0 && shift_amount <= bit_width)
1147 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1149 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1155 operands[2] = force_reg (SImode, operands[2]);
1157 else if (MEM_P (operands[2]))
1159 operands[2] = force_reg (SImode, operands[2]);
1162 if (REG_P (operands[2]))
1164 rtx tmp = gen_reg_rtx (SImode);
1165 rtx tmp1 = gen_reg_rtx (<MODE>mode);
;; Negate the count so SSHL performs a right shift.
1166 emit_insn (gen_negsi2 (tmp, operands[2]));
1167 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1168 convert_to_mode (<VEL>mode,
1170 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: maps directly onto SSHL.
1180 (define_expand "vashl<mode>3"
1181 [(match_operand:VDQ_I 0 "register_operand")
1182 (match_operand:VDQ_I 1 "register_operand")
1183 (match_operand:VDQ_I 2 "register_operand")]
1186 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1191 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1192 ;; Negating individual lanes most certainly offsets the
1193 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the per-lane counts
;; with neg<mode>2, then use signed SSHL (negative count shifts right).
1194 (define_expand "vashr<mode>3"
1195 [(match_operand:VDQ_BHSI 0 "register_operand")
1196 (match_operand:VDQ_BHSI 1 "register_operand")
1197 (match_operand:VDQ_BHSI 2 "register_operand")]
1200 rtx neg = gen_reg_rtx (<MODE>mode);
1201 emit (gen_neg<mode>2 (neg, operands[2]));
1202 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; Intrinsic-level DI arithmetic right shift that additionally accepts
;; a shift count of 64, which is remapped to 63 (same result: the
;; value is filled with sign-bit copies) before using ashrdi3.
1208 (define_expand "aarch64_ashr_simddi"
1209 [(match_operand:DI 0 "register_operand")
1210 (match_operand:DI 1 "register_operand")
1211 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1214 /* An arithmetic shift right by 64 fills the result with copies of the sign
1215 bit, just like asr by 63 - however the standard pattern does not handle
1217 if (INTVAL (operands[2]) == 64)
1218 operands[2] = GEN_INT (63);
1219 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical right shift: negate the per-lane counts,
;; then use unsigned USHL (negative count shifts right).
1224 (define_expand "vlshr<mode>3"
1225 [(match_operand:VDQ_BHSI 0 "register_operand")
1226 (match_operand:VDQ_BHSI 1 "register_operand")
1227 (match_operand:VDQ_BHSI 2 "register_operand")]
1230 rtx neg = gen_reg_rtx (<MODE>mode);
1231 emit (gen_neg<mode>2 (neg, operands[2]));
1232 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; Intrinsic-level DI logical right shift that additionally accepts a
;; count of 64: that case yields zero, emitted as a plain move.
1237 (define_expand "aarch64_lshr_simddi"
1238 [(match_operand:DI 0 "register_operand")
1239 (match_operand:DI 1 "register_operand")
1240 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1243 if (INTVAL (operands[2]) == 64)
1244 emit_move_insn (operands[0], const0_rtx);
1246 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1251 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector element shift for 64-bit vectors, operating on the
;; scalar D register view (%d0/%d1).  On big-endian the architectural
;; direction is reversed, hence SHL instead of USHR.
1252 (define_insn "vec_shr_<mode>"
1253 [(set (match_operand:VD 0 "register_operand" "=w")
1254 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1255 (match_operand:SI 2 "immediate_operand" "i")]
1259 if (BYTES_BIG_ENDIAN)
1260 return "shl %d0, %d1, %2";
1262 return "ushr %d0, %d1, %2";
1264 [(set_attr "type" "neon_shift_imm")]
;; Standard-name element insertion.  The lane index in operand 2 is
;; converted to the one-hot bitmask form expected by
;; aarch64_simd_vec_set; operand 0 is also passed as the vector whose
;; remaining lanes are preserved.
1267 (define_expand "vec_set<mode>"
1268 [(match_operand:VALL_F16 0 "register_operand")
1269 (match_operand:<VEL> 1 "register_operand")
1270 (match_operand:SI 2 "immediate_operand")]
1273 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1274 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1275 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: 0 = 1 + (2 * 3).  The accumulator
;; (operand 1) is tied to the destination (constraint "0").
1281 (define_insn "aarch64_mla<mode>"
1282 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1283 (plus:VDQ_BHSI (mult:VDQ_BHSI
1284 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1285 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1286 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1288 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1289 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from a lane of a same-width
;; vector; the lane number is adjusted for endianness at output time.
1292 (define_insn "*aarch64_mla_elt<mode>"
1293 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1296 (vec_duplicate:VDQHS
1298 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1299 (parallel [(match_operand:SI 2 "immediate_operand")])))
1300 (match_operand:VDQHS 3 "register_operand" "w"))
1301 (match_operand:VDQHS 4 "register_operand" "0")))]
1304 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1305 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1307 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane is taken from a vector of the opposite width
;; (<VSWAP_WIDTH>), e.g. a lane of a Q-reg used with a D-reg operation.
1310 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1311 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1314 (vec_duplicate:VDQHS
1316 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1317 (parallel [(match_operand:SI 2 "immediate_operand")])))
1318 (match_operand:VDQHS 3 "register_operand" "w"))
1319 (match_operand:VDQHS 4 "register_operand" "0")))]
1322 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1323 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA with a scalar multiplicand duplicated across all lanes:
;; emitted as a by-element MLA using lane 0.
1328 (define_insn "*aarch64_mla_elt_merge<mode>"
1329 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1331 (mult:VDQHS (vec_duplicate:VDQHS
1332 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1333 (match_operand:VDQHS 2 "register_operand" "w"))
1334 (match_operand:VDQHS 3 "register_operand" "0")))]
1336 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1337 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: 0 = 1 - (2 * 3).  The minuend/accumulator
;; (operand 1) is tied to the destination.
1340 (define_insn "aarch64_mls<mode>"
1341 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1342 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1343 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1344 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1346 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1347 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand broadcast from a lane of a same-width
;; vector; lane number is endian-adjusted at output time.
1350 (define_insn "*aarch64_mls_elt<mode>"
1351 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1353 (match_operand:VDQHS 4 "register_operand" "0")
1355 (vec_duplicate:VDQHS
1357 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1358 (parallel [(match_operand:SI 2 "immediate_operand")])))
1359 (match_operand:VDQHS 3 "register_operand" "w"))))]
1362 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1363 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1365 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a vector of the opposite width.
1368 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1369 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1371 (match_operand:VDQHS 4 "register_operand" "0")
1373 (vec_duplicate:VDQHS
1375 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1376 (parallel [(match_operand:SI 2 "immediate_operand")])))
1377 (match_operand:VDQHS 3 "register_operand" "w"))))]
1380 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1381 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a scalar multiplicand duplicated across all lanes:
;; emitted as a by-element MLS using lane 0.
1386 (define_insn "*aarch64_mls_elt_merge<mode>"
1387 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1389 (match_operand:VDQHS 1 "register_operand" "0")
1390 (mult:VDQHS (vec_duplicate:VDQHS
1391 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1392 (match_operand:VDQHS 3 "register_operand" "w"))))]
1394 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1395 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1398 ;; Max/Min operations.
;; Elementwise signed/unsigned max/min -> SMAX/SMIN/UMAX/UMIN.
1399 (define_insn "<su><maxmin><mode>3"
1400 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1401 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1402 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1404 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1405 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min has no direct instruction: synthesised via a compare
;; and a vcond (select) between the two operands.
1408 (define_expand "<su><maxmin>v2di3"
1409 [(set (match_operand:V2DI 0 "register_operand")
1410 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1411 (match_operand:V2DI 2 "register_operand")))]
1414 enum rtx_code cmp_operator;
1435 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1436 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1437 operands[2], cmp_fmt, operands[1], operands[2]));
1441 ;; Pairwise Integer Max/Min operations.
1442 (define_insn "aarch64_<maxmin_uns>p<mode>"
1443 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1444 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1445 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1448 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1449 [(set_attr "type" "neon_minmax<q>")]
1452 ;; Pairwise FP Max/Min operations.
1453 (define_insn "aarch64_<maxmin_uns>p<mode>"
1454 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1455 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1456 (match_operand:VHSDF 2 "register_operand" "w")]
1459 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1460 [(set_attr "type" "neon_minmax<q>")]
1463 ;; vec_concat gives a new vector with the low elements from operand 1, and
1464 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1465 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1466 ;; What that means, is that the RTL descriptions of the below patterns
1467 ;; need to change depending on endianness.
1469 ;; Move to the low architectural bits of the register.
1470 ;; On little-endian this is { operand, zeroes }
1471 ;; On big-endian this is { zeroes, operand }
;; Little-endian move-to-low-half: { operand1, zeroes }.  Three
;; alternatives (SIMD reg, GP reg via f_mcr, SIMD dup), see "arch" attr.
1473 (define_insn "move_lo_quad_internal_<mode>"
1474 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1476 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1477 (vec_duplicate:<VHALF> (const_int 0))))]
1478 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1483 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484 (set_attr "length" "4")
1485 (set_attr "arch" "simd,fp,simd")]
;; Same for the two-element (2E) quad modes, which need a slightly
;; different RTL shape for the zero half.
1488 (define_insn "move_lo_quad_internal_<mode>"
1489 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1491 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1493 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1498 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1499 (set_attr "length" "4")
1500 (set_attr "arch" "simd,fp,simd")]
;; Big-endian variants: the vec_concat order is reversed,
;; { zeroes, operand1 }.
1503 (define_insn "move_lo_quad_internal_be_<mode>"
1504 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1506 (vec_duplicate:<VHALF> (const_int 0))
1507 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1508 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1513 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1514 (set_attr "length" "4")
1515 (set_attr "arch" "simd,fp,simd")]
1518 (define_insn "move_lo_quad_internal_be_<mode>"
1519 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1522 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1523 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1528 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1529 (set_attr "length" "4")
1530 (set_attr "arch" "simd,fp,simd")]
;; Expander that picks the LE or BE internal pattern at expand time.
1533 (define_expand "move_lo_quad_<mode>"
1534 [(match_operand:VQ 0 "register_operand")
1535 (match_operand:VQ 1 "register_operand")]
1538 if (BYTES_BIG_ENDIAN)
1539 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1541 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1546 ;; Move operand1 to the high architectural bits of the register, keeping
1547 ;; the low architectural bits of operand2.
1548 ;; For little-endian this is { operand2, operand1 }
1549 ;; For big-endian this is { operand1, operand2 }
;; Little-endian move-to-high-half: keep the low half of operand 0
;; (selected via vect_par_cnst_lo_half) and insert operand 1 into the
;; high architectural half with INS.
1551 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1552 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1556 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1557 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1558 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1560 ins\\t%0.d[1], %1.d[0]
1562 [(set_attr "type" "neon_ins")]
;; Big-endian variant: vec_concat order reversed, same INS output.
1565 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1566 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1568 (match_operand:<VHALF> 1 "register_operand" "w,r")
1571 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1572 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1574 ins\\t%0.d[1], %1.d[0]
1576 [(set_attr "type" "neon_ins")]
;; Expander: builds the lo-half lane-selection parallel and dispatches
;; on endianness to the matching internal pattern.
1579 (define_expand "move_hi_quad_<mode>"
1580 [(match_operand:VQ 0 "register_operand")
1581 (match_operand:<VHALF> 1 "register_operand")]
1584 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1585 if (BYTES_BIG_ENDIAN)
1586 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1589 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1594 ;; Narrowing operations.
;; Truncate each element of a quad vector to the next-narrower type
;; -> XTN (narrowing).
1597 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1598 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1599 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1601 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1602 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two D-reg vectors: concatenate them into a temporary Q reg via
;; move_lo/hi_quad (operand order swapped on big-endian), then XTN.
1605 (define_expand "vec_pack_trunc_<mode>"
1606 [(match_operand:<VNARROWD> 0 "register_operand")
1607 (match_operand:VDN 1 "register_operand")
1608 (match_operand:VDN 2 "register_operand")]
1611 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1612 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1613 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1615 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1616 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1617 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack two Q-reg vectors into one narrower Q reg: XTN then XTN2.
;; The earlyclobber "=&w" keeps the destination distinct from the
;; inputs since it is written by the first of the two instructions.
1623 (define_insn "vec_pack_trunc_<mode>"
1624 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1625 (vec_concat:<VNARROWQ2>
1626 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1627 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1630 if (BYTES_BIG_ENDIAN)
1631 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1633 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1635 [(set_attr "type" "multiple")
1636 (set_attr "length" "8")]
1639 ;; Widening operations.
;; Sign/zero-extend the low half of a quad vector -> SXTL/UXTL.
1641 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1642 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1643 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1644 (match_operand:VQW 1 "register_operand" "w")
1645 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1648 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1649 [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high half -> SXTL2/UXTL2.
1652 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1654 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1655 (match_operand:VQW 1 "register_operand" "w")
1656 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1659 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1660 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo lane-selection parallel
;; and forward to the insns above.
1663 (define_expand "vec_unpack<su>_hi_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1675 (define_expand "vec_unpack<su>_lo_<mode>"
1676 [(match_operand:<VWIDE> 0 "register_operand")
1677 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1680 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1681 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1687 ;; Widening arithmetic.
;; Widening multiply-accumulate on low halves -> SMLAL/UMLAL.
;; Operand 1 (the wide accumulator) is tied to the destination.
1689 (define_insn "*aarch64_<su>mlal_lo<mode>"
1690 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1693 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1694 (match_operand:VQW 2 "register_operand" "w")
1695 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1696 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1697 (match_operand:VQW 4 "register_operand" "w")
1699 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1701 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1702 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate on high halves -> SMLAL2/UMLAL2.
1705 (define_insn "*aarch64_<su>mlal_hi<mode>"
1706 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1709 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1710 (match_operand:VQW 2 "register_operand" "w")
1711 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1712 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1713 (match_operand:VQW 4 "register_operand" "w")
1715 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1717 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1718 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on low halves -> SMLSL/UMLSL.
1721 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1724 (match_operand:<VWIDE> 1 "register_operand" "0")
1726 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1727 (match_operand:VQW 2 "register_operand" "w")
1728 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1729 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1730 (match_operand:VQW 4 "register_operand" "w")
1733 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1734 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on high halves -> SMLSL2/UMLSL2.
1737 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1738 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1740 (match_operand:<VWIDE> 1 "register_operand" "0")
1742 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1743 (match_operand:VQW 2 "register_operand" "w")
1744 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1745 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1746 (match_operand:VQW 4 "register_operand" "w")
1749 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1750 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit input) widening multiply-accumulate -> SMLAL/UMLAL.
1753 (define_insn "*aarch64_<su>mlal<mode>"
1754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1758 (match_operand:VD_BHSI 1 "register_operand" "w"))
1760 (match_operand:VD_BHSI 2 "register_operand" "w")))
1761 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1763 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1764 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register widening multiply-subtract -> SMLSL/UMLSL.
1767 (define_insn "*aarch64_<su>mlsl<mode>"
1768 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1770 (match_operand:<VWIDE> 1 "register_operand" "0")
1773 (match_operand:VD_BHSI 2 "register_operand" "w"))
1775 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1777 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1778 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves -> SMULL/UMULL.
1781 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1782 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1783 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1784 (match_operand:VQW 1 "register_operand" "w")
1785 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1786 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1787 (match_operand:VQW 2 "register_operand" "w")
1790 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1791 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: builds the lo-half lane selection and
;; forwards to the insn above.
1794 (define_expand "vec_widen_<su>mult_lo_<mode>"
1795 [(match_operand:<VWIDE> 0 "register_operand")
1796 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1797 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1800 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1801 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves -> SMULL2/UMULL2.
1808 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1809 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1810 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1811 (match_operand:VQW 1 "register_operand" "w")
1812 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1813 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1814 (match_operand:VQW 2 "register_operand" "w")
1817 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1818 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the high-half widening multiply.
1821 (define_expand "vec_widen_<su>mult_hi_<mode>"
1822 [(match_operand:<VWIDE> 0 "register_operand")
1823 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1824 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1827 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1828 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1836 ;; FP vector operations.
1837 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1838 ;; double-precision (64-bit) floating-point data types and arithmetic as
1839 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1840 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1842 ;; Floating-point operations can raise an exception. Vectorizing such
1843 ;; operations is safe because of the reasons explained below.
1845 ;; ARMv8 permits an extension to enable trapped floating-point
1846 ;; exception handling, however this is an optional feature. In the
1847 ;; event of a floating-point exception being raised by vectorised
1849 ;; 1. If trapped floating-point exceptions are available, then a trap
1850 ;; will be taken when any lane raises an enabled exception. A trap
1851 ;; handler may determine which lane raised the exception.
1852 ;; 2. Alternatively a sticky exception flag is set in the
1853 ;; floating-point status register (FPSR). Software may explicitly
1854 ;; test the exception flags, in which case the tests will either
1855 ;; prevent vectorisation, allowing precise identification of the
1856 ;; failing operation, or if tested outside of vectorisable regions
1857 ;; then the specific operation and lane are not of interest.
1859 ;; FP arithmetic operations.
;; Elementwise FP add -> FADD.
1861 (define_insn "add<mode>3"
1862 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1863 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1864 (match_operand:VHSDF 2 "register_operand" "w")))]
1866 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1867 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Elementwise FP subtract -> FSUB.
1870 (define_insn "sub<mode>3"
1871 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1872 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1873 (match_operand:VHSDF 2 "register_operand" "w")))]
1875 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1876 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Elementwise FP multiply -> FMUL.
1879 (define_insn "mul<mode>3"
1880 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1881 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1882 (match_operand:VHSDF 2 "register_operand" "w")))]
1884 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1885 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP divide expander: first tries the reciprocal-approximation
;; sequence (aarch64_emit_approx_div); otherwise falls through to the
;; *div insn below.
1888 (define_expand "div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1891 (match_operand:VHSDF 2 "register_operand")))]
1894 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1897 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Elementwise FP divide -> FDIV.
1900 (define_insn "*div<mode>3"
1901 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1902 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1903 (match_operand:VHSDF 2 "register_operand" "w")))]
1905 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1906 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Elementwise FP negate -> FNEG.
1909 (define_insn "neg<mode>2"
1910 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1911 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1913 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1914 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Elementwise FP absolute value -> FABS.
1917 (define_insn "abs<mode>2"
1918 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1921 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1922 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: 0 = (1 * 2) + 3, accumulator tied to dest -> FMLA.
1925 (define_insn "fma<mode>4"
1926 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1927 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1928 (match_operand:VHSDF 2 "register_operand" "w")
1929 (match_operand:VHSDF 3 "register_operand" "0")))]
1931 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1932 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a lane of a same-width
;; vector; lane index is endian-adjusted at output time.
1935 (define_insn "*aarch64_fma4_elt<mode>"
1936 [(set (match_operand:VDQF 0 "register_operand" "=w")
1940 (match_operand:VDQF 1 "register_operand" "<h_con>")
1941 (parallel [(match_operand:SI 2 "immediate_operand")])))
1942 (match_operand:VDQF 3 "register_operand" "w")
1943 (match_operand:VDQF 4 "register_operand" "0")))]
1946 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1947 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1949 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a vector of the opposite width.
1952 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1953 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1955 (vec_duplicate:VDQSF
1957 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1958 (parallel [(match_operand:SI 2 "immediate_operand")])))
1959 (match_operand:VDQSF 3 "register_operand" "w")
1960 (match_operand:VDQSF 4 "register_operand" "0")))]
1963 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1964 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1966 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a scalar multiplicand duplicated across lanes: emitted as
;; a by-element FMLA using lane 0.
1969 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1970 [(set (match_operand:VMUL 0 "register_operand" "=w")
1973 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1974 (match_operand:VMUL 2 "register_operand" "w")
1975 (match_operand:VMUL 3 "register_operand" "0")))]
1977 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1978 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma taking one multiplicand from a V2DF lane.
1981 (define_insn "*aarch64_fma4_elt_to_64v2df"
1982 [(set (match_operand:DF 0 "register_operand" "=w")
1985 (match_operand:V2DF 1 "register_operand" "w")
1986 (parallel [(match_operand:SI 2 "immediate_operand")]))
1987 (match_operand:DF 3 "register_operand" "w")
1988 (match_operand:DF 4 "register_operand" "0")))]
1991 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1992 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1994 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract: 0 = (-1 * 2) + 3 -> FMLS.
1997 (define_insn "fnma<mode>4"
1998 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2000 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2001 (match_operand:VHSDF 2 "register_operand" "w")
2002 (match_operand:VHSDF 3 "register_operand" "0")))]
2004 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2005 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with one multiplicand broadcast from a lane of a same-width
;; vector; lane index is endian-adjusted at output time.
2008 (define_insn "*aarch64_fnma4_elt<mode>"
2009 [(set (match_operand:VDQF 0 "register_operand" "=w")
2012 (match_operand:VDQF 3 "register_operand" "w"))
2015 (match_operand:VDQF 1 "register_operand" "<h_con>")
2016 (parallel [(match_operand:SI 2 "immediate_operand")])))
2017 (match_operand:VDQF 4 "register_operand" "0")))]
2020 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2021 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2023 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a vector of the opposite width.
2026 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2027 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2030 (match_operand:VDQSF 3 "register_operand" "w"))
2031 (vec_duplicate:VDQSF
2033 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2034 (parallel [(match_operand:SI 2 "immediate_operand")])))
2035 (match_operand:VDQSF 4 "register_operand" "0")))]
2038 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2039 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2041 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar multiplicand duplicated across lanes: emitted as
;; a by-element FMLS using lane 0.
2044 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2045 [(set (match_operand:VMUL 0 "register_operand" "=w")
2048 (match_operand:VMUL 2 "register_operand" "w"))
2050 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2051 (match_operand:VMUL 3 "register_operand" "0")))]
2053 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2054 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fnma taking one multiplicand from a V2DF lane.
2057 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2058 [(set (match_operand:DF 0 "register_operand" "=w")
2061 (match_operand:V2DF 1 "register_operand" "w")
2062 (parallel [(match_operand:SI 2 "immediate_operand")]))
2064 (match_operand:DF 3 "register_operand" "w"))
2065 (match_operand:DF 4 "register_operand" "0")))]
2068 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2069 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2071 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2074 ;; Vector versions of the floating-point frint patterns.
2075 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2076 (define_insn "<frint_pattern><mode>2"
2077 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2078 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2081 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_round_<stype><q>")]
2085 ;; Vector versions of the fcvt standard patterns.
2086 ;; Expands to lbtrunc, lround, lceil, lfloor
2087 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2088 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2089 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2090 [(match_operand:VHSDF 1 "register_operand" "w")]
2093 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2094 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2097 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert; requires the FP16 instruction set.
2098 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2099 [(set (match_operand:HI 0 "register_operand" "=w")
2100 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2102 "TARGET_SIMD_F16INST"
2103 "fcvt<frint_suffix><su>\t%h0, %h1"
2104 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (fcvtzs/fcvtzu on h registers).
2107 (define_insn "<optab>_trunchfhi2"
2108 [(set (match_operand:HI 0 "register_operand" "=w")
2109 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2110 "TARGET_SIMD_F16INST"
2111 "fcvtz<su>\t%h0, %h1"
2112 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF conversion (scvtf/ucvtf on h registers).
2115 (define_insn "<optab>hihf2"
2116 [(set (match_operand:HF 0 "register_operand" "=w")
2117 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2118 "TARGET_SIMD_F16INST"
2119 "<su_optab>cvtf\t%h0, %h1"
2120 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with a float->int conversion into a
;; single fcvtz<su> with an immediate fractional-bits (#fbits) operand.
;; Operand 2 must be a vector constant of 2^n (aarch64_fp_vec_pow2), with
;; n in [1, element bit-size].
2123 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2124 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2125 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2127 (match_operand:VDQF 1 "register_operand" "w")
2128 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2131 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2132 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2134 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2136 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2137 output_asm_insn (buf, operands);
2140 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard pattern names for rounding float->int conversions; the matching
;; is done by the l<fcvt_pattern> insns above.
2143 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2144 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2145 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2146 [(match_operand:VHSDF 1 "register_operand")]
;; Truncating (round-toward-zero) float->int standard pattern.
2151 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2152 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2153 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2154 [(match_operand:VHSDF 1 "register_operand")]
;; ftrunc: round to integral value toward zero, result stays floating point.
2159 (define_expand "ftrunc<VHSDF:mode>2"
2160 [(set (match_operand:VHSDF 0 "register_operand")
2161 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Vector int->float conversion (scvtf/ucvtf).
2166 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2167 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2169 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2171 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2172 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2175 ;; Conversions between vectors of floats and doubles.
2176 ;; Contains a mix of patterns to match standard pattern names
2177 ;; and those for intrinsics.
2179 ;; Float widening operations.
;; Widen the low architectural half of the source vector (fcvtl).
;; Operand 2 must be the parallel selecting the low-half lanes.
2181 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2182 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2183 (float_extend:<VWIDE> (vec_select:<VHALF>
2184 (match_operand:VQ_HSF 1 "register_operand" "w")
2185 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2188 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2189 [(set_attr "type" "neon_fp_cvt_widen_s")]
2192 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point: operand 2 is the number of fractional bits.
2194 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2195 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2196 (unspec:<VHSDF:FCVT_TARGET>
2197 [(match_operand:VHSDF 1 "register_operand" "w")
2198 (match_operand:SI 2 "immediate_operand" "i")]
2201 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2202 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float: operand 2 is the number of fractional bits.
2205 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2206 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2207 (unspec:<VDQ_HSDI:FCVT_TARGET>
2208 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2209 (match_operand:SI 2 "immediate_operand" "i")]
2212 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2213 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2216 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2217 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2218 ;; the meaning of HI and LO changes depending on the target endianness.
2219 ;; While elsewhere we map the higher numbered elements of a vector to
2220 ;; the lower architectural lanes of the vector, for these patterns we want
2221 ;; to always treat "hi" as referring to the higher architectural lanes.
2222 ;; Consequently, while the patterns below look inconsistent with our
2223 ;; other big-endian patterns their behavior is as required.
;; Build the lo-half lane parallel ('false') and hand off to the
;; aarch64_simd_vec_unpacks_lo_<mode> insn above.
2225 (define_expand "vec_unpacks_lo_<mode>"
2226 [(match_operand:<VWIDE> 0 "register_operand")
2227 (match_operand:VQ_HSF 1 "register_operand")]
2230 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2231 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high architectural half of the source vector (fcvtl2).
;; Operand 2 must be the parallel selecting the hi-half lanes.
2237 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2238 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2239 (float_extend:<VWIDE> (vec_select:<VHALF>
2240 (match_operand:VQ_HSF 1 "register_operand" "w")
2241 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2244 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2245 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Build the hi-half lane parallel ('true') and hand off to the
;; aarch64_simd_vec_unpacks_hi_<mode> insn above.  This must use the _hi
;; generator: the _lo insn's vec_select only matches vect_par_cnst_lo_half,
;; so emitting it with a hi-half parallel could never be recognized.
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSF 1 "register_operand")]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
    DONE;
  }
)
;; Widen a 64-bit float vector to its double-width mode (fcvtl).
2259 (define_insn "aarch64_float_extend_lo_<Vwide>"
2260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261 (float_extend:<VWIDE>
2262 (match_operand:VDF 1 "register_operand" "w")))]
2264 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2265 [(set_attr "type" "neon_fp_cvt_widen_s")]
2268 ;; Float narrowing operations.
;; Narrow a double-width float vector into a 64-bit result (fcvtn).
2270 (define_insn "aarch64_float_truncate_lo_<mode>"
2271 [(set (match_operand:VDF 0 "register_operand" "=w")
2273 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2275 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2276 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; fcvtn2: narrow operand 2 into the high half of the destination while
;; keeping operand 1 (tied to the output) in the low half.  The _le and
;; _be variants differ only in the vec_concat operand order, reflecting
;; lane numbering under each endianness.
2279 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2280 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2282 (match_operand:VDF 1 "register_operand" "0")
2284 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2285 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2286 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2287 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: truncated half comes first in the concat.
2290 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2291 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2294 (match_operand:<VWIDE> 2 "register_operand" "w"))
2295 (match_operand:VDF 1 "register_operand" "0")))]
2296 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2297 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2298 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be insn according to the target endianness.
2301 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2302 [(match_operand:<VDBL> 0 "register_operand")
2303 (match_operand:VDF 1 "register_operand")
2304 (match_operand:<VWIDE> 2 "register_operand")]
2307 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2308 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2309 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2310 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: fcvtn narrows the first into a
;; V2SF temporary, then fcvtn2 narrows the second into the high half.
;; The lo/hi operand swap under big-endian mirrors the vec_unpacks
;; endianness note above.
2315 (define_expand "vec_pack_trunc_v2df"
2316 [(set (match_operand:V4SF 0 "register_operand")
2318 (float_truncate:V2SF
2319 (match_operand:V2DF 1 "register_operand"))
2320 (float_truncate:V2SF
2321 (match_operand:V2DF 2 "register_operand"))
2325 rtx tmp = gen_reg_rtx (V2SFmode);
2326 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2327 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2329 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2330 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2331 tmp, operands[hi]));
;; Pack two DF scalars into one V2SF: place them into the two halves of a
;; V2DF temporary, then narrow the whole vector with a single fcvtn.
;; The temporary must therefore be V2DF (the wide mode) -- it is the
;; destination of move_lo/hi_quad_v2df and the input of
;; aarch64_float_truncate_lo_v2sf, whose source is <VWIDE> (V2DF).
(define_expand "vec_pack_trunc_df"
  [(set (match_operand:V2SF 0 "register_operand")
	(vec_concat:V2SF
	  (float_truncate:SF
	    (match_operand:DF 1 "register_operand"))
	  (float_truncate:SF
	    (match_operand:DF 2 "register_operand"))))]
  "TARGET_SIMD"
  {
    rtx tmp = gen_reg_rtx (V2DFmode);
    /* As for vec_pack_trunc_v2df: swap lo/hi for big-endian.  */
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
2358 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2360 ;; a = (b < c) ? b : c;
2361 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2362 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2365 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2366 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2367 ;; operand will be returned when both operands are zero (i.e. they may not
2368 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2369 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2370 ;; NaNs.
;; smax/smin standard patterns implemented with fmaxnm/fminnm.
2372 (define_insn "<su><maxmin><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")))]
2377 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2378 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2381 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2382 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2383 ;; which implement the IEEE fmax ()/fmin () functions.
2384 (define_insn "<maxmin_uns><mode>3"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2387 (match_operand:VHSDF 2 "register_operand" "w")]
2390 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2391 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2394 ;; 'across lanes' add.
;; Reduce a vector to a scalar sum, then extract lane 0 (endian-corrected)
;; into the scalar result register.
2396 (define_expand "reduc_plus_scal_<mode>"
2397 [(match_operand:<VEL> 0 "register_operand")
2398 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2402 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2403 rtx scratch = gen_reg_rtx (<MODE>mode);
2404 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2405 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise floating-point add of two vectors.
2410 (define_insn "aarch64_faddp<mode>"
2411 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2412 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2413 (match_operand:VHSDF 2 "register_operand" "w")]
2416 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2417 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Across-lanes integer add (addv/addp, per <vp>).
2420 (define_insn "aarch64_reduc_plus_internal<mode>"
2421 [(set (match_operand:VDQV 0 "register_operand" "=w")
2422 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2425 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2426 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no addv; a single pairwise add reduces both lanes.
2429 (define_insn "aarch64_reduc_plus_internalv2si"
2430 [(set (match_operand:V2SI 0 "register_operand" "=w")
2431 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2434 "addp\\t%0.2s, %1.2s, %1.2s"
2435 [(set_attr "type" "neon_reduc_add")]
;; Two-element float vectors reduce with one scalar faddp.
2438 (define_insn "reduc_plus_scal_<mode>"
2439 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2440 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2443 "faddp\\t%<Vetype>0, %1.<Vtype>"
2444 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two rounds of pairwise adds, then extract lane 0.
2447 (define_expand "reduc_plus_scal_v4sf"
2448 [(set (match_operand:SF 0 "register_operand")
2449 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2453 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2454 rtx scratch = gen_reg_rtx (V4SFmode);
2455 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2456 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2457 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per element (cls).
2461 (define_insn "clrsb<mode>2"
2462 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2463 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2465 "cls\\t%0.<Vtype>, %1.<Vtype>"
2466 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element (clz).
2469 (define_insn "clz<mode>2"
2470 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2471 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2473 "clz\\t%0.<Vtype>, %1.<Vtype>"
2474 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte (cnt only operates on byte vectors).
2477 (define_insn "popcount<mode>2"
2478 [(set (match_operand:VB 0 "register_operand" "=w")
2479 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2481 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2482 [(set_attr "type" "neon_cnt<q>")]
2485 ;; 'across lanes' max and min ops.
2487 ;; Template for outputting a scalar, so we can create __builtins which can be
2488 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP reduction: run the internal across-lanes insn into a scratch vector,
;; then extract lane 0 (endian-corrected).
2489 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2490 [(match_operand:<VEL> 0 "register_operand")
2491 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2495 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2496 rtx scratch = gen_reg_rtx (<MODE>mode);
2497 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2499 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2504 ;; Likewise for integer cases, signed and unsigned.
2505 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2506 [(match_operand:<VEL> 0 "register_operand")
2507 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2511 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2512 rtx scratch = gen_reg_rtx (<MODE>mode);
2513 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2515 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min ([us]maxv/[us]minv).
2520 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2521 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2522 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2525 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2526 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
2529 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2530 [(set (match_operand:V2SI 0 "register_operand" "=w")
2531 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2534 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2535 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (fmax[nm]v/fmin[nm]v etc., per <vp>).
2538 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2539 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2540 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2543 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2544 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2547 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2549 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2552 ;; Thus our BSL is of the form:
2553 ;; op0 = bsl (mask, op2, op3)
2554 ;; We can use any of:
2557 ;; bsl mask, op1, op2
2558 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2559 ;; bit op0, op2, mask
2560 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2561 ;; bif op0, op1, mask
2563 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2564 ;; Some forms of straight-line code may generate the equivalent form
2565 ;; in *aarch64_simd_bsl<mode>_alt.
;; Alternative 0 ties the mask (op1) to the destination -> bsl;
;; alternative 1 ties op3 -> bit; alternative 2 ties op2 -> bif.
2567 (define_insn "aarch64_simd_bsl<mode>_internal"
2568 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2572 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2573 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2574 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2575 (match_dup:<V_INT_EQUIV> 3)
2579 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2580 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2581 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2582 [(set_attr "type" "neon_bsl<q>")]
2585 ;; We need this form in addition to the above pattern to match the case
2586 ;; when combine tries merging three insns such that the second operand of
2587 ;; the outer XOR matches the second operand of the inner XOR rather than
2588 ;; the first. The two are equivalent but since recog doesn't try all
2589 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same bsl/bit/bif selection as above, with the XOR operands commuted.
2591 (define_insn "*aarch64_simd_bsl<mode>_alt"
2592 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2596 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2597 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2598 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2599 (match_dup:<V_INT_EQUIV> 2)))]
2602 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2603 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2604 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2605 [(set_attr "type" "neon_bsl<q>")]
2608 ;; DImode is special, we want to avoid computing operations which are
2609 ;; more naturally computed in general purpose registers in the vector
2610 ;; registers. If we do that, we need to move all three operands from general
2611 ;; purpose registers to vector registers, then back again. However, we
2612 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2613 ;; optimizations based on the component operations of a BSL.
2615 ;; That means we need a splitter back to the individual operations, if they
2616 ;; would be better calculated on the integer side.
;; Fourth alternative keeps everything in GP registers (early-clobber
;; destination) and is split into xor/and/xor after reload.
2618 (define_insn_and_split "aarch64_simd_bsldi_internal"
2619 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2623 (match_operand:DI 3 "register_operand" "w,0,w,r")
2624 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2625 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2630 bsl\\t%0.8b, %2.8b, %3.8b
2631 bit\\t%0.8b, %2.8b, %1.8b
2632 bif\\t%0.8b, %3.8b, %1.8b
2634 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2635 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2637 /* Split back to individual operations. If we're before reload, and
2638 able to create a temporary register, do so. If we're after reload,
2639 we've got an early-clobber destination register, so use that.
2640 Otherwise, we can't create pseudos and we can't yet guarantee that
2641 operands[0] is safe to write, so FAIL to split. */
2644 if (reload_completed)
2645 scratch = operands[0];
2646 else if (can_create_pseudo_p ())
2647 scratch = gen_reg_rtx (DImode);
2651 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2652 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2653 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2656 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2657 (set_attr "length" "4,4,4,12")]
;; Commuted-XOR twin of aarch64_simd_bsldi_internal (same reason as the
;; *_alt pattern above); the GP-register split ends with xor against
;; operands[2] to match the commuted form.
2660 (define_insn_and_split "aarch64_simd_bsldi_alt"
2661 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2665 (match_operand:DI 3 "register_operand" "w,w,0,r")
2666 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2667 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2672 bsl\\t%0.8b, %3.8b, %2.8b
2673 bit\\t%0.8b, %3.8b, %1.8b
2674 bif\\t%0.8b, %2.8b, %1.8b
2676 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2677 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2679 /* Split back to individual operations. If we're before reload, and
2680 able to create a temporary register, do so. If we're after reload,
2681 we've got an early-clobber destination register, so use that.
2682 Otherwise, we can't create pseudos and we can't yet guarantee that
2683 operands[0] is safe to write, so FAIL to split. */
2686 if (reload_completed)
2687 scratch = operands[0];
2688 else if (can_create_pseudo_p ())
2689 scratch = gen_reg_rtx (DImode);
2693 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2694 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2695 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2698 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2699 (set_attr "length" "4,4,4,12")]
;; Public bsl expander.  Float modes are bit-selected in the equivalent
;; integer mode (lowpart punning on all three inputs), with the result
;; computed into an integer-mode temporary and moved back.
2702 (define_expand "aarch64_simd_bsl<mode>"
2703 [(match_operand:VALLDIF 0 "register_operand")
2704 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2705 (match_operand:VALLDIF 2 "register_operand")
2706 (match_operand:VALLDIF 3 "register_operand")]
2709 /* We can't alias operands together if they have different modes. */
2710 rtx tmp = operands[0];
2711 if (FLOAT_MODE_P (<MODE>mode))
2713 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2714 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2715 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2717 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2718 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2722 if (tmp != operands[0])
2723 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select operand 1 where the mask (operand 3) is set, else
;; operand 2.  The all-ones/all-zeros constant cases collapse to a move
;; or a bitwise NOT of the mask; otherwise force the values into
;; registers and emit a BSL.
2728 (define_expand "vcond_mask_<mode><v_int_equiv>"
2729 [(match_operand:VALLDI 0 "register_operand")
2730 (match_operand:VALLDI 1 "nonmemory_operand")
2731 (match_operand:VALLDI 2 "nonmemory_operand")
2732 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2735 /* If we have (a = (P) ? -1 : 0);
2736 Then we can simply move the generated mask (result must be int). */
2737 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2738 && operands[2] == CONST0_RTX (<MODE>mode))
2739 emit_move_insn (operands[0], operands[3]);
2740 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2741 else if (operands[1] == CONST0_RTX (<MODE>mode)
2742 && operands[2] == CONSTM1_RTX (<MODE>mode))
2743 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2746 if (!REG_P (operands[1]))
2747 operands[1] = force_reg (<MODE>mode, operands[1]);
2748 if (!REG_P (operands[2]))
2749 operands[2] = force_reg (<MODE>mode, operands[2]);
2750 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2751 operands[1], operands[2]));
2757 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare.  Dispatches on the rtx code of operand 1 to one
;; of the cm{lt,le,gt,ge,eq} / cm{gtu,geu} patterns (unsigned LT/LE are
;; handled by swapping the compare operands); NE is emitted as NOT (EQ).
;; A zero operand 3 is kept as-is (compare-against-zero forms exist),
;; otherwise it is forced into a register.
2759 (define_expand "vec_cmp<mode><mode>"
2760 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2761 (match_operator 1 "comparison_operator"
2762 [(match_operand:VSDQ_I_DI 2 "register_operand")
2763 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2766 rtx mask = operands[0];
2767 enum rtx_code code = GET_CODE (operands[1]);
2777 if (operands[3] == CONST0_RTX (<MODE>mode))
2782 if (!REG_P (operands[3]))
2783 operands[3] = force_reg (<MODE>mode, operands[3]);
2791 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2795 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2799 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2803 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2811 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2815 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2819 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]))
;; (NE case below.)
2823 /* Handle NE as !EQ. */
2824 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2825 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2829 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; Floating-point vector compare producing an integer mask.
;; Ordered compares map onto fcm{eq,ge,gt,le,lt} (some by swapping the
;; operands); unordered compares (UN<cc>) are built from cmeq self-compares
;; that detect NaN lanes, masking NaN elements to zero before the ordered
;; compare and OR-NOTing the unordered lanes back in.  LTGT uses the
;; faster (a > b) | (b > a) form.  Case labels are interleaved with the
;; emit sequences below.
2839 (define_expand "vec_cmp<mode><v_int_equiv>"
2840 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2841 (match_operator 1 "comparison_operator"
2842 [(match_operand:VDQF 2 "register_operand")
2843 (match_operand:VDQF 3 "nonmemory_operand")]))]
2846 int use_zero_form = 0;
2847 enum rtx_code code = GET_CODE (operands[1]);
2848 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2850 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2859 if (operands[3] == CONST0_RTX (<MODE>mode))
2866 if (!REG_P (operands[3]))
2867 operands[3] = force_reg (<MODE>mode, operands[3]);
2877 comparison = gen_aarch64_cmlt<mode>;
2882 std::swap (operands[2], operands[3]);
2886 comparison = gen_aarch64_cmgt<mode>;
2891 comparison = gen_aarch64_cmle<mode>;
2896 std::swap (operands[2], operands[3]);
2900 comparison = gen_aarch64_cmge<mode>;
2904 comparison = gen_aarch64_cmeq<mode>;
2922 /* All of the above must not raise any FP exceptions. Thus we first
2923 check each operand for NaNs and force any elements containing NaN to
2924 zero before using them in the compare.
2925 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2926 (cm<cc> (isnan (a) ? 0.0 : a,
2927 isnan (b) ? 0.0 : b))
2928 We use the following transformations for doing the comparisions:
2932 a UNLT b -> b GT a. */
;; tmp0/tmp1: per-operand "is ordered" masks (cmeq x,x is false on NaN
;; lanes); tmp2: both-ordered mask used to OR the unordered lanes in.
2934 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2935 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2936 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2937 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2938 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2939 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2940 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2941 lowpart_subreg (<V_INT_EQUIV>mode,
2944 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2945 lowpart_subreg (<V_INT_EQUIV>mode,
2948 gcc_assert (comparison != NULL);
2949 emit_insn (comparison (operands[0],
2950 lowpart_subreg (<MODE>mode,
2951 tmp0, <V_INT_EQUIV>mode),
2952 lowpart_subreg (<MODE>mode,
2953 tmp1, <V_INT_EQUIV>mode)));
2954 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2964 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2965 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2971 a NE b -> ~(a EQ b) */
2972 gcc_assert (comparison != NULL);
2973 emit_insn (comparison (operands[0], operands[2], operands[3]));
2975 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2979 /* LTGT is not guranteed to not generate a FP exception. So let's
2980 go the faster way : ((a > b) || (b > a)). */
2981 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2982 operands[2], operands[3]));
2983 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2984 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2990 /* cmeq (a, a) & cmeq (b, b). */
2991 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2992 operands[2], operands[2]));
2993 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2994 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2996 if (code == UNORDERED)
2997 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2998 else if (code == UNEQ)
3000 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3001 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer vector compare: identical to vec_cmp, which already
;; handles the unsigned rtx codes (LTU/LEU/GTU/GEU), so just forward.
3012 (define_expand "vec_cmpu<mode><mode>"
3013 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3014 (match_operator 1 "comparison_operator"
3015 [(match_operand:VSDQ_I_DI 2 "register_operand")
3016 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3019 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3020 operands[2], operands[3]));
;; vcond: compute the comparison mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped to save
;; the mask inversion.
3024 (define_expand "vcond<mode><mode>"
3025 [(set (match_operand:VALLDI 0 "register_operand")
3026 (if_then_else:VALLDI
3027 (match_operator 3 "comparison_operator"
3028 [(match_operand:VALLDI 4 "register_operand")
3029 (match_operand:VALLDI 5 "nonmemory_operand")])
3030 (match_operand:VALLDI 1 "nonmemory_operand")
3031 (match_operand:VALLDI 2 "nonmemory_operand")))]
3034 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3035 enum rtx_code code = GET_CODE (operands[3]);
3037 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3038 it as well as switch operands 1/2 in order to avoid the additional
3042 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3043 operands[4], operands[5]);
3044 std::swap (operands[1], operands[2]);
3046 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3047 operands[4], operands[5]));
3048 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3049 operands[2], mask));
;; vcond with a float comparison mode but an equal-width result mode of
;; the other class (<V_cmp_mixed>); same NE-to-EQ-and-swap trick.
3054 (define_expand "vcond<v_cmp_mixed><mode>"
3055 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3056 (if_then_else:<V_cmp_mixed>
3057 (match_operator 3 "comparison_operator"
3058 [(match_operand:VDQF_COND 4 "register_operand")
3059 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3060 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3061 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3064 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3065 enum rtx_code code = GET_CODE (operands[3]);
3067 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3068 it as well as switch operands 1/2 in order to avoid the additional
3072 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3073 operands[4], operands[5]);
3074 std::swap (operands[1], operands[2]);
3076 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3077 operands[4], operands[5]));
3078 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3079 operands[0], operands[1],
3080 operands[2], mask));
;; Unsigned vcond on integer vectors: same structure as vcond, mask
;; computed in the comparison mode itself.
3085 (define_expand "vcondu<mode><mode>"
3086 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3087 (if_then_else:VSDQ_I_DI
3088 (match_operator 3 "comparison_operator"
3089 [(match_operand:VSDQ_I_DI 4 "register_operand")
3090 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3091 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3092 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3095 rtx mask = gen_reg_rtx (<MODE>mode);
3096 enum rtx_code code = GET_CODE (operands[3]);
3098 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3099 it as well as switch operands 1/2 in order to avoid the additional
3103 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3104 operands[4], operands[5]);
3105 std::swap (operands[1], operands[2]);
3107 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3108 operands[4], operands[5]));
3109 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3110 operands[2], mask));
;; Unsigned comparison selecting between float vectors: compare in the
;; integer mode <V_cmp_mixed>, then select the VDQF values by mask.
3114 (define_expand "vcondu<mode><v_cmp_mixed>"
3115 [(set (match_operand:VDQF 0 "register_operand")
3117 (match_operator 3 "comparison_operator"
3118 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3119 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3120 (match_operand:VDQF 1 "nonmemory_operand")
3121 (match_operand:VDQF 2 "nonmemory_operand")))]
3124 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3125 enum rtx_code code = GET_CODE (operands[3]);
3127 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3128 it as well as switch operands 1/2 in order to avoid the additional
3132 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3133 operands[4], operands[5]);
3134 std::swap (operands[1], operands[2]);
3136 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3138 operands[4], operands[5]));
3139 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3140 operands[2], mask));
3144 ;; Patterns for AArch64 SIMD Intrinsics.
3146 ;; Lane extraction with sign extension to general purpose register.
;; smov: lane index is flipped for big-endian at output time.
3147 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3148 [(set (match_operand:GPI 0 "register_operand" "=r")
3150 (vec_select:<VDQQH:VEL>
3151 (match_operand:VDQQH 1 "register_operand" "w")
3152 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3155 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3156 INTVAL (operands[2]));
3157 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3159 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; Zero-extending variant: umov writes the w register, which implicitly
;; zeroes the upper bits for the DI case too.
3162 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3163 [(set (match_operand:GPI 0 "register_operand" "=r")
3165 (vec_select:<VDQQH:VEL>
3166 (match_operand:VDQQH 1 "register_operand" "w")
3167 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3170 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3171 INTVAL (operands[2]));
3172 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3174 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3177 ;; Lane extraction of a value, neither sign nor zero extension
3178 ;; is guaranteed so upper bits should be considered undefined.
3179 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: to a GP register (umov), to a SIMD register (dup),
;; or straight to memory (st1 of one lane).
3180 (define_insn "aarch64_get_lane<mode>"
3181 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3183 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3184 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3187 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3188 switch (which_alternative)
3191 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3193 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3195 return "st1\\t{%1.<Vetype>}[%2], %0";
3200 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Load two adjacent 64-bit values as one double-width vector.  The
;; condition requires operand 2's address to be exactly operand 1's plus
;; the mode size, so a single ldr of the double-width mode suffices;
;; hence also !STRICT_ALIGNMENT.
3203 (define_insn "load_pair_lanes<mode>"
3204 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3206 (match_operand:VDC 1 "memory_operand" "Utq")
3207 (match_operand:VDC 2 "memory_operand" "m")))]
3208 "TARGET_SIMD && !STRICT_ALIGNMENT
3209 && rtx_equal_p (XEXP (operands[2], 0),
3210 plus_constant (Pmode,
3211 XEXP (operands[1], 0),
3212 GET_MODE_SIZE (<MODE>mode)))"
3214 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a concat of two 64-bit values: from SIMD regs or, second
;; alternative, as an stp of two GP registers.
3217 (define_insn "store_pair_lanes<mode>"
3218 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3220 (match_operand:VDC 1 "register_operand" "w, r")
3221 (match_operand:VDC 2 "register_operand" "w, r")))]
3225 stp\\t%x1, %x2, %y0"
3226 [(set_attr "type" "neon_stp, store_16")]
;; NOTE(review): extraction gaps throughout this section (line numbers jump);
;; output templates and some wrappers are missing.  Code kept byte-identical.
3229 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with zero into a 128-bit register.  The
;; little-endian variant; alternatives cover SIMD reg, GP reg and memory
;; sources ("arch" attribute selects simd/fp accordingly).
3232 (define_insn "@aarch64_combinez<mode>"
3233 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3235 (match_operand:VDC 1 "general_operand" "w,?r,m")
3236 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3237 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3242 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3243 (set_attr "arch" "simd,fp,simd")]
;; Big-endian twin of the pattern above: the zero and the value swap
;; positions inside the (missing) vec_concat.
3246 (define_insn "@aarch64_combinez_be<mode>"
3247 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3249 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3250 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3251 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3256 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3257 (set_attr "arch" "simd,fp,simd")]
;; Expander: combine two 64-bit registers into a 128-bit result via a
;; target-specific split helper.
3260 (define_expand "aarch64_combine<mode>"
3261 [(match_operand:<VDBL> 0 "register_operand")
3262 (match_operand:VDC 1 "register_operand")
3263 (match_operand:VDC 2 "register_operand")]
3266 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Expander: same combination done as two half-register moves
;; (move_lo_quad then move_hi_quad).
3272 (define_expand "@aarch64_simd_combine<mode>"
3273 [(match_operand:<VDBL> 0 "register_operand")
3274 (match_operand:VDC 1 "register_operand")
3275 (match_operand:VDC 2 "register_operand")]
3278 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3279 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3282 [(set_attr "type" "multiple")]
;; NOTE(review): extraction gaps — insn conditions and some operand lines
;; are missing (line numbers jump).  Code kept byte-identical.
3285 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q registers:
;; [su]addl2 / [su]subl2.  Operand 3 is the hi-half lane-selection parallel.
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290 (match_operand:VQW 1 "register_operand" "w")
3291 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293 (match_operand:VQW 2 "register_operand" "w")
3296 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3297 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves: [su]addl / [su]subl.
3300 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3301 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3302 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3303 (match_operand:VQW 1 "register_operand" "w")
3304 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3305 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3306 (match_operand:VQW 2 "register_operand" "w")
3309 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3310 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four expanders below build the hi-half lane parallel and emit the
;; corresponding *_hi_internal pattern.
3314 (define_expand "aarch64_saddl2<mode>"
3315 [(match_operand:<VWIDE> 0 "register_operand")
3316 (match_operand:VQW 1 "register_operand")
3317 (match_operand:VQW 2 "register_operand")]
3320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3326 (define_expand "aarch64_uaddl2<mode>"
3327 [(match_operand:<VWIDE> 0 "register_operand")
3328 (match_operand:VQW 1 "register_operand")
3329 (match_operand:VQW 2 "register_operand")]
3332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3338 (define_expand "aarch64_ssubl2<mode>"
3339 [(match_operand:<VWIDE> 0 "register_operand")
3340 (match_operand:VQW 1 "register_operand")
3341 (match_operand:VQW 2 "register_operand")]
3344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3350 (define_expand "aarch64_usubl2<mode>"
3351 [(match_operand:<VWIDE> 0 "register_operand")
3352 (match_operand:VQW 1 "register_operand")
3353 (match_operand:VQW 2 "register_operand")]
3356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3357 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-register widening add/sub on 64-bit vectors (no half selection).
3362 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3364 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3365 (match_operand:VD_BHSI 1 "register_operand" "w"))
3367 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3369 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3370 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; NOTE(review): extraction gaps — conditions and some extend wrappers are
;; missing (line numbers jump).  Code kept byte-identical.
3373 ;; <su><addsub>w<q>.
;; widen_ssum for Q-register inputs: lo-half saddw into a temp, then
;; saddw2 for the high half.
3375 (define_expand "widen_ssum<mode>3"
3376 [(set (match_operand:<VDBLW> 0 "register_operand")
3377 (plus:<VDBLW> (sign_extend:<VDBLW>
3378 (match_operand:VQW 1 "register_operand"))
3379 (match_operand:<VDBLW> 2 "register_operand")))]
3382 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3383 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3385 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3387 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; widen_ssum for 64-bit inputs: single saddw suffices.
3392 (define_expand "widen_ssum<mode>3"
3393 [(set (match_operand:<VWIDE> 0 "register_operand")
3394 (plus:<VWIDE> (sign_extend:<VWIDE>
3395 (match_operand:VD_BHSI 1 "register_operand"))
3396 (match_operand:<VWIDE> 2 "register_operand")))]
3399 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned twins of the two expanders above (uaddw / uaddw2).
3403 (define_expand "widen_usum<mode>3"
3404 [(set (match_operand:<VDBLW> 0 "register_operand")
3405 (plus:<VDBLW> (zero_extend:<VDBLW>
3406 (match_operand:VQW 1 "register_operand"))
3407 (match_operand:<VDBLW> 2 "register_operand")))]
3410 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3411 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3413 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3415 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3420 (define_expand "widen_usum<mode>3"
3421 [(set (match_operand:<VWIDE> 0 "register_operand")
3422 (plus:<VWIDE> (zero_extend:<VWIDE>
3423 (match_operand:VD_BHSI 1 "register_operand"))
3424 (match_operand:<VWIDE> 2 "register_operand")))]
3427 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; [su]subw: wide minus extended narrow, whole 64-bit vector.
3431 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3432 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3435 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3437 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3438 [(set_attr "type" "neon_sub_widen")]
;; [su]subw on the low half of a Q register.
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3442 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3446 (match_operand:VQW 2 "register_operand" "w")
3447 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3449 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3450 [(set_attr "type" "neon_sub_widen")]
;; [su]subw2 on the high half of a Q register.
3453 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3458 (match_operand:VQW 2 "register_operand" "w")
3459 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3461 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3462 [(set_attr "type" "neon_sub_widen")]
;; [su]addw: wide plus extended narrow, and the lo/hi-half variants.
3465 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3466 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3468 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3469 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3471 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3472 [(set_attr "type" "neon_add_widen")]
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3476 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3480 (match_operand:VQW 2 "register_operand" "w")
3481 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3482 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3484 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3485 [(set_attr "type" "neon_add_widen")]
3488 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3493 (match_operand:VQW 2 "register_operand" "w")
3494 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3495 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3497 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3498 [(set_attr "type" "neon_add_widen")]
;; NOTE(review): extraction gaps — trailing emit arguments are missing
;; (line numbers jump).  Code kept byte-identical.
;; Public [su]addw2 / [su]subw2 expanders: build the hi-half lane parallel
;; and forward to the matching *_internal pattern.
3501 (define_expand "aarch64_saddw2<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (match_operand:<VWIDE> 1 "register_operand")
3504 (match_operand:VQW 2 "register_operand")]
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3513 (define_expand "aarch64_uaddw2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand")
3515 (match_operand:<VWIDE> 1 "register_operand")
3516 (match_operand:VQW 2 "register_operand")]
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3526 (define_expand "aarch64_ssubw2<mode>"
3527 [(match_operand:<VWIDE> 0 "register_operand")
3528 (match_operand:<VWIDE> 1 "register_operand")
3529 (match_operand:VQW 2 "register_operand")]
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3538 (define_expand "aarch64_usubw2<mode>"
3539 [(match_operand:<VWIDE> 0 "register_operand")
3540 (match_operand:<VWIDE> 1 "register_operand")
3541 (match_operand:VQW 2 "register_operand")]
3544 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3545 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
;; NOTE(review): extraction gaps — the unspec names, conditions and expander
;; bodies are partly missing (line numbers jump).  Code kept byte-identical.
3550 ;; <su><r>h<addsub>.
;; Standard-named average expanders mapping onto halving-add unspecs
;; (floor = truncating, ceil = rounding variants).
3552 (define_expand "<u>avg<mode>3_floor"
3553 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3554 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3555 (match_operand:VDQ_BHSI 2 "register_operand")]
3560 (define_expand "<u>avg<mode>3_ceil"
3561 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3562 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3563 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Halving add/sub insn: [su][r]hadd / [su]hsub.
3568 (define_insn "aarch64_<sur>h<addsub><mode>"
3569 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3570 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3571 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3574 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3575 [(set_attr "type" "neon_<addsub>_halve<q>")]
3578 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: [r]addhn / [r]subhn, and the "2"
;; variant that writes the upper half of the destination (operand 1 is
;; tied to the low half via constraint "0").
3580 (define_insn "aarch64_<sur><addsub>hn<mode>"
3581 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3582 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3583 (match_operand:VQN 2 "register_operand" "w")]
3586 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3587 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3590 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3591 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3592 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3593 (match_operand:VQN 2 "register_operand" "w")
3594 (match_operand:VQN 3 "register_operand" "w")]
3597 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3598 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; NOTE(review): extraction gaps — unspec tags and insn conditions are
;; missing (line numbers jump).  Code kept byte-identical.
;; Polynomial multiply on byte vectors.
3603 (define_insn "aarch64_pmul<mode>"
3604 [(set (match_operand:VB 0 "register_operand" "=w")
3605 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3606 (match_operand:VB 2 "register_operand" "w")]
3609 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3610 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: extended floating-point multiply, vector and scalar forms.
3615 (define_insn "aarch64_fmulx<mode>"
3616 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3618 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3619 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3622 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3623 [(set_attr "type" "neon_fp_mul_<stype>")]
3626 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane taken from the opposite-width vector mode.
3628 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3629 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3631 [(match_operand:VDQSF 1 "register_operand" "w")
3632 (vec_duplicate:VDQSF
3634 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3635 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3639 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3640 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3642 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3645 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-mode vector.
3647 (define_insn "*aarch64_mulx_elt<mode>"
3648 [(set (match_operand:VDQF 0 "register_operand" "=w")
3650 [(match_operand:VDQF 1 "register_operand" "w")
3653 (match_operand:VDQF 2 "register_operand" "w")
3654 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3658 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3659 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3661 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX by a scalar broadcast (lane 0 of the duplicated element).
3666 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3667 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3669 [(match_operand:VHSDF 1 "register_operand" "w")
3670 (vec_duplicate:VHSDF
3671 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3674 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3675 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3678 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3679 ;; vmulxd_lane_f64 == vmulx_lane_f64
3680 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX with one operand extracted from a vector lane.
3682 (define_insn "*aarch64_vgetfmulx<mode>"
3683 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3685 [(match_operand:<VEL> 1 "register_operand" "w")
3687 (match_operand:VDQF 2 "register_operand" "w")
3688 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3692 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3693 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3695 [(set_attr "type" "fmul<Vetype>")]
;; NOTE(review): extraction gaps — unspec tags and conditions are missing
;; (line numbers jump).  Code kept byte-identical.
;; Saturating add/sub: [su]qadd / [su]qsub via the BINQOPS iterator.
3699 (define_insn "aarch64_<su_optab><optab><mode>"
3700 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3701 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3702 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3704 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3705 [(set_attr "type" "neon_<optab><q>")]
3708 ;; suqadd and usqadd
;; Accumulating saturating add of opposite signedness; operand 1 is tied
;; to the destination ("0"), only operand 2 appears in the assembly.
3710 (define_insn "aarch64_<sur>qadd<mode>"
3711 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3712 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3713 (match_operand:VSDQ_I 2 "register_operand" "w")]
3716 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3717 [(set_attr "type" "neon_qadd<q>")]
;; Saturating narrow with unsigned saturation of a signed source: SQXTUN.
3722 (define_insn "aarch64_sqmovun<mode>"
3723 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3724 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3727 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3728 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3731 ;; sqmovn and uqmovn
;; Saturating narrow keeping signedness: SQXTN / UQXTN.
3733 (define_insn "aarch64_<sur>qmovn<mode>"
3734 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3735 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3738 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3739 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary op (s<optab>, e.g. SQABS/SQNEG per the iterator,
;; which is not visible in this extract).
3744 (define_insn "aarch64_s<optab><mode>"
3745 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3747 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3749 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3750 [(set_attr "type" "neon_<optab><q>")]
;; NOTE(review): extraction gaps — unspec tags, vec_select/vec_duplicate
;; wrappers and conditions are missing (line numbers jump).  Code kept
;; byte-identical.
;; Saturating doubling multiply high: SQDMULH / SQRDMULH (<r> selects the
;; rounding variant).
3755 (define_insn "aarch64_sq<r>dmulh<mode>"
3756 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3758 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3759 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3762 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3763 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector-by-lane forms: <VCOND> (64-bit) and <VCONQ> (128-bit) supply
;; the lane source; the lane index is endian-corrected before printing.
3768 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3769 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3771 [(match_operand:VDQHS 1 "register_operand" "w")
3773 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3774 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3778 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3779 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3780 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3783 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3784 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3786 [(match_operand:VDQHS 1 "register_operand" "w")
3788 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3789 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3793 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3794 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3795 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by-lane forms of the same operation.
3798 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3799 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3801 [(match_operand:SD_HSI 1 "register_operand" "w")
3803 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3804 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3808 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3809 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3810 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3813 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3814 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3816 [(match_operand:SD_HSI 1 "register_operand" "w")
3818 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3823 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3824 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3825 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; NOTE(review): extraction gaps — unspec tags, vec_select wrappers and
;; conditions are missing (line numbers jump).  Code kept byte-identical.
;; Saturating rounding doubling multiply-accumulate/subtract high:
;; SQRDMLAH / SQRDMLSH (SQRDMLH_AS selects accumulate vs subtract).
;; Operand 1 is the accumulator, tied to the destination ("0").
3830 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3831 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3833 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3834 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3835 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3838 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3839 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3842 ;; sqrdml[as]h_lane.
;; By-lane variants, vector (VDQHS) then scalar (SD_HSI); lane index is
;; endian-corrected before printing.
3844 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3845 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3847 [(match_operand:VDQHS 1 "register_operand" "0")
3848 (match_operand:VDQHS 2 "register_operand" "w")
3850 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3851 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3855 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3857 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3859 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3862 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3863 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3865 [(match_operand:SD_HSI 1 "register_operand" "0")
3866 (match_operand:SD_HSI 2 "register_operand" "w")
3868 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3869 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3873 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3875 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3877 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3880 ;; sqrdml[as]h_laneq.
;; Same again with a 128-bit (<VCONQ>) lane source.
3882 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3883 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3885 [(match_operand:VDQHS 1 "register_operand" "0")
3886 (match_operand:VDQHS 2 "register_operand" "w")
3888 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3889 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3893 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3895 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3897 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3900 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3901 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3903 [(match_operand:SD_HSI 1 "register_operand" "0")
3904 (match_operand:SD_HSI 2 "register_operand" "w")
3906 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3907 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3911 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3913 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3915 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): extraction gaps — ss_plus/ss_minus wrappers, conditions and
;; some closing RTL are missing (line numbers jump).  Code kept byte-identical.
;; Saturating doubling multiply-add/sub long: SQDMLAL / SQDMLSL
;; (SBINQOPS selects add vs sub).  Operand 1 is the wide accumulator,
;; tied to the destination ("0").
3920 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923 (match_operand:<VWIDE> 1 "register_operand" "0")
3926 (sign_extend:<VWIDE>
3927 (match_operand:VSD_HSI 2 "register_operand" "w"))
3928 (sign_extend:<VWIDE>
3929 (match_operand:VSD_HSI 3 "register_operand" "w")))
3932 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3933 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane forms for 64-bit vectors: lane source from <VCOND> (lane) or
;; <VCONQ> (laneq); index endian-corrected before printing.
3938 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3941 (match_operand:<VWIDE> 1 "register_operand" "0")
3944 (sign_extend:<VWIDE>
3945 (match_operand:VD_HSI 2 "register_operand" "w"))
3946 (sign_extend:<VWIDE>
3947 (vec_duplicate:VD_HSI
3949 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3950 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3955 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3957 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3959 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3962 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3963 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3965 (match_operand:<VWIDE> 1 "register_operand" "0")
3968 (sign_extend:<VWIDE>
3969 (match_operand:VD_HSI 2 "register_operand" "w"))
3970 (sign_extend:<VWIDE>
3971 (vec_duplicate:VD_HSI
3973 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3979 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3981 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3983 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) by-lane forms.
3986 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3987 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3989 (match_operand:<VWIDE> 1 "register_operand" "0")
3992 (sign_extend:<VWIDE>
3993 (match_operand:SD_HSI 2 "register_operand" "w"))
3994 (sign_extend:<VWIDE>
3996 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3997 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4002 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4004 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4006 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4009 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4012 (match_operand:<VWIDE> 1 "register_operand" "0")
4015 (sign_extend:<VWIDE>
4016 (match_operand:SD_HSI 2 "register_operand" "w"))
4017 (sign_extend:<VWIDE>
4019 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4020 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4025 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4027 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4029 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: second multiplicand is a scalar broadcast (lane 0 of the dup).
4034 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4037 (match_operand:<VWIDE> 1 "register_operand" "0")
4040 (sign_extend:<VWIDE>
4041 (match_operand:VD_HSI 2 "register_operand" "w"))
4042 (sign_extend:<VWIDE>
4043 (vec_duplicate:VD_HSI
4044 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4047 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4048 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; "2" variant operating on the high halves of two Q registers
;; (operands 4 is the hi-half lane parallel): SQDMLAL2 / SQDMLSL2.
4053 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4054 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4056 (match_operand:<VWIDE> 1 "register_operand" "0")
4059 (sign_extend:<VWIDE>
4061 (match_operand:VQ_HSI 2 "register_operand" "w")
4062 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4063 (sign_extend:<VWIDE>
4065 (match_operand:VQ_HSI 3 "register_operand" "w")
4069 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4070 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Public expanders: build the hi-half parallel and emit the internal insn.
4073 (define_expand "aarch64_sqdmlal2<mode>"
4074 [(match_operand:<VWIDE> 0 "register_operand")
4075 (match_operand:<VWIDE> 1 "register_operand")
4076 (match_operand:VQ_HSI 2 "register_operand")
4077 (match_operand:VQ_HSI 3 "register_operand")]
4080 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4082 operands[2], operands[3], p));
4086 (define_expand "aarch64_sqdmlsl2<mode>"
4087 [(match_operand:<VWIDE> 0 "register_operand")
4088 (match_operand:<VWIDE> 1 "register_operand")
4089 (match_operand:VQ_HSI 2 "register_operand")
4090 (match_operand:VQ_HSI 3 "register_operand")]
4093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4094 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4095 operands[2], operands[3], p));
;; NOTE(review): extraction gaps — ss_plus/ss_minus wrappers, conditions and
;; trailing emit arguments are missing (line numbers jump).  Code kept
;; byte-identical.
;; SQDML[AS]L2 by-lane internals: multiply the hi half of Q-register
;; operand 2 by lane 4 of operand 3 (<VCOND> = lane, <VCONQ> = laneq),
;; accumulating into tied operand 1.
4101 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4102 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4104 (match_operand:<VWIDE> 1 "register_operand" "0")
4107 (sign_extend:<VWIDE>
4109 (match_operand:VQ_HSI 2 "register_operand" "w")
4110 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4111 (sign_extend:<VWIDE>
4112 (vec_duplicate:<VHALF>
4114 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4115 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4120 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4122 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4124 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4127 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4128 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4130 (match_operand:<VWIDE> 1 "register_operand" "0")
4133 (sign_extend:<VWIDE>
4135 (match_operand:VQ_HSI 2 "register_operand" "w")
4136 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4137 (sign_extend:<VWIDE>
4138 (vec_duplicate:<VHALF>
4140 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4141 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4146 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4148 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4150 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Public by-lane expanders: build the hi-half parallel and forward to the
;; corresponding internal insn.
4153 (define_expand "aarch64_sqdmlal2_lane<mode>"
4154 [(match_operand:<VWIDE> 0 "register_operand")
4155 (match_operand:<VWIDE> 1 "register_operand")
4156 (match_operand:VQ_HSI 2 "register_operand")
4157 (match_operand:<VCOND> 3 "register_operand")
4158 (match_operand:SI 4 "immediate_operand")]
4161 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4162 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4163 operands[2], operands[3],
4168 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4169 [(match_operand:<VWIDE> 0 "register_operand")
4170 (match_operand:<VWIDE> 1 "register_operand")
4171 (match_operand:VQ_HSI 2 "register_operand")
4172 (match_operand:<VCONQ> 3 "register_operand")
4173 (match_operand:SI 4 "immediate_operand")]
4176 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4177 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4178 operands[2], operands[3],
4183 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4184 [(match_operand:<VWIDE> 0 "register_operand")
4185 (match_operand:<VWIDE> 1 "register_operand")
4186 (match_operand:VQ_HSI 2 "register_operand")
4187 (match_operand:<VCOND> 3 "register_operand")
4188 (match_operand:SI 4 "immediate_operand")]
4191 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4192 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4193 operands[2], operands[3],
4198 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4199 [(match_operand:<VWIDE> 0 "register_operand")
4200 (match_operand:<VWIDE> 1 "register_operand")
4201 (match_operand:VQ_HSI 2 "register_operand")
4202 (match_operand:<VCONQ> 3 "register_operand")
4203 (match_operand:SI 4 "immediate_operand")]
4206 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4207 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4208 operands[2], operands[3],
;; _n variant: hi half of operand 2 times a scalar broadcast (operand 3),
;; printed as lane [0].
4213 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4214 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4216 (match_operand:<VWIDE> 1 "register_operand" "0")
4219 (sign_extend:<VWIDE>
4221 (match_operand:VQ_HSI 2 "register_operand" "w")
4222 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4223 (sign_extend:<VWIDE>
4224 (vec_duplicate:<VHALF>
4225 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4228 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4229 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4232 (define_expand "aarch64_sqdmlal2_n<mode>"
4233 [(match_operand:<VWIDE> 0 "register_operand")
4234 (match_operand:<VWIDE> 1 "register_operand")
4235 (match_operand:VQ_HSI 2 "register_operand")
4236 (match_operand:<VEL> 3 "register_operand")]
4239 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4240 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4241 operands[2], operands[3],
4246 (define_expand "aarch64_sqdmlsl2_n<mode>"
4247 [(match_operand:<VWIDE> 0 "register_operand")
4248 (match_operand:<VWIDE> 1 "register_operand")
4249 (match_operand:VQ_HSI 2 "register_operand")
4250 (match_operand:<VEL> 3 "register_operand")]
4253 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4254 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4255 operands[2], operands[3],
;; NOTE(review): extraction gaps — ss_ashift/mult wrappers and conditions
;; are missing (line numbers jump).  Code kept byte-identical.
;; Saturating doubling multiply long: SQDMULL, widening both operands.
4262 (define_insn "aarch64_sqdmull<mode>"
4263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4266 (sign_extend:<VWIDE>
4267 (match_operand:VSD_HSI 1 "register_operand" "w"))
4268 (sign_extend:<VWIDE>
4269 (match_operand:VSD_HSI 2 "register_operand" "w")))
4272 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4273 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; By-lane variants for 64-bit vectors (<VCOND> lane / <VCONQ> laneq);
;; the lane index is endian-corrected before printing.
4278 (define_insn "aarch64_sqdmull_lane<mode>"
4279 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4282 (sign_extend:<VWIDE>
4283 (match_operand:VD_HSI 1 "register_operand" "w"))
4284 (sign_extend:<VWIDE>
4285 (vec_duplicate:VD_HSI
4287 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4288 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4293 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4294 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4296 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4299 (define_insn "aarch64_sqdmull_laneq<mode>"
4300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4303 (sign_extend:<VWIDE>
4304 (match_operand:VD_HSI 1 "register_operand" "w"))
4305 (sign_extend:<VWIDE>
4306 (vec_duplicate:VD_HSI
4308 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4309 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4314 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4315 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4317 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) by-lane variants.
4320 (define_insn "aarch64_sqdmull_lane<mode>"
4321 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4324 (sign_extend:<VWIDE>
4325 (match_operand:SD_HSI 1 "register_operand" "w"))
4326 (sign_extend:<VWIDE>
4328 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4334 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4335 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4337 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4340 (define_insn "aarch64_sqdmull_laneq<mode>"
4341 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4344 (sign_extend:<VWIDE>
4345 (match_operand:SD_HSI 1 "register_operand" "w"))
4346 (sign_extend:<VWIDE>
4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4349 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4354 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4355 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4357 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n variant: second multiplicand is a scalar broadcast, printed as [0].
4362 (define_insn "aarch64_sqdmull_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4366 (sign_extend:<VWIDE>
4367 (match_operand:VD_HSI 1 "register_operand" "w"))
4368 (sign_extend:<VWIDE>
4369 (vec_duplicate:VD_HSI
4370 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4374 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4375 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: same operation on the high halves of two Q registers
;; (operand 3 is the hi-half lane parallel).
4382 (define_insn "aarch64_sqdmull2<mode>_internal"
4383 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4386 (sign_extend:<VWIDE>
4388 (match_operand:VQ_HSI 1 "register_operand" "w")
4389 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4390 (sign_extend:<VWIDE>
4392 (match_operand:VQ_HSI 2 "register_operand" "w")
4397 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4398 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4401 (define_expand "aarch64_sqdmull2<mode>"
4402 [(match_operand:<VWIDE> 0 "register_operand")
4403 (match_operand:VQ_HSI 1 "register_operand")
4404 (match_operand:VQ_HSI 2 "register_operand")]
4407 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4408 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4415 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4416 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4419 (sign_extend:<VWIDE>
4421 (match_operand:VQ_HSI 1 "register_operand" "w")
4422 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4423 (sign_extend:<VWIDE>
4424 (vec_duplicate:<VHALF>
4426 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4427 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4432 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4433 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4435 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4438 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4439 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4442 (sign_extend:<VWIDE>
4444 (match_operand:VQ_HSI 1 "register_operand" "w")
4445 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4446 (sign_extend:<VWIDE>
4447 (vec_duplicate:<VHALF>
4449 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4450 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4455 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4456 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4458 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4461 (define_expand "aarch64_sqdmull2_lane<mode>"
4462 [(match_operand:<VWIDE> 0 "register_operand")
4463 (match_operand:VQ_HSI 1 "register_operand")
4464 (match_operand:<VCOND> 2 "register_operand")
4465 (match_operand:SI 3 "immediate_operand")]
4468 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4469 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4470 operands[2], operands[3],
4475 (define_expand "aarch64_sqdmull2_laneq<mode>"
4476 [(match_operand:<VWIDE> 0 "register_operand")
4477 (match_operand:VQ_HSI 1 "register_operand")
4478 (match_operand:<VCONQ> 2 "register_operand")
4479 (match_operand:SI 3 "immediate_operand")]
4482 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4483 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4484 operands[2], operands[3],
4491 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4492 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4495 (sign_extend:<VWIDE>
4497 (match_operand:VQ_HSI 1 "register_operand" "w")
4498 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4499 (sign_extend:<VWIDE>
4500 (vec_duplicate:<VHALF>
4501 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4505 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4506 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4509 (define_expand "aarch64_sqdmull2_n<mode>"
4510 [(match_operand:<VWIDE> 0 "register_operand")
4511 (match_operand:VQ_HSI 1 "register_operand")
4512 (match_operand:<VEL> 2 "register_operand")]
4515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4516 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4523 (define_insn "aarch64_<sur>shl<mode>"
4524 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4526 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4527 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4530 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4531 [(set_attr "type" "neon_shift_reg<q>")]
4537 (define_insn "aarch64_<sur>q<r>shl<mode>"
4538 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4540 [(match_operand:VSDQ_I 1 "register_operand" "w")
4541 (match_operand:VSDQ_I 2 "register_operand" "w")]
4544 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4545 [(set_attr "type" "neon_sat_shift_reg<q>")]
4550 (define_insn "aarch64_<sur>shll_n<mode>"
4551 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4552 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4554 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4558 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4559 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4561 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4563 [(set_attr "type" "neon_shift_imm_long")]
4568 (define_insn "aarch64_<sur>shll2_n<mode>"
4569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4570 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4571 (match_operand:SI 2 "immediate_operand" "i")]
4575 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4576 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4578 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4580 [(set_attr "type" "neon_shift_imm_long")]
4585 (define_insn "aarch64_<sur>shr_n<mode>"
4586 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4589 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4592 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4593 [(set_attr "type" "neon_sat_shift_imm<q>")]
4598 (define_insn "aarch64_<sur>sra_n<mode>"
4599 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4600 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4601 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4603 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4606 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4607 [(set_attr "type" "neon_shift_acc<q>")]
4612 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4613 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4614 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4615 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4617 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4620 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4621 [(set_attr "type" "neon_shift_imm<q>")]
4626 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4627 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4628 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4630 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4633 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4634 [(set_attr "type" "neon_sat_shift_imm<q>")]
4640 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4641 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4642 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4644 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4647 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4648 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4652 ;; cm(eq|ge|gt|lt|le)
4653 ;; Note, we have constraints for Dz and Z as different expanders
4654 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compare producing an all-ones/all-zeros mask in the
;; integer-equivalent mode.  Alternative 1 compares two registers
;; (cm<n_optab>); alternative 2 compares against zero (#0, constraint ZDz).
;; NOTE(review): lossy listing — the neg wrapper, condition string and
;; closing parens are missing where the embedded line numbers jump.
4656 (define_insn "aarch64_cm<optab><mode>"
4657 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4659 (COMPARISONS:<V_INT_EQUIV>
4660 (match_operand:VDQ_I 1 "register_operand" "w,w")
4661 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4665 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4666 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4667 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode signed compare.  The insn_and_split clobbers CC: after reload,
;; if both operands landed in general registers it splits to a scalar
;; compare + cstoredi_neg; otherwise it re-emits as the CC-free SIMD
;; pattern "*aarch64_cm<optab>di" below.
;; NOTE(review): lossy listing — the neg wrappers, condition strings and
;; the tail of the split body (the else branch emitting the SIMD pattern)
;; are missing where the embedded line numbers jump.
4670 (define_insn_and_split "aarch64_cm<optab>di"
4671 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4674 (match_operand:DI 1 "register_operand" "w,w,r")
4675 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4677 (clobber (reg:CC CC_REGNUM))]
4680 "&& reload_completed"
4681 [(set (match_operand:DI 0 "register_operand")
4684 (match_operand:DI 1 "register_operand")
4685 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4688 /* If we are in the general purpose register file,
4689 we split to a sequence of comparison and store. */
4690 if (GP_REGNUM_P (REGNO (operands[0]))
4691 && GP_REGNUM_P (REGNO (operands[1])))
4693 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4694 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4695 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4696 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4699 /* Otherwise, we expand to a similar pattern which does not
4700 clobber CC_REGNUM. */
4702 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DImode compare (no CC clobber).
4705 (define_insn "*aarch64_cm<optab>di"
4706 [(set (match_operand:DI 0 "register_operand" "=w,w")
4709 (match_operand:DI 1 "register_operand" "w,w")
4710 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4712 "TARGET_SIMD && reload_completed"
4714 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4715 cm<optab>\t%d0, %d1, #0"
4716 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares (cmhi/cmhs via UCOMPARISONS).  Unlike the
;; signed family there is no compare-against-#0 alternative here.
;; NOTE(review): lossy listing — neg wrappers, condition strings and the
;; split-body tails are missing where the embedded line numbers jump.
4721 (define_insn "aarch64_cm<optab><mode>"
4722 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4724 (UCOMPARISONS:<V_INT_EQUIV>
4725 (match_operand:VDQ_I 1 "register_operand" "w")
4726 (match_operand:VDQ_I 2 "register_operand" "w")
4729 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4730 [(set_attr "type" "neon_compare<q>")]
;; DImode unsigned compare; same GP-vs-SIMD split strategy as the signed
;; version, but always uses plain CCmode for the scalar comparison.
4733 (define_insn_and_split "aarch64_cm<optab>di"
4734 [(set (match_operand:DI 0 "register_operand" "=w,r")
4737 (match_operand:DI 1 "register_operand" "w,r")
4738 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4740 (clobber (reg:CC CC_REGNUM))]
4743 "&& reload_completed"
4744 [(set (match_operand:DI 0 "register_operand")
4747 (match_operand:DI 1 "register_operand")
4748 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4751 /* If we are in the general purpose register file,
4752 we split to a sequence of comparison and store. */
4753 if (GP_REGNUM_P (REGNO (operands[0]))
4754 && GP_REGNUM_P (REGNO (operands[1])))
4756 machine_mode mode = CCmode;
4757 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4758 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4759 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4762 /* Otherwise, we expand to a similar pattern which does not
4763 clobber CC_REGNUM. */
4765 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload SIMD-register form of the DImode unsigned compare.
4768 (define_insn "*aarch64_cm<optab>di"
4769 [(set (match_operand:DI 0 "register_operand" "=w")
4772 (match_operand:DI 1 "register_operand" "w")
4773 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4775 "TARGET_SIMD && reload_completed"
4776 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4777 [(set_attr "type" "neon_compare")]
4782 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4783 ;; we don't have any insns using ne, and aarch64_vcond outputs
4784 ;; not (neg (eq (and x y) 0))
4785 ;; which is rewritten by simplify_rtx as
4786 ;; plus (eq (and x y) 0) -1.
;; CMTST: test bits, matching the canonicalised plus(eq(and x y, 0), -1)
;; form described above rather than a literal ne comparison.
;; NOTE(review): lossy listing — plus/eq wrappers, condition strings and
;; split-body tails are missing where the embedded line numbers jump.
4788 (define_insn "aarch64_cmtst<mode>"
4789 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4793 (match_operand:VDQ_I 1 "register_operand" "w")
4794 (match_operand:VDQ_I 2 "register_operand" "w"))
4795 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4796 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4799 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4800 [(set_attr "type" "neon_tst<q>")]
;; DImode CMTST: GP-register case splits to AND + compare-against-zero +
;; cstoredi_neg; SIMD case re-emits as "*aarch64_cmtstdi" below.
4803 (define_insn_and_split "aarch64_cmtstdi"
4804 [(set (match_operand:DI 0 "register_operand" "=w,r")
4808 (match_operand:DI 1 "register_operand" "w,r")
4809 (match_operand:DI 2 "register_operand" "w,r"))
4811 (clobber (reg:CC CC_REGNUM))]
4814 "&& reload_completed"
4815 [(set (match_operand:DI 0 "register_operand")
4819 (match_operand:DI 1 "register_operand")
4820 (match_operand:DI 2 "register_operand"))
4823 /* If we are in the general purpose register file,
4824 we split to a sequence of comparison and store. */
4825 if (GP_REGNUM_P (REGNO (operands[0]))
4826 && GP_REGNUM_P (REGNO (operands[1])))
4828 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4829 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4830 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4831 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4832 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4835 /* Otherwise, we expand to a similar pattern which does not
4836 clobber CC_REGNUM. */
4838 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload SIMD-register form of the DImode CMTST.
4841 (define_insn "*aarch64_cmtstdi"
4842 [(set (match_operand:DI 0 "register_operand" "=w")
4846 (match_operand:DI 1 "register_operand" "w")
4847 (match_operand:DI 2 "register_operand" "w"))
4850 "cmtst\t%d0, %d1, %d2"
4851 [(set_attr "type" "neon_tst")]
4854 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares producing an integer mask.  Alternative 1 is
;; register-register (fcm<n_optab>); alternative 2 compares against 0
;; (constraint YDz).
;; NOTE(review): lossy listing — neg wrappers, condition strings and
;; closing parens are missing where the embedded line numbers jump.
4856 (define_insn "aarch64_cm<optab><mode>"
4857 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4859 (COMPARISONS:<V_INT_EQUIV>
4860 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4861 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4865 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4866 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4867 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4871 ;; Note we can also handle what would be fac(le|lt) by
4872 ;; generating fac(ge|gt).
;; FACGE/FACGT: absolute compares (operands wrapped in abs — the abs
;; wrappers appear to be among the dropped lines of this listing).
4874 (define_insn "aarch64_fac<optab><mode>"
4875 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4877 (FAC_COMPARISONS:<V_INT_EQUIV>
4879 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4881 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4884 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4885 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; ADDP: pairwise integer addition (64-bit vectors) and the scalar DImode
;; reduction form operating on a V2DI input.
;; NOTE(review): lossy listing — unspec tags, condition strings and the
;; addpdi output template are missing where the embedded line numbers jump.
4890 (define_insn "aarch64_addp<mode>"
4891 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4893 [(match_operand:VD_BHSI 1 "register_operand" "w")
4894 (match_operand:VD_BHSI 2 "register_operand" "w")]
4897 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4898 [(set_attr "type" "neon_reduc_add<q>")]
4901 (define_insn "aarch64_addpdi"
4902 [(set (match_operand:DI 0 "register_operand" "=w")
4904 [(match_operand:V2DI 1 "register_operand" "w")]
4908 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.  The expander first tries the approximate-sqrt
;; sequence (aarch64_emit_approx_sqrt, non-reciprocal form); on failure it
;; presumably falls through to the FSQRT insn below — the DONE/fall-through
;; lines are among those dropped from this listing.
4913 (define_expand "sqrt<mode>2"
4914 [(set (match_operand:VHSDF 0 "register_operand")
4915 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4918 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
;; Hardware FSQRT form.
4922 (define_insn "*sqrt<mode>2"
4923 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4924 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4926 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4927 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4930 ;; Patterns for vector struct loads and stores.
;; Two-register (OImode) structure load/store family: LD2/LD2R, single-lane
;; LD2/ST2, whole-register ST2, and the vec_load/store_lanesoi expanders
;; that insert a register-list reversal (tbl-based) on big-endian.
;; NOTE(review): lossy listing — UNSPEC_LD2/ST2 tags, condition strings,
;; DONE statements and else-branches of the expanders are missing where the
;; embedded line numbers jump.
4932 (define_insn "aarch64_simd_ld2<mode>"
4933 [(set (match_operand:OI 0 "register_operand" "=w")
4934 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4938 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one element pair and replicate to all lanes.
4942 (define_insn "aarch64_simd_ld2r<mode>"
4943 [(set (match_operand:OI 0 "register_operand" "=w")
4944 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4948 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4949 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; Single-lane LD2 into an existing register pair (operand 2 tied as "0");
;; lane index endian-corrected before printing.
4952 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4953 [(set (match_operand:OI 0 "register_operand" "=w")
4954 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4955 (match_operand:OI 2 "register_operand" "0")
4956 (match_operand:SI 3 "immediate_operand" "i")
4957 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4961 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4962 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4964 [(set_attr "type" "neon_load2_one_lane")]
;; Expander: on big-endian, load into a temp and reverse the register list
;; so RTL lane numbering matches GCC vector-extension ordering.
4967 (define_expand "vec_load_lanesoi<mode>"
4968 [(set (match_operand:OI 0 "register_operand")
4969 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4974 if (BYTES_BIG_ENDIAN)
4976 rtx tmp = gen_reg_rtx (OImode);
4977 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4978 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4979 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4982 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; Whole-register ST2 store.
4986 (define_insn "aarch64_simd_st2<mode>"
4987 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4988 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4989 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4992 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4993 [(set_attr "type" "neon_store2_2reg<q>")]
4996 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4997 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4998 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4999 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5000 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5001 (match_operand:SI 2 "immediate_operand" "i")]
5005 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5006 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
5008 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Store expander, mirroring the big-endian reversal of the load expander.
5011 (define_expand "vec_store_lanesoi<mode>"
5012 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5013 (unspec:OI [(match_operand:OI 1 "register_operand")
5014 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5018 if (BYTES_BIG_ENDIAN)
5020 rtx tmp = gen_reg_rtx (OImode);
5021 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5022 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5023 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5026 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register (CImode) structure load/store family: LD3/LD3R,
;; single-lane LD3/ST3, whole-register ST3, plus the big-endian-aware
;; vec_load/store_lanesci expanders.  Structure parallels the OImode
;; (ld2/st2) family above.
;; NOTE(review): lossy listing — UNSPEC tags, condition strings, DONE
;; statements and else-branches are missing where line numbers jump.
5030 (define_insn "aarch64_simd_ld3<mode>"
5031 [(set (match_operand:CI 0 "register_operand" "=w")
5032 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5036 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037 [(set_attr "type" "neon_load3_3reg<q>")]
5040 (define_insn "aarch64_simd_ld3r<mode>"
5041 [(set (match_operand:CI 0 "register_operand" "=w")
5042 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5043 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5046 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5047 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Single-lane LD3 into an existing register triple (operand 2 tied "0").
5050 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5051 [(set (match_operand:CI 0 "register_operand" "=w")
5052 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5053 (match_operand:CI 2 "register_operand" "0")
5054 (match_operand:SI 3 "immediate_operand" "i")
5055 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5059 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5060 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5062 [(set_attr "type" "neon_load3_one_lane")]
;; Big-endian-aware load expander (reverse register list via tbl mask).
5065 (define_expand "vec_load_lanesci<mode>"
5066 [(set (match_operand:CI 0 "register_operand")
5067 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5068 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5072 if (BYTES_BIG_ENDIAN)
5074 rtx tmp = gen_reg_rtx (CImode);
5075 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5076 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5077 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5080 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5084 (define_insn "aarch64_simd_st3<mode>"
5085 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5086 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5087 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5090 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5091 [(set_attr "type" "neon_store3_3reg<q>")]
5094 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5095 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5096 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5097 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5098 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5099 (match_operand:SI 2 "immediate_operand" "i")]
5103 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5104 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5106 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Big-endian-aware store expander.
5109 (define_expand "vec_store_lanesci<mode>"
5110 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5111 (unspec:CI [(match_operand:CI 1 "register_operand")
5112 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5116 if (BYTES_BIG_ENDIAN)
5118 rtx tmp = gen_reg_rtx (CImode);
5119 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5120 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5121 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5124 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register (XImode) structure load/store family: LD4/LD4R,
;; single-lane LD4/ST4, whole-register ST4, plus big-endian-aware
;; vec_load/store_lanesxi expanders.  Parallels the ld2/ld3 families above.
;; NOTE(review): lossy listing — UNSPEC tags, condition strings, DONE
;; statements and else-branches are missing where line numbers jump.
5128 (define_insn "aarch64_simd_ld4<mode>"
5129 [(set (match_operand:XI 0 "register_operand" "=w")
5130 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5131 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5134 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5135 [(set_attr "type" "neon_load4_4reg<q>")]
5138 (define_insn "aarch64_simd_ld4r<mode>"
5139 [(set (match_operand:XI 0 "register_operand" "=w")
5140 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5141 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5144 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5145 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Single-lane LD4 into an existing register quad (operand 2 tied "0").
5148 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5149 [(set (match_operand:XI 0 "register_operand" "=w")
5150 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5151 (match_operand:XI 2 "register_operand" "0")
5152 (match_operand:SI 3 "immediate_operand" "i")
5153 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5157 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5158 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5160 [(set_attr "type" "neon_load4_one_lane")]
;; Big-endian-aware load expander (reverse register list via tbl mask).
5163 (define_expand "vec_load_lanesxi<mode>"
5164 [(set (match_operand:XI 0 "register_operand")
5165 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5166 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5170 if (BYTES_BIG_ENDIAN)
5172 rtx tmp = gen_reg_rtx (XImode);
5173 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5174 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5175 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5178 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5182 (define_insn "aarch64_simd_st4<mode>"
5183 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5184 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5185 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5188 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5189 [(set_attr "type" "neon_store4_4reg<q>")]
5192 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5193 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5194 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5195 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5196 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5197 (match_operand:SI 2 "immediate_operand" "i")]
5201 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5202 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5204 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Big-endian-aware store expander.
5207 (define_expand "vec_store_lanesxi<mode>"
5208 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5209 (unspec:XI [(match_operand:XI 1 "register_operand")
5210 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5214 if (BYTES_BIG_ENDIAN)
5216 rtx tmp = gen_reg_rtx (XImode);
5217 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5218 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5219 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5222 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse a struct register list for big-endian: after reload, splits
;; into one TBL (tbl1v16qi) per constituent 128-bit register, using the
;; byte-permute mask in operand 2.  Needs an early-clobber destination
;; ("=&w") since source and destination register lists may overlap.
;; NOTE(review): lossy listing — condition strings, brace lines and the
;; DONE statement are missing where the embedded line numbers jump.
5226 (define_insn_and_split "aarch64_rev_reglist<mode>"
5227 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5229 [(match_operand:VSTRUCT 1 "register_operand" "w")
5230 (match_operand:V16QI 2 "register_operand" "w")]
5231 UNSPEC_REV_REGLIST))]
5234 "&& reload_completed"
5238 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5239 for (i = 0; i < nregs; i++)
5241 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5242 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5243 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5247 [(set_attr "type" "neon_tbl1_q")
5248 (set_attr "length" "<insn_count>")]
5251 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for struct modes: when pseudos are available and the
;; destination is not a register, force the source into a register.
5253 (define_expand "mov<mode>"
5254 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5255 (match_operand:VSTRUCT 1 "general_operand"))]
5258 if (can_create_pseudo_p ())
5260 if (GET_CODE (operands[0]) != REG)
5261 operands[1] = force_reg (<MODE>mode, operands[1]);
;; LD1/ST1 multi-register forms: the expanders wrap the address register
;; (operand DI) in a MEM of the struct mode and forward to the matching
;; "_x<N>_" insn, which prints a single LD1/ST1 over N consecutive
;; registers.  The (const_int N) inside the dummy unspec distinguishes the
;; register count.
;; NOTE(review): lossy listing — "TARGET_SIMD" strings, DONE statements,
;; UNSPEC_LD1/ST1 tags and closing parens are missing where the embedded
;; line numbers jump.
5266 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5267 [(match_operand:CI 0 "register_operand")
5268 (match_operand:DI 1 "register_operand")
5269 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5272 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5273 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5277 (define_insn "aarch64_ld1_x3_<mode>"
5278 [(set (match_operand:CI 0 "register_operand" "=w")
5280 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5281 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5283 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5284 [(set_attr "type" "neon_load1_3reg<q>")]
5287 (define_expand "aarch64_ld1x4<VALLDIF:mode>"
5288 [(match_operand:XI 0 "register_operand" "=w")
5289 (match_operand:DI 1 "register_operand" "r")
5290 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5293 rtx mem = gen_rtx_MEM (XImode, operands[1]);
5294 emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
5298 (define_insn "aarch64_ld1_x4_<mode>"
5299 [(set (match_operand:XI 0 "register_operand" "=w")
5301 [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5302 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5305 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5306 [(set_attr "type" "neon_load1_4reg<q>")]
;; ST1 multi-register stores: operand 0 is the base address register.
5309 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5310 [(match_operand:DI 0 "register_operand")
5311 (match_operand:OI 1 "register_operand")
5312 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5315 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5316 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5320 (define_insn "aarch64_st1_x2_<mode>"
5321 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5323 [(match_operand:OI 1 "register_operand" "w")
5324 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5326 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5327 [(set_attr "type" "neon_store1_2reg<q>")]
5330 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5331 [(match_operand:DI 0 "register_operand")
5332 (match_operand:CI 1 "register_operand")
5333 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5336 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5337 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5341 (define_insn "aarch64_st1_x3_<mode>"
5342 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5344 [(match_operand:CI 1 "register_operand" "w")
5345 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5347 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5348 [(set_attr "type" "neon_store1_3reg<q>")]
5351 (define_expand "aarch64_st1x4<VALLDIF:mode>"
5352 [(match_operand:DI 0 "register_operand" "")
5353 (match_operand:XI 1 "register_operand" "")
5354 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5357 rtx mem = gen_rtx_MEM (XImode, operands[0]);
5358 emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
5362 (define_insn "aarch64_st1_x4_<mode>"
5363 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5365 [(match_operand:XI 1 "register_operand" "w")
5366 (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
5369 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5370 [(set_attr "type" "neon_store1_4reg<q>")]
;; Struct-mode moves.  Little-endian: register-register moves split later
;; (alternative 0, "multiple"); memory forms use ST1/LD1 register lists.
;; Big-endian: LD1/ST1 single-register unspec patterns preserve lane
;; ordering, and OI/CI/XI moves are emitted as LDP/STP-based sequences
;; (output templates for those alternatives are among the dropped lines).
;; NOTE(review): lossy listing — output templates of the be_mov patterns,
;; condition strings and closing parens are missing where line numbers jump.
5373 (define_insn "*aarch64_mov<mode>"
5374 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5375 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5376 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5377 && (register_operand (operands[0], <MODE>mode)
5378 || register_operand (operands[1], <MODE>mode))"
5381 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5382 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5383 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5384 neon_load<nregs>_<nregs>reg_q")
5385 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian single-register LD1/ST1 (element-ordered load/store).
5388 (define_insn "aarch64_be_ld1<mode>"
5389 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5390 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5391 "aarch64_simd_struct_operand" "Utv")]
5394 "ld1\\t{%0<Vmtype>}, %1"
5395 [(set_attr "type" "neon_load1_1reg<q>")]
5398 (define_insn "aarch64_be_st1<mode>"
5399 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5400 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5403 "st1\\t{%1<Vmtype>}, %0"
5404 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2x128-bit) move: reg-reg is "multiple" (length 8);
;; memory alternatives are single 4-byte stp/ldp-q instructions.
5407 (define_insn "*aarch64_be_movoi"
5408 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5409 (match_operand:OI 1 "general_operand" " w,w,m"))]
5410 "TARGET_SIMD && BYTES_BIG_ENDIAN
5411 && (register_operand (operands[0], OImode)
5412 || register_operand (operands[1], OImode))"
5417 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5418 (set_attr "length" "8,4,4")]
;; Big-endian CI (3x128-bit) move; offsettable memory ("o") required.
5421 (define_insn "*aarch64_be_movci"
5422 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5423 (match_operand:CI 1 "general_operand" " w,w,o"))]
5424 "TARGET_SIMD && BYTES_BIG_ENDIAN
5425 && (register_operand (operands[0], CImode)
5426 || register_operand (operands[1], CImode))"
5428 [(set_attr "type" "multiple")
5429 (set_attr "length" "12,4,4")]
;; Big-endian XI (4x128-bit) move.
5432 (define_insn "*aarch64_be_movxi"
5433 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5434 (match_operand:XI 1 "general_operand" " w,w,o"))]
5435 "TARGET_SIMD && BYTES_BIG_ENDIAN
5436 && (register_operand (operands[0], XImode)
5437 || register_operand (operands[1], XImode))"
5439 [(set_attr "type" "multiple")
5440 (set_attr "length" "16,4,4")]
;; Post-reload splits for the opaque-mode moves above (the define_split
;; header lines fall outside this view).  Register-to-register copies
;; decompose into 2/3/4 TImode register moves via
;; aarch64_simd_emit_reg_reg_move; big-endian memory CI/XI moves are
;; split into an OImode move of the low 32 bytes plus moves of the
;; remainder (a V16QI lowpart move for CI, a second OImode move at
;; byte offset 32 for XI).
5444 [(set (match_operand:OI 0 "register_operand")
5445 (match_operand:OI 1 "register_operand"))]
5446 "TARGET_SIMD && reload_completed"
5449 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5454 [(set (match_operand:CI 0 "nonimmediate_operand")
5455 (match_operand:CI 1 "general_operand"))]
5456 "TARGET_SIMD && reload_completed"
5459 if (register_operand (operands[0], CImode)
5460 && register_operand (operands[1], CImode))
5462 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5465 else if (BYTES_BIG_ENDIAN)
5467 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5468 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5469 emit_move_insn (gen_lowpart (V16QImode,
5470 simplify_gen_subreg (TImode, operands[0],
5472 gen_lowpart (V16QImode,
5473 simplify_gen_subreg (TImode, operands[1],
5482 [(set (match_operand:XI 0 "nonimmediate_operand")
5483 (match_operand:XI 1 "general_operand"))]
5484 "TARGET_SIMD && reload_completed"
5487 if (register_operand (operands[0], XImode)
5488 && register_operand (operands[1], XImode))
5490 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5493 else if (BYTES_BIG_ENDIAN)
5495 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5496 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5497 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5498 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; LD<n>R (load-and-replicate) expander: wraps the pointer in a BLKmode
;; MEM sized to <nregs> copies of one element, then emits the matching
;; aarch64_simd_ld<n>r insn.
5505 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5506 [(match_operand:VSTRUCT 0 "register_operand")
5507 (match_operand:DI 1 "register_operand")
5508 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5511 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5512 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5515 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; Interleaved structure loads into D-register lists.  For 64-bit
;; vector modes (VD) a true LD2/LD3/LD4 is used; for the scalar 64-bit
;; modes (DX) there is no interleaving, so LD1 of consecutive .1d
;; registers gives the same result.  The UNSPEC_VSTRUCTDUMMY operand
;; only carries the element mode; it contributes no value.
5520 (define_insn "aarch64_ld2<mode>_dreg"
5521 [(set (match_operand:OI 0 "register_operand" "=w")
5522 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5523 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5526 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5527 [(set_attr "type" "neon_load2_2reg<q>")]
5530 (define_insn "aarch64_ld2<mode>_dreg"
5531 [(set (match_operand:OI 0 "register_operand" "=w")
5532 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5533 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5536 "ld1\\t{%S0.1d - %T0.1d}, %1"
5537 [(set_attr "type" "neon_load1_2reg<q>")]
5540 (define_insn "aarch64_ld3<mode>_dreg"
5541 [(set (match_operand:CI 0 "register_operand" "=w")
5542 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5543 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5546 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5547 [(set_attr "type" "neon_load3_3reg<q>")]
5550 (define_insn "aarch64_ld3<mode>_dreg"
5551 [(set (match_operand:CI 0 "register_operand" "=w")
5552 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5553 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5556 "ld1\\t{%S0.1d - %U0.1d}, %1"
5557 [(set_attr "type" "neon_load1_3reg<q>")]
5560 (define_insn "aarch64_ld4<mode>_dreg"
5561 [(set (match_operand:XI 0 "register_operand" "=w")
5562 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5563 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5566 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5567 [(set_attr "type" "neon_load4_4reg<q>")]
5570 (define_insn "aarch64_ld4<mode>_dreg"
5571 [(set (match_operand:XI 0 "register_operand" "=w")
5572 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5573 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5576 "ld1\\t{%S0.1d - %V0.1d}, %1"
5577 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expanders for the vldN family.  Each takes a DI pointer in
;; operand 1, builds a suitably-sized MEM around it, and emits the
;; corresponding load insn.  The D-register variants use a BLKmode MEM
;; of <nregs> * 8 bytes; the Q-register variants use the structure
;; mode directly.
5580 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5581 [(match_operand:VSTRUCT 0 "register_operand")
5582 (match_operand:DI 1 "register_operand")
5583 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5586 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5587 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5589 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; vld1: a plain vector load.  Big-endian goes through the LD1 unspec
;; pattern to keep lane numbering consistent; little-endian is a
;; straight move.
5593 (define_expand "aarch64_ld1<VALL_F16:mode>"
5594 [(match_operand:VALL_F16 0 "register_operand")
5595 (match_operand:DI 1 "register_operand")]
5598 machine_mode mode = <VALL_F16:MODE>mode;
5599 rtx mem = gen_rtx_MEM (mode, operands[1]);
5601 if (BYTES_BIG_ENDIAN)
5602 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5604 emit_move_insn (operands[0], mem);
5608 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5609 [(match_operand:VSTRUCT 0 "register_operand")
5610 (match_operand:DI 1 "register_operand")
5611 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5614 machine_mode mode = <VSTRUCT:MODE>mode;
5615 rtx mem = gen_rtx_MEM (mode, operands[1]);
5617 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; vld1x2: load two consecutive vectors (non-interleaved) into an
;; OImode register pair, for both Q-reg and D-reg element modes.
5621 (define_expand "aarch64_ld1x2<VQ:mode>"
5622 [(match_operand:OI 0 "register_operand")
5623 (match_operand:DI 1 "register_operand")
5624 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5627 machine_mode mode = OImode;
5628 rtx mem = gen_rtx_MEM (mode, operands[1]);
5630 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5634 (define_expand "aarch64_ld1x2<VDC:mode>"
5635 [(match_operand:OI 0 "register_operand")
5636 (match_operand:DI 1 "register_operand")
5637 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5640 machine_mode mode = OImode;
5641 rtx mem = gen_rtx_MEM (mode, operands[1]);
5643 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; vldN_lane: load one lane of each of <nregs> vectors.  Operand 2 is
;; the previous register contents (merged into untouched lanes) and
;; operand 3 the lane index, bounds-checked before expansion.
5648 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5649 [(match_operand:VSTRUCT 0 "register_operand")
5650 (match_operand:DI 1 "register_operand")
5651 (match_operand:VSTRUCT 2 "register_operand")
5652 (match_operand:SI 3 "immediate_operand")
5653 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5656 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5657 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5660 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5661 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5662 operands[0], mem, operands[2], operands[3]));
5666 ;; Expanders for builtins to extract vector registers from large
5667 ;; opaque integer modes.
;; Extract part <2> of an opaque struct register into a D-register
;; (via its 128-bit double-width mode, then the low half) or directly
;; into a Q-register.  Parts are 16 bytes apart regardless of element
;; size.
5671 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5672 [(match_operand:VDC 0 "register_operand")
5673 (match_operand:VSTRUCT 1 "register_operand")
5674 (match_operand:SI 2 "immediate_operand")]
5677 int part = INTVAL (operands[2]);
5678 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5679 int offset = part * 16;
5681 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5682 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5688 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5689 [(match_operand:VQ 0 "register_operand")
5690 (match_operand:VSTRUCT 1 "register_operand")
5691 (match_operand:SI 2 "immediate_operand")]
5694 int part = INTVAL (operands[2]);
5695 int offset = part * 16;
5697 emit_move_insn (operands[0],
5698 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5702 ;; Permuted-store expanders for neon intrinsics.
5704 ;; Permute instructions
;; Standard vec_perm pattern for byte vectors: delegates entirely to
;; aarch64_expand_vec_perm, which chooses TBL or a specialised permute.
5708 (define_expand "vec_perm<mode>"
5709 [(match_operand:VB 0 "register_operand")
5710 (match_operand:VB 1 "register_operand")
5711 (match_operand:VB 2 "register_operand")
5712 (match_operand:VB 3 "register_operand")]
5715 aarch64_expand_vec_perm (operands[0], operands[1],
5716 operands[2], operands[3], <nunits>);
;; Table-lookup permutes.  TBL reads indices from the last operand and
;; gathers bytes from a list of 1-4 source vectors; out-of-range
;; indices produce zero.  TBX is identical but leaves out-of-range
;; result bytes unchanged (hence the "0" tie of operand 1).
5720 (define_insn "aarch64_tbl1<mode>"
5721 [(set (match_operand:VB 0 "register_operand" "=w")
5722 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5723 (match_operand:VB 2 "register_operand" "w")]
5726 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5727 [(set_attr "type" "neon_tbl1<q>")]
5730 ;; Two source registers.
5732 (define_insn "aarch64_tbl2v16qi"
5733 [(set (match_operand:V16QI 0 "register_operand" "=w")
5734 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5735 (match_operand:V16QI 2 "register_operand" "w")]
5738 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5739 [(set_attr "type" "neon_tbl2_q")]
5742 (define_insn "aarch64_tbl3<mode>"
5743 [(set (match_operand:VB 0 "register_operand" "=w")
5744 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5745 (match_operand:VB 2 "register_operand" "w")]
5748 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5749 [(set_attr "type" "neon_tbl3")]
5752 (define_insn "aarch64_tbx4<mode>"
5753 [(set (match_operand:VB 0 "register_operand" "=w")
5754 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5755 (match_operand:OI 2 "register_operand" "w")
5756 (match_operand:VB 3 "register_operand" "w")]
5759 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5760 [(set_attr "type" "neon_tbl4")]
5763 ;; Three source registers.
5765 (define_insn "aarch64_qtbl3<mode>"
5766 [(set (match_operand:VB 0 "register_operand" "=w")
5767 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5768 (match_operand:VB 2 "register_operand" "w")]
5771 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5772 [(set_attr "type" "neon_tbl3")]
5775 (define_insn "aarch64_qtbx3<mode>"
5776 [(set (match_operand:VB 0 "register_operand" "=w")
5777 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5778 (match_operand:CI 2 "register_operand" "w")
5779 (match_operand:VB 3 "register_operand" "w")]
5782 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5783 [(set_attr "type" "neon_tbl3")]
5786 ;; Four source registers.
5788 (define_insn "aarch64_qtbl4<mode>"
5789 [(set (match_operand:VB 0 "register_operand" "=w")
5790 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5791 (match_operand:VB 2 "register_operand" "w")]
5794 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5795 [(set_attr "type" "neon_tbl4")]
5798 (define_insn "aarch64_qtbx4<mode>"
5799 [(set (match_operand:VB 0 "register_operand" "=w")
5800 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5801 (match_operand:XI 2 "register_operand" "w")
5802 (match_operand:VB 3 "register_operand" "w")]
5805 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5806 [(set_attr "type" "neon_tbl4")]
;; Build a TBL source pair: combine two V16QI registers into one
;; OImode register list.  Emitted as '#' and split into plain register
;; moves after reload by aarch64_split_combinev16qi.
5809 (define_insn_and_split "aarch64_combinev16qi"
5810 [(set (match_operand:OI 0 "register_operand" "=w")
5811 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5812 (match_operand:V16QI 2 "register_operand" "w")]
5816 "&& reload_completed"
5819 aarch64_split_combinev16qi (operands);
5822 [(set_attr "type" "multiple")]
5825 ;; This instruction's pattern is generated directly by
5826 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5827 ;; need corresponding changes there.
;; ZIP/UZP/TRN-style two-input permutes, selected by the PERMUTE
;; iterator's perm_insn attribute.
5828 (define_insn "aarch64_<PERMUTE:perm_insn><mode>"
5829 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5830 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5831 (match_operand:VALL_F16 2 "register_operand" "w")]
5834 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5835 [(set_attr "type" "neon_permute<q>")]
5838 ;; This instruction's pattern is generated directly by
5839 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5840 ;; need corresponding changes there.  Note that the immediate (third)
5841 ;; operand is a lane index not a byte index.
5842 (define_insn "aarch64_ext<mode>"
5843 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5844 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5845 (match_operand:VALL_F16 2 "register_operand" "w")
5846 (match_operand:SI 3 "immediate_operand" "i")]
;; EXT encodes a byte offset, so scale the lane index by the element
;; size before printing.
5850 operands[3] = GEN_INT (INTVAL (operands[3])
5851 * GET_MODE_UNIT_SIZE (<MODE>mode));
5852 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5854 [(set_attr "type" "neon_ext<q>")]
5857 ;; This instruction's pattern is generated directly by
5858 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5859 ;; need corresponding changes there.
;; REV16/REV32/REV64 element reversal within containers.
5860 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5861 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5862 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5865 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5866 [(set_attr "type" "neon_rev<q>")]
;; Interleaved structure stores from D-register lists; mirror images
;; of the ld<n>_dreg insns above.  VD element modes use ST2/ST3/ST4;
;; the scalar 64-bit DX modes use ST1 of consecutive .1d registers.
5869 (define_insn "aarch64_st2<mode>_dreg"
5870 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5871 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5872 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5875 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5876 [(set_attr "type" "neon_store2_2reg")]
5879 (define_insn "aarch64_st2<mode>_dreg"
5880 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5881 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5882 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5885 "st1\\t{%S1.1d - %T1.1d}, %0"
5886 [(set_attr "type" "neon_store1_2reg")]
5889 (define_insn "aarch64_st3<mode>_dreg"
5890 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5891 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5892 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5895 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5896 [(set_attr "type" "neon_store3_3reg")]
5899 (define_insn "aarch64_st3<mode>_dreg"
5900 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5901 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5902 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5905 "st1\\t{%S1.1d - %U1.1d}, %0"
5906 [(set_attr "type" "neon_store1_3reg")]
5909 (define_insn "aarch64_st4<mode>_dreg"
5910 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5911 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5912 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5915 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5916 [(set_attr "type" "neon_store4_4reg")]
5919 (define_insn "aarch64_st4<mode>_dreg"
5920 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5921 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5922 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5925 "st1\\t{%S1.1d - %V1.1d}, %0"
5926 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expanders for the vstN family: mirror the load expanders,
;; with the pointer in operand 0 and the data in operand 1.
5929 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5930 [(match_operand:DI 0 "register_operand")
5931 (match_operand:VSTRUCT 1 "register_operand")
5932 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5935 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5936 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5938 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5942 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5943 [(match_operand:DI 0 "register_operand")
5944 (match_operand:VSTRUCT 1 "register_operand")
5945 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5948 machine_mode mode = <VSTRUCT:MODE>mode;
5949 rtx mem = gen_rtx_MEM (mode, operands[0]);
5951 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; vstN_lane: store lane <2> of each of <nregs> vectors.
5955 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5956 [(match_operand:DI 0 "register_operand")
5957 (match_operand:VSTRUCT 1 "register_operand")
5958 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5959 (match_operand:SI 2 "immediate_operand")]
5962 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5963 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5966 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5967 mem, operands[1], operands[2]));
;; vst1: big-endian goes through the ST1 unspec pattern for consistent
;; lane ordering; little-endian is a plain store.
5971 (define_expand "aarch64_st1<VALL_F16:mode>"
5972 [(match_operand:DI 0 "register_operand")
5973 (match_operand:VALL_F16 1 "register_operand")]
5976 machine_mode mode = <VALL_F16:MODE>mode;
5977 rtx mem = gen_rtx_MEM (mode, operands[0]);
5979 if (BYTES_BIG_ENDIAN)
5980 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5982 emit_move_insn (mem, operands[1]);
5986 ;; Expander for builtins to insert vector registers into large
5987 ;; opaque integer modes.
5989 ;; Q-register list.  We don't need a D-reg inserter as we zero
5990 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole struct, then overwrite part <3> (16 bytes per part).
5992 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5993 [(match_operand:VSTRUCT 0 "register_operand")
5994 (match_operand:VSTRUCT 1 "register_operand")
5995 (match_operand:VQ 2 "register_operand")
5996 (match_operand:SI 3 "immediate_operand")]
5999 int part = INTVAL (operands[3]);
6000 int offset = part * 16;
6002 emit_move_insn (operands[0], operands[1]);
6003 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
6008 ;; Standard pattern name vec_init<mode><Vel>.
;; Both vec_init forms (element-wise and half-vector) delegate to
;; aarch64_expand_vector_init.
6010 (define_expand "vec_init<mode><Vel>"
6011 [(match_operand:VALL_F16 0 "register_operand")
6012 (match_operand 1 "" "")]
6015 aarch64_expand_vector_init (operands[0], operands[1]);
6019 (define_expand "vec_init<mode><Vhalf>"
6020 [(match_operand:VQ_NO2E 0 "register_operand")
6021 (match_operand 1 "" "")]
6024 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load one element from memory and replicate it to all lanes.
6028 (define_insn "*aarch64_simd_ld1r<mode>"
6029 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6030 (vec_duplicate:VALL_F16
6031 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6033 "ld1r\\t{%0.<Vtype>}, %1"
6034 [(set_attr "type" "neon_load1_all_lanes")]
;; LD1 x2: load two consecutive (non-interleaved) vectors into an
;; OImode register pair; one variant per element-mode iterator.
6037 (define_insn "aarch64_simd_ld1<mode>_x2"
6038 [(set (match_operand:OI 0 "register_operand" "=w")
6039 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6040 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6043 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6044 [(set_attr "type" "neon_load1_2reg<q>")]
6047 (define_insn "aarch64_simd_ld1<mode>_x2"
6048 [(set (match_operand:OI 0 "register_operand" "=w")
6049 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
6050 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6053 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
6054 [(set_attr "type" "neon_load1_2reg<q>")]
;; Floating-point reciprocal step/estimate instructions:
;; FRECPE (estimate), FRECPX (exponent), FRECPS (Newton-Raphson step),
;; and the unsigned integer URECPE.
6058 (define_insn "@aarch64_frecpe<mode>"
6059 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6061 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6064 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6065 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6068 (define_insn "aarch64_frecpx<mode>"
6069 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6070 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6073 "frecpx\t%<s>0, %<s>1"
6074 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6077 (define_insn "@aarch64_frecps<mode>"
6078 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6080 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6081 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6084 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6085 [(set_attr "type" "neon_fp_recps_<stype><q>")]
6088 (define_insn "aarch64_urecpe<mode>"
6089 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6090 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6093 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6094 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6096 ;; Standard pattern name vec_extract<mode><Vel>.
;; vec_extract: forwards to the get_lane pattern.
6098 (define_expand "vec_extract<mode><Vel>"
6099 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6100 (match_operand:VALL_F16 1 "register_operand")
6101 (match_operand:SI 2 "immediate_operand")]
6105 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES round instructions (AESE/AESD) and the mix-columns pair
;; (AESMC/AESIMC).  The round ops XOR state with the key first, which
;; is commutative -- hence the "%0" matching constraint on operand 1.
6111 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6112 [(set (match_operand:V16QI 0 "register_operand" "=w")
6115 (match_operand:V16QI 1 "register_operand" "%0")
6116 (match_operand:V16QI 2 "register_operand" "w"))]
6118 "TARGET_SIMD && TARGET_AES"
6119 "aes<aes_op>\\t%0.16b, %2.16b"
6120 [(set_attr "type" "crypto_aese")]
6123 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6124 [(set (match_operand:V16QI 0 "register_operand" "=w")
6125 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6127 "TARGET_SIMD && TARGET_AES"
6128 "aes<aesmc_op>\\t%0.16b, %1.16b"
6129 [(set_attr "type" "crypto_aesmc")]
6132 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6133 ;; and enforce the register dependency without scheduling or register
6134 ;; allocation messing up the order or introducing moves inbetween.
6135 ;; Mash the two together during combine.
6137 (define_insn "*aarch64_crypto_aese_fused"
6138 [(set (match_operand:V16QI 0 "register_operand" "=w")
6142 (match_operand:V16QI 1 "register_operand" "%0")
6143 (match_operand:V16QI 2 "register_operand" "w"))]
6146 "TARGET_SIMD && TARGET_AES
6147 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6148 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6149 [(set_attr "type" "crypto_aese")
6150 (set_attr "length" "8")]
6153 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6154 ;; and enforce the register dependency without scheduling or register
6155 ;; allocation messing up the order or introducing moves inbetween.
6156 ;; Mash the two together during combine.
6158 (define_insn "*aarch64_crypto_aesd_fused"
6159 [(set (match_operand:V16QI 0 "register_operand" "=w")
6163 (match_operand:V16QI 1 "register_operand" "%0")
6164 (match_operand:V16QI 2 "register_operand" "w"))]
6167 "TARGET_SIMD && TARGET_AES
6168 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6169 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6170 [(set_attr "type" "crypto_aese")
6171 (set_attr "length" "8")]
;; SHA-1 instructions (TARGET_SHA2 gates SHA-1 as well).  SHA1H takes
;; the low SI lane of a V4SI -- lane 0 on little-endian, lane 3 on
;; big-endian, hence the two variants.
6176 (define_insn "aarch64_crypto_sha1hsi"
6177 [(set (match_operand:SI 0 "register_operand" "=w")
6178 (unspec:SI [(match_operand:SI 1
6179 "register_operand" "w")]
6181 "TARGET_SIMD && TARGET_SHA2"
6183 [(set_attr "type" "crypto_sha1_fast")]
6186 (define_insn "aarch64_crypto_sha1hv4si"
6187 [(set (match_operand:SI 0 "register_operand" "=w")
6188 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6189 (parallel [(const_int 0)]))]
6191 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6193 [(set_attr "type" "crypto_sha1_fast")]
6196 (define_insn "aarch64_be_crypto_sha1hv4si"
6197 [(set (match_operand:SI 0 "register_operand" "=w")
6198 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6199 (parallel [(const_int 3)]))]
6201 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6203 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1/SHA1SU0 schedule updates and the SHA1C/SHA1P/SHA1M hash
;; updates (selected by sha1_op).  Operand 1 is tied to the output.
6206 (define_insn "aarch64_crypto_sha1su1v4si"
6207 [(set (match_operand:V4SI 0 "register_operand" "=w")
6208 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6209 (match_operand:V4SI 2 "register_operand" "w")]
6211 "TARGET_SIMD && TARGET_SHA2"
6212 "sha1su1\\t%0.4s, %2.4s"
6213 [(set_attr "type" "crypto_sha1_fast")]
6216 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6217 [(set (match_operand:V4SI 0 "register_operand" "=w")
6218 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219 (match_operand:SI 2 "register_operand" "w")
6220 (match_operand:V4SI 3 "register_operand" "w")]
6222 "TARGET_SIMD && TARGET_SHA2"
6223 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6224 [(set_attr "type" "crypto_sha1_slow")]
6227 (define_insn "aarch64_crypto_sha1su0v4si"
6228 [(set (match_operand:V4SI 0 "register_operand" "=w")
6229 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6230 (match_operand:V4SI 2 "register_operand" "w")
6231 (match_operand:V4SI 3 "register_operand" "w")]
6233 "TARGET_SIMD && TARGET_SHA2"
6234 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6235 [(set_attr "type" "crypto_sha1_xor")]
;; SHA-256 hash (SHA256H/SHA256H2) and schedule (SHA256SU0/SHA256SU1)
;; instructions.  Operand 1 carries the running state and is tied to
;; the destination.
6240 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6241 [(set (match_operand:V4SI 0 "register_operand" "=w")
6242 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6243 (match_operand:V4SI 2 "register_operand" "w")
6244 (match_operand:V4SI 3 "register_operand" "w")]
6246 "TARGET_SIMD && TARGET_SHA2"
6247 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6248 [(set_attr "type" "crypto_sha256_slow")]
6251 (define_insn "aarch64_crypto_sha256su0v4si"
6252 [(set (match_operand:V4SI 0 "register_operand" "=w")
6253 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6254 (match_operand:V4SI 2 "register_operand" "w")]
6256 "TARGET_SIMD && TARGET_SHA2"
6257 "sha256su0\\t%0.4s, %2.4s"
6258 [(set_attr "type" "crypto_sha256_fast")]
6261 (define_insn "aarch64_crypto_sha256su1v4si"
6262 [(set (match_operand:V4SI 0 "register_operand" "=w")
6263 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6264 (match_operand:V4SI 2 "register_operand" "w")
6265 (match_operand:V4SI 3 "register_operand" "w")]
6267 "TARGET_SIMD && TARGET_SHA2"
6268 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6269 [(set_attr "type" "crypto_sha256_slow")]
;; SHA-512 instructions (Armv8.4-A, gated by TARGET_SHA3 here), same
;; tied-state structure as the SHA-256 patterns above.
6274 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6275 [(set (match_operand:V2DI 0 "register_operand" "=w")
6276 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6277 (match_operand:V2DI 2 "register_operand" "w")
6278 (match_operand:V2DI 3 "register_operand" "w")]
6280 "TARGET_SIMD && TARGET_SHA3"
6281 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6282 [(set_attr "type" "crypto_sha512")]
6285 (define_insn "aarch64_crypto_sha512su0qv2di"
6286 [(set (match_operand:V2DI 0 "register_operand" "=w")
6287 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6288 (match_operand:V2DI 2 "register_operand" "w")]
6290 "TARGET_SIMD && TARGET_SHA3"
6291 "sha512su0\\t%0.2d, %2.2d"
6292 [(set_attr "type" "crypto_sha512")]
6295 (define_insn "aarch64_crypto_sha512su1qv2di"
6296 [(set (match_operand:V2DI 0 "register_operand" "=w")
6297 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6298 (match_operand:V2DI 2 "register_operand" "w")
6299 (match_operand:V2DI 3 "register_operand" "w")]
6301 "TARGET_SIMD && TARGET_SHA3"
6302 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6303 [(set_attr "type" "crypto_sha512")]
;; SHA-3 bit-manipulation instructions (Armv8.4-A):
;; EOR3  = a ^ b ^ c
;; RAX1  = a ^ rotate(b) (rotate amount encoded outside this view)
;; XAR   = rotate(a ^ b, imm)
;; BCAX  = a ^ (b & ~c)
;; These are expressed with generic RTL (xor/and/not/rotate) rather
;; than unspecs so combine can form them from open-coded sequences.
6308 (define_insn "eor3q<mode>4"
6309 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6312 (match_operand:VQ_I 2 "register_operand" "w")
6313 (match_operand:VQ_I 3 "register_operand" "w"))
6314 (match_operand:VQ_I 1 "register_operand" "w")))]
6315 "TARGET_SIMD && TARGET_SHA3"
6316 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6317 [(set_attr "type" "crypto_sha3")]
6320 (define_insn "aarch64_rax1qv2di"
6321 [(set (match_operand:V2DI 0 "register_operand" "=w")
6324 (match_operand:V2DI 2 "register_operand" "w")
6326 (match_operand:V2DI 1 "register_operand" "w")))]
6327 "TARGET_SIMD && TARGET_SHA3"
6328 "rax1\\t%0.2d, %1.2d, %2.2d"
6329 [(set_attr "type" "crypto_sha3")]
6332 (define_insn "aarch64_xarqv2di"
6333 [(set (match_operand:V2DI 0 "register_operand" "=w")
6336 (match_operand:V2DI 1 "register_operand" "%w")
6337 (match_operand:V2DI 2 "register_operand" "w"))
6338 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6339 "TARGET_SIMD && TARGET_SHA3"
6340 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6341 [(set_attr "type" "crypto_sha3")]
6344 (define_insn "bcaxq<mode>4"
6345 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6348 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6349 (match_operand:VQ_I 2 "register_operand" "w"))
6350 (match_operand:VQ_I 1 "register_operand" "w")))]
6351 "TARGET_SIMD && TARGET_SHA3"
6352 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6353 [(set_attr "type" "crypto_sha3")]
;; SM3 (Chinese standard hash) instructions: SM3SS1, SM3TT1A/1B/2A/2B
;; (lane index in operand 4, restricted to 0..3 by aarch64_imm2) and
;; SM3PARTW1/SM3PARTW2.
6358 (define_insn "aarch64_sm3ss1qv4si"
6359 [(set (match_operand:V4SI 0 "register_operand" "=w")
6360 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6361 (match_operand:V4SI 2 "register_operand" "w")
6362 (match_operand:V4SI 3 "register_operand" "w")]
6364 "TARGET_SIMD && TARGET_SM4"
6365 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6366 [(set_attr "type" "crypto_sm3")]
6370 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6371 [(set (match_operand:V4SI 0 "register_operand" "=w")
6372 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6373 (match_operand:V4SI 2 "register_operand" "w")
6374 (match_operand:V4SI 3 "register_operand" "w")
6375 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6377 "TARGET_SIMD && TARGET_SM4"
6378 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6379 [(set_attr "type" "crypto_sm3")]
6382 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6383 [(set (match_operand:V4SI 0 "register_operand" "=w")
6384 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6385 (match_operand:V4SI 2 "register_operand" "w")
6386 (match_operand:V4SI 3 "register_operand" "w")]
6388 "TARGET_SIMD && TARGET_SM4"
6389 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6390 [(set_attr "type" "crypto_sm3")]
;; SM4 (Chinese standard block cipher): SM4E round and SM4EKEY key
;; schedule instructions.
6395 (define_insn "aarch64_sm4eqv4si"
6396 [(set (match_operand:V4SI 0 "register_operand" "=w")
6397 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6398 (match_operand:V4SI 2 "register_operand" "w")]
6400 "TARGET_SIMD && TARGET_SM4"
6401 "sm4e\\t%0.4s, %2.4s"
6402 [(set_attr "type" "crypto_sm4")]
6405 (define_insn "aarch64_sm4ekeyqv4si"
6406 [(set (match_operand:V4SI 0 "register_operand" "=w")
6407 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6408 (match_operand:V4SI 2 "register_operand" "w")]
6410 "TARGET_SIMD && TARGET_SM4"
6411 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6412 [(set_attr "type" "crypto_sm4")]
;; FP16 FMLAL/FMLSL (Armv8.2-A widening half->single multiply
;; accumulate).  The _low expanders build lo-half lane-selection
;; parallels, the _high expanders hi-half ones, then emit the
;; corresponding insn below.
6417 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6418 [(set (match_operand:VDQSF 0 "register_operand")
6420 [(match_operand:VDQSF 1 "register_operand")
6421 (match_operand:<VFMLA_W> 2 "register_operand")
6422 (match_operand:<VFMLA_W> 3 "register_operand")]
6426 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6427 <nunits> * 2, false);
6428 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6429 <nunits> * 2, false);
6431 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6440 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6441 [(set (match_operand:VDQSF 0 "register_operand")
6443 [(match_operand:VDQSF 1 "register_operand")
6444 (match_operand:<VFMLA_W> 2 "register_operand")
6445 (match_operand:<VFMLA_W> 3 "register_operand")]
6449 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6450 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6452 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; The insns proper: fused multiply-add of the selected half of each
;; half-precision input into the single-precision accumulator
;; (operand 1, tied to the destination).  FMLSL negates the first
;; factor; the _high forms emit fmlal2/fmlsl2.
6460 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6461 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6464 (vec_select:<VFMLA_SEL_W>
6465 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6466 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6468 (vec_select:<VFMLA_SEL_W>
6469 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6470 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6471 (match_operand:VDQSF 1 "register_operand" "0")))]
6473 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6474 [(set_attr "type" "neon_fp_mul_s")]
6477 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6478 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6482 (vec_select:<VFMLA_SEL_W>
6483 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6484 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6486 (vec_select:<VFMLA_SEL_W>
6487 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6488 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6489 (match_operand:VDQSF 1 "register_operand" "0")))]
6491 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6492 [(set_attr "type" "neon_fp_mul_s")]
6495 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6496 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6499 (vec_select:<VFMLA_SEL_W>
6500 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6501 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6503 (vec_select:<VFMLA_SEL_W>
6504 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6505 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6506 (match_operand:VDQSF 1 "register_operand" "0")))]
6508 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6509 [(set_attr "type" "neon_fp_mul_s")]
6512 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6513 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6517 (vec_select:<VFMLA_SEL_W>
6518 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6519 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6521 (vec_select:<VFMLA_SEL_W>
6522 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6523 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6524 (match_operand:VDQSF 1 "register_operand" "0")))]
6526 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6527 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the by-lane low-half V2SF forms (fmlal/fmlsl via <f16mac1>).
;; Builds the lo-half selector parallel for V4HF and converts the immediate
;; lane number (operand 4, 0..3 per aarch64_imm2) to the endian-adjusted
;; lane rtx, then emits the matching *_lane_lowv2sf insn.
6530 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6531 [(set (match_operand:V2SF 0 "register_operand")
6532 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6533 (match_operand:V4HF 2 "register_operand")
6534 (match_operand:V4HF 3 "register_operand")
6535 (match_operand:SI 4 "aarch64_imm2")]
6539 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6540 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6542 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
;; Expander for the by-lane high-half V2SF forms.  Identical to the low-half
;; expander except the parallel selects the high half of the V4HF inputs
;; (third argument "true" to aarch64_simd_vect_par_cnst_half).
6551 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6552 [(set (match_operand:V2SF 0 "register_operand")
6553 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6554 (match_operand:V4HF 2 "register_operand")
6555 (match_operand:V4HF 3 "register_operand")
6556 (match_operand:SI 4 "aarch64_imm2")]
6560 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6561 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6563 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; FMLAL (by element, low half, V2SF): multiply the low half of operand 2
;; by the duplicated lane %5 of operand 3 (constraint "x" restricts the
;; indexed register to v0-v15) and accumulate into tied operand 1.
6571 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6572 [(set (match_operand:V2SF 0 "register_operand" "=w")
6576 (match_operand:V4HF 2 "register_operand" "w")
6577 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6581 (match_operand:V4HF 3 "register_operand" "x")
6582 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6583 (match_operand:V2SF 1 "register_operand" "0")))]
6585 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6586 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half, V2SF): as fmlal_lane_lowv2sf, but the
;; low-half multiplicand is negated, giving a widening multiply-subtract.
6589 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6590 [(set (match_operand:V2SF 0 "register_operand" "=w")
6595 (match_operand:V4HF 2 "register_operand" "w")
6596 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6600 (match_operand:V4HF 3 "register_operand" "x")
6601 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6602 (match_operand:V2SF 1 "register_operand" "0")))]
6604 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6605 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half, V2SF): high half of operand 2 times the
;; duplicated lane %5 of operand 3, accumulated into tied operand 1.
6608 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6609 [(set (match_operand:V2SF 0 "register_operand" "=w")
6613 (match_operand:V4HF 2 "register_operand" "w")
6614 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6618 (match_operand:V4HF 3 "register_operand" "x")
6619 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6620 (match_operand:V2SF 1 "register_operand" "0")))]
6622 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6623 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half, V2SF): negated high-half multiplicand,
;; i.e. widening multiply-subtract of lane %5, accumulator tied to %0.
6626 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6627 [(set (match_operand:V2SF 0 "register_operand" "=w")
6632 (match_operand:V4HF 2 "register_operand" "w")
6633 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6637 (match_operand:V4HF 3 "register_operand" "x")
6638 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6639 (match_operand:V2SF 1 "register_operand" "0")))]
6641 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6642 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad by-laneq low-half V4SF forms: V8HF data with a
;; V8HF index vector, lane immediate 0..7 (aarch64_lane_imm3).  Builds the
;; lo-half selector and endian-adjusted lane, then emits the insn pattern.
6645 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6646 [(set (match_operand:V4SF 0 "register_operand")
6647 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6648 (match_operand:V8HF 2 "register_operand")
6649 (match_operand:V8HF 3 "register_operand")
6650 (match_operand:SI 4 "aarch64_lane_imm3")]
6654 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6655 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6657 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; Expander for the quad by-laneq high-half V4SF forms; same as the
;; low-half expander but selecting the high half of the V8HF inputs.
6665 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6666 [(set (match_operand:V4SF 0 "register_operand")
6667 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6668 (match_operand:V8HF 2 "register_operand")
6669 (match_operand:V8HF 3 "register_operand")
6670 (match_operand:SI 4 "aarch64_lane_imm3")]
6674 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6675 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6677 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (by element, low half, V4SF): low half of V8HF operand 2 times
;; lane %5 (0..7, "Ui7") of V8HF operand 3, accumulated into tied operand 1.
6685 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6686 [(set (match_operand:V4SF 0 "register_operand" "=w")
6690 (match_operand:V8HF 2 "register_operand" "w")
6691 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6695 (match_operand:V8HF 3 "register_operand" "x")
6696 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6697 (match_operand:V4SF 1 "register_operand" "0")))]
6699 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6700 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half, V4SF): negated low-half multiplicand —
;; widening multiply-subtract of lane %5, accumulator tied to %0.
6703 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6704 [(set (match_operand:V4SF 0 "register_operand" "=w")
6709 (match_operand:V8HF 2 "register_operand" "w")
6710 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6714 (match_operand:V8HF 3 "register_operand" "x")
6715 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6716 (match_operand:V4SF 1 "register_operand" "0")))]
6718 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6719 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half, V4SF): high half of operand 2 times
;; lane %5 of operand 3, accumulated into tied operand 1.
6722 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6723 [(set (match_operand:V4SF 0 "register_operand" "=w")
6727 (match_operand:V8HF 2 "register_operand" "w")
6728 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6732 (match_operand:V8HF 3 "register_operand" "x")
6733 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6734 (match_operand:V4SF 1 "register_operand" "0")))]
6736 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6737 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half, V4SF): negated high-half multiplicand —
;; widening multiply-subtract of lane %5, accumulator tied to %0.
6740 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6741 [(set (match_operand:V4SF 0 "register_operand" "=w")
6746 (match_operand:V8HF 2 "register_operand" "w")
6747 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6751 (match_operand:V8HF 3 "register_operand" "x")
6752 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6753 (match_operand:V4SF 1 "register_operand" "0")))]
6755 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6756 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 64-bit-data/128-bit-index "laneq" low-half V2SF forms:
;; V4HF data (operand 2) indexed by a lane of a V8HF vector (operand 3),
;; lane immediate 0..7.  Note the half selector uses V4HFmode while the
;; lane rtx uses V8HFmode — both visible below and intentional.
6759 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6760 [(set (match_operand:V2SF 0 "register_operand")
6761 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6762 (match_operand:V4HF 2 "register_operand")
6763 (match_operand:V8HF 3 "register_operand")
6764 (match_operand:SI 4 "aarch64_lane_imm3")]
6768 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6769 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6771 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the "laneq" high-half V2SF forms; as the low-half variant
;; but selecting the high half of the V4HF data operand.
6780 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6781 [(set (match_operand:V2SF 0 "register_operand")
6782 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6783 (match_operand:V4HF 2 "register_operand")
6784 (match_operand:V8HF 3 "register_operand")
6785 (match_operand:SI 4 "aarch64_lane_imm3")]
6789 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6790 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6792 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (by element, laneq, low half, V2SF): low half of V4HF operand 2
;; times lane %5 (0..7) of V8HF operand 3, accumulated into tied operand 1.
6801 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6802 [(set (match_operand:V2SF 0 "register_operand" "=w")
6806 (match_operand:V4HF 2 "register_operand" "w")
6807 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6811 (match_operand:V8HF 3 "register_operand" "x")
6812 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6813 (match_operand:V2SF 1 "register_operand" "0")))]
6815 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6816 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, laneq, low half, V2SF): negated low-half
;; multiplicand — widening multiply-subtract of V8HF lane %5.
6819 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6820 [(set (match_operand:V2SF 0 "register_operand" "=w")
6825 (match_operand:V4HF 2 "register_operand" "w")
6826 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6830 (match_operand:V8HF 3 "register_operand" "x")
6831 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6832 (match_operand:V2SF 1 "register_operand" "0")))]
6834 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6835 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, laneq, high half, V2SF): high half of operand 2
;; times V8HF lane %5, accumulated into tied operand 1.
6838 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6839 [(set (match_operand:V2SF 0 "register_operand" "=w")
6843 (match_operand:V4HF 2 "register_operand" "w")
6844 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6848 (match_operand:V8HF 3 "register_operand" "x")
6849 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6850 (match_operand:V2SF 1 "register_operand" "0")))]
6852 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6853 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, laneq, high half, V2SF): negated high-half
;; multiplicand — widening multiply-subtract of V8HF lane %5.
6856 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6857 [(set (match_operand:V2SF 0 "register_operand" "=w")
6862 (match_operand:V4HF 2 "register_operand" "w")
6863 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6867 (match_operand:V8HF 3 "register_operand" "x")
6868 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6869 (match_operand:V2SF 1 "register_operand" "0")))]
6871 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6872 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 128-bit-data/64-bit-index "lane" low-half V4SF forms:
;; V8HF data (operand 2) indexed by a lane of a V4HF vector (operand 3),
;; lane immediate 0..3.  Half selector in V8HFmode, lane rtx in V4HFmode —
;; the mirror image of the laneq_v2sf expanders above.
6875 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6876 [(set (match_operand:V4SF 0 "register_operand")
6877 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6878 (match_operand:V8HF 2 "register_operand")
6879 (match_operand:V4HF 3 "register_operand")
6880 (match_operand:SI 4 "aarch64_imm2")]
6884 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6885 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6887 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the "lane" high-half V4SF forms; as the low-half variant
;; but selecting the high half of the V8HF data operand.
6895 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6896 [(set (match_operand:V4SF 0 "register_operand")
6897 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6898 (match_operand:V8HF 2 "register_operand")
6899 (match_operand:V4HF 3 "register_operand")
6900 (match_operand:SI 4 "aarch64_imm2")]
6904 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6905 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6907 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (by element, lane, low half, V4SF): low half of V8HF operand 2
;; times lane %5 (0..3, "Ui2") of V4HF operand 3, accumulated into tied
;; operand 1.
6915 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6916 [(set (match_operand:V4SF 0 "register_operand" "=w")
6920 (match_operand:V8HF 2 "register_operand" "w")
6921 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6925 (match_operand:V4HF 3 "register_operand" "x")
6926 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6927 (match_operand:V4SF 1 "register_operand" "0")))]
6929 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6930 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, lane, low half, V4SF): negated low-half
;; multiplicand — widening multiply-subtract of V4HF lane %5.
6933 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6934 [(set (match_operand:V4SF 0 "register_operand" "=w")
6939 (match_operand:V8HF 2 "register_operand" "w")
6940 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6944 (match_operand:V4HF 3 "register_operand" "x")
6945 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6946 (match_operand:V4SF 1 "register_operand" "0")))]
6948 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6949 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, lane, high half, V4SF): high half of operand 2
;; times V4HF lane %5, accumulated into tied operand 1.
6952 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6953 [(set (match_operand:V4SF 0 "register_operand" "=w")
6957 (match_operand:V8HF 2 "register_operand" "w")
6958 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6962 (match_operand:V4HF 3 "register_operand" "x")
6963 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6964 (match_operand:V4SF 1 "register_operand" "0")))]
6966 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6967 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, lane, high half, V4SF): negated high-half
;; multiplicand — widening multiply-subtract of V4HF lane %5.
6970 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6971 [(set (match_operand:V4SF 0 "register_operand" "=w")
6976 (match_operand:V8HF 2 "register_operand" "w")
6977 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6981 (match_operand:V4HF 3 "register_operand" "x")
6982 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6983 (match_operand:V4SF 1 "register_operand" "0")))]
6985 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6986 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL (1Q): 64x64 -> 128-bit carry-less (polynomial) multiply of the
;; low D registers; requires both SIMD and the AES crypto extension.
6991 (define_insn "aarch64_crypto_pmulldi"
6992 [(set (match_operand:TI 0 "register_operand" "=w")
6993 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6994 (match_operand:DI 2 "register_operand" "w")]
6996 "TARGET_SIMD && TARGET_AES"
6997 "pmull\\t%0.1q, %1.1d, %2.1d"
6998 [(set_attr "type" "crypto_pmull")]
;; PMULL2 (1Q): carry-less multiply of the upper 64-bit elements of two
;; V2DI registers, producing a 128-bit result; needs SIMD + AES.
7001 (define_insn "aarch64_crypto_pmullv2di"
7002 [(set (match_operand:TI 0 "register_operand" "=w")
7003 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
7004 (match_operand:V2DI 2 "register_operand" "w")]
7006 "TARGET_SIMD && TARGET_AES"
7007 "pmull2\\t%0.1q, %1.2d, %2.2d"
7008 [(set_attr "type" "crypto_pmull")]
7011 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; ANY_EXTEND iterates over sign_extend/zero_extend; <su> picks the
;; matching SXTL/UXTL mnemonic and <optab> names the standard
;; extend<m><n>2 pattern the middle end looks up.
7012 (define_insn "<optab><Vnarrowq><mode>2"
7013 [(set (match_operand:VQN 0 "register_operand" "=w")
7014 (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
7016 "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
7017 [(set_attr "type" "neon_shift_imm_long")]
7020 ;; Truncate a 128-bit integer vector to a 64-bit vector.
;; Implements the standard trunc<m><n>2 pattern with XTN (extract narrow),
;; keeping the low half of each element.
7021 (define_insn "trunc<mode><Vnarrowq>2"
7022 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
7023 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
7025 "xtn\t%0.<Vntype>, %1.<Vtype>"
7026 [(set_attr "type" "neon_shift_imm_narrow_q")]