1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Vector move expander for all full-width vector modes (VALL_F16).
;; NOTE(review): the enclosing { } braces and the insn condition are
;; missing from this extract of the pattern.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  Must never fail, so when both
;; operands are non-registers the source is forced into a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast an integer scalar into every lane: from a SIMD register
;; lane (DUP Vd.T, Vn.Ts[0]) or, as a second choice, from a GP register.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Broadcast a floating-point scalar (held in a SIMD register) into
;; every lane of the destination vector.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
;; Broadcast one selected lane of operand 1 across the whole vector.
;; The lane number is remapped to the architectural (endian-adjusted)
;; index before printing.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
;; As aarch64_dup_lane, but the source vector has the opposite register
;; width (<VSWAP_WIDTH>), so the lane index is endian-adjusted in that
;; mode instead of the destination mode.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (D-register) vector move.  Alternatives: load, store of zero
;; via xzr, store, vector-register copy, SIMD->GP (umov), GP->SIMD
;; (fmov), GP->GP, and immediate materialisation.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
;; 128-bit (Q-register) vector move.  Storing zero uses STP xzr, xzr;
;; transfers involving GP registers are multi-insn (length 8, split
;; later); other alternatives are single instructions (length 4).
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.  The condition checks that the selected lane is
;; architectural lane zero after endian adjustment.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; Load a pair of D registers (LDP).  The condition requires the second
;; address to be exactly the first address plus the size of the first
;; mode, i.e. the two loads are adjacent.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
;; Store a pair of D registers (STP) to two adjacent memory locations;
;; mirror image of load_pair above.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
;; Load a pair of Q registers (LDP, quad form) from adjacent addresses.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
;; Store a pair of Q registers (STP, quad form) to adjacent addresses.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; Post-reload split of a Q-mode move where both operands landed in GP
;; registers: emitted as two DImode register-register moves.
;; NOTE(review): the "(define_split" opening line is missing from this
;; extract.
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split of a Q-mode move that crosses between the FP and
;; GP register files, in either direction; delegated to
;; aarch64_split_simd_move.  NOTE(review): the "(define_split" opening
;; line is missing from this extract.
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move between register files into two 64-bit halves:
;; GP source -> move lo/hi quads into the vector; otherwise extract the
;; lo/hi halves of the vector into the destination's subregs.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low 64-bit half of a Q register into a GP register
;; (post-reload only; operand 2 selects the low-half lanes).
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
;; Move the high 64-bit half of a Q register into a GP register
;; (post-reload only; operand 2 selects the high-half lanes).
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; Vector OR-NOT.  The RTL is (ior (not op1) op2); ORN computes
;; op2 | ~op1, hence the swapped operand order in the template.
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
;; Vector AND-NOT.  RTL is (and (not op1) op2); BIC computes
;; op2 & ~op1, hence the swapped operand order in the template.
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition, lane by lane.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction, lane by lane.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication for byte/half/single element sizes
;; (no 64-bit lane variant of MUL exists).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element, implemented with the REV family
;; (suffix chosen by <Vrevsuff> for the element size).
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte of the vector (RBIT).
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: there is no direct vector CTZ instruction, so
;; byte-swap, then bit-reverse each byte (via a QI-element subreg view),
;; then count leading zeros of the fully bit-reversed value.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(a, b) = a with its sign bit XORed by b's sign bit.
;; Mask off b's sign bit with an all-sign-bits constant vector, then
;; XOR it into a, working in the equivalent integer vector mode.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
423 ;; fact that their usage needs to guarantee that the source vectors are
424 ;; contiguous. It would be wrong to describe the operation without being able
425 ;; to describe the permute that is also required, but even if that is done
426 ;; the permute would have been created as a LOAD_LANES which means the values
427 ;; in the registers are in the wrong order.
;; Floating-point complex add with rotation #<rot>.
428 (define_insn "aarch64_fcadd<rot><mode>"
429 [(set (match_operand:VHSDF 0 "register_operand" "=w")
430 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431 (match_operand:VHSDF 2 "register_operand" "w")]
434 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435 [(set_attr "type" "neon_fcadd")]
;; Floating-point complex multiply-accumulate with rotation #<rot>;
;; operand 1 is the accumulator, constrained ("0") to the output.
438 (define_insn "aarch64_fcmla<rot><mode>"
439 [(set (match_operand:VHSDF 0 "register_operand" "=w")
440 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442 (match_operand:VHSDF 3 "register_operand" "w")]
445 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446 [(set_attr "type" "neon_fcmla")]
;; FCMLA with one complex operand taken from a selected lane; the lane
;; index is endian-adjusted in the half-width mode.
450 (define_insn "aarch64_fcmla_lane<rot><mode>"
451 [(set (match_operand:VHSDF 0 "register_operand" "=w")
452 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454 (match_operand:VHSDF 3 "register_operand" "w")
455 (match_operand:SI 4 "const_int_operand" "n")]
459 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
462 [(set_attr "type" "neon_fcmla")]
;; V4HF FCMLA taking the lane operand from a full 128-bit V8HF vector
;; (the "laneq" form); lane index endian-adjusted in V4HF mode.
465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
466 [(set (match_operand:V4HF 0 "register_operand" "=w")
467 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469 (match_operand:V8HF 3 "register_operand" "w")
470 (match_operand:SI 4 "const_int_operand" "n")]
474 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
477 [(set_attr "type" "neon_fcmla")]
;; Q-form FCMLA by lane; the index counts complex (real,imag) pairs,
;; so it is endian-adjusted over nunits/2 lanes.
480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
481 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484 (match_operand:<VHALF> 3 "register_operand" "w")
485 (match_operand:SI 4 "const_int_operand" "n")]
489 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
491 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
494 [(set_attr "type" "neon_fcmla")]
497 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the accumulator, constrained ("0") to the output.
498 (define_insn "aarch64_<sur>dot<vsi2qi>"
499 [(set (match_operand:VS 0 "register_operand" "=w")
500 (plus:VS (match_operand:VS 1 "register_operand" "0")
501 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502 (match_operand:<VSI2QI> 3 "register_operand" "w")]
505 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506 [(set_attr "type" "neon_dot<q>")]
509 ;; These expands map to the Dot Product optab the vectorizer checks for.
510 ;; The auto-vectorizer expects a dot product builtin that also does an
511 ;; accumulation into the provided register.
512 ;; Given the following pattern
514 ;; for (i=0; i<len; i++) {
520 ;; This can be auto-vectorized to
521 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
523 ;; given enough iterations. However the vectorizer can keep unrolling the loop
524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Accumulate into operand 3 via the dot insn, then copy to operand 0.
529 (define_expand "<sur>dot_prod<vsi2qi>"
530 [(set (match_operand:VS 0 "register_operand")
531 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532 (match_operand:<VSI2QI> 2 "register_operand")]
534 (match_operand:VS 3 "register_operand")))]
538 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
540 emit_insn (gen_rtx_SET (operands[0], operands[3]));
544 ;; These instructions map to the __builtins for the Dot Product
545 ;; indexed operations.
;; Lane form with a 64-bit (V8QI) index vector; the lane index is
;; endian-adjusted in V8QImode.
546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547 [(set (match_operand:VS 0 "register_operand" "=w")
548 (plus:VS (match_operand:VS 1 "register_operand" "0")
549 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550 (match_operand:V8QI 3 "register_operand" "<h_con>")
551 (match_operand:SI 4 "immediate_operand" "i")]
555 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
558 [(set_attr "type" "neon_dot<q>")]
;; Indexed dot product with a 128-bit (V16QI) index vector ("laneq");
;; lane index endian-adjusted in V16QImode.
561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562 [(set (match_operand:VS 0 "register_operand" "=w")
563 (plus:VS (match_operand:VS 1 "register_operand" "0")
564 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565 (match_operand:V16QI 3 "register_operand" "<h_con>")
566 (match_operand:SI 4 "immediate_operand" "i")]
570 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
573 [(set_attr "type" "neon_dot<q>")]
;; copysign(op1, op2): select the sign bit from op2 and the remaining
;; bits from op1 using BSL with an all-sign-bits mask vector.
576 (define_expand "copysign<mode>3"
577 [(match_operand:VHSDF 0 "register_operand")
578 (match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 "TARGET_FLOAT && TARGET_SIMD"
582 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
585 emit_move_insn (v_bitmask,
586 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587 HOST_WIDE_INT_M1U << bits));
588 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589 operands[2], operands[1]));
;; Multiply a vector by a single (endian-adjusted) lane of another
;; vector of the same mode: [F]MUL Vd.T, Vn.T, Vm.Ts[lane].
594 (define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
599 (match_operand:VMUL 1 "register_operand" "<h_con>")
600 (parallel [(match_operand:SI 2 "immediate_operand")])))
601 (match_operand:VMUL 3 "register_operand" "w")))]
604 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
607 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane is taken from a vector of the opposite
;; register width (<VSWAP_WIDTH>); lane index adjusted in that mode.
610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612 (mult:VMUL_CHANGE_NLANES
613 (vec_duplicate:VMUL_CHANGE_NLANES
615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616 (parallel [(match_operand:SI 2 "immediate_operand")])))
617 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
620 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
623 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply a vector by a duplicated scalar register, printed as a
;; by-element multiply using lane 0.
626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
630 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631 (match_operand:VMUL 2 "register_operand" "w")))]
633 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Floating-point reciprocal square root estimate (FRSQRTE).
637 (define_insn "@aarch64_rsqrte<mode>"
638 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
642 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Floating-point reciprocal square root step (FRSQRTS), used in
;; Newton-Raphson refinement of the FRSQRTE estimate.
645 (define_insn "@aarch64_rsqrts<mode>"
646 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
651 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand 1/sqrt(x) through the approximate-sqrt helper (the final
;; argument selects the reciprocal variant).
654 (define_expand "rsqrt<mode>2"
655 [(set (match_operand:VALLF 0 "register_operand" "=w")
656 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
660 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a selected lane of a V2DF vector, using the
;; by-element FMUL form; lane index endian-adjusted in V2DFmode.
664 (define_insn "*aarch64_mul3_elt_to_64v2df"
665 [(set (match_operand:DF 0 "register_operand" "=w")
668 (match_operand:V2DF 1 "register_operand" "w")
669 (parallel [(match_operand:SI 2 "immediate_operand")]))
670 (match_operand:DF 3 "register_operand" "w")))]
673 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
676 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negation, lane by lane.
679 (define_insn "neg<mode>2"
680 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
683 "neg\t%0.<Vtype>, %1.<Vtype>"
684 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value, lane by lane.
687 (define_insn "abs<mode>2"
688 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
691 "abs\t%0.<Vtype>, %1.<Vtype>"
692 [(set_attr "type" "neon_abs<q>")]
695 ;; The intrinsic version of integer ABS must not be allowed to
696 ;; combine with any operation with an integrated ABS step, such
;; as SABD; hence the UNSPEC form rather than plain (abs ...).
698 (define_insn "aarch64_abs<mode>"
699 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
701 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
704 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705 [(set_attr "type" "neon_abs<q>")]
708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709 ;; This isn't accurate as ABS always treats its input as a signed value.
710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
;; Signed/unsigned absolute difference ([SU]ABD).
713 (define_insn "aarch64_<su>abd<mode>_3"
714 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
717 (match_operand:VDQ_BHSI 1 "register_operand" "w")
718 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
723 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute difference of the high halves ([SU]ABDL2):
;; result elements are twice the width of the inputs (<VDBLW>).
727 (define_insn "aarch64_<sur>abdl2<mode>_3"
728 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730 (match_operand:VDQV_S 2 "register_operand" "w")]
733 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734 [(set_attr "type" "neon_abd<q>")]
;; Widening absolute-difference-and-accumulate ([SU]ABAL); operand 3
;; is the accumulator, constrained ("0") to the output.
737 (define_insn "aarch64_<sur>abal<mode>_4"
738 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740 (match_operand:VDQV_S 2 "register_operand" "w")
741 (match_operand:<VDBLW> 3 "register_operand" "0")]
744 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745 [(set_attr "type" "neon_arith_acc<q>")]
;; Pairwise add-and-accumulate-long ([SU]ADALP); operand 2 is the
;; accumulator, constrained ("0") to the output.
748 (define_insn "aarch64_<sur>adalp<mode>_3"
749 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751 (match_operand:<VDBLW> 2 "register_operand" "0")]
754 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755 [(set_attr "type" "neon_reduc_add<q>")]
758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
760 ;; reduction of the difference into a V4SI vector and accumulate that into
761 ;; operand 3 before copying that into the result operand 0.
762 ;; Perform that with a sequence of:
763 ;; UABDL2 tmp.8h, op1.16b, op2.16b
764 ;; UABAL tmp.8h, op1.16b, op2.16b
765 ;; UADALP op3.4s, tmp.8h
766 ;; MOV op0, op3 // should be eliminated in later passes.
768 ;; For TARGET_DOTPROD we do:
769 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
770 ;; UABD tmp2.16b, op1.16b, op2.16b
771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
772 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
774 ;; The signed version just uses the signed variants of the above instructions
775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; (continuation of the above note; the following line is missing from
;; this extract.)
778 (define_expand "<sur>sadv16qi"
779 [(use (match_operand:V4SI 0 "register_operand"))
780 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
781 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
782 (use (match_operand:V4SI 3 "register_operand"))]
787 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
788 rtx abd = gen_reg_rtx (V16QImode);
789 emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
790 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
794 rtx reduc = gen_reg_rtx (V8HImode);
795 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
797 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
798 operands[2], reduc))
799 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
801 emit_move_insn (operands[0], operands[3]);
;; Absolute-difference-and-accumulate: operand 3 (tied to the output)
;; plus |op1 - op2|, emitted as SABA.
806 (define_insn "aba<mode>_3"
807 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
808 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
809 (match_operand:VDQ_BHSI 1 "register_operand" "w")
810 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
811 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
813 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
814 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD).
817 (define_insn "fabd<mode>3"
818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
821 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
822 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
824 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
825 [(set_attr "type" "neon_fp_abd_<stype><q>")]
828 ;; For AND (vector, register) and BIC (vector, immediate)
;; Alternative 1 ties the output to operand 1 and applies the inverted
;; immediate as a BIC, printed by the immediate-output helper.
829 (define_insn "and<mode>3"
830 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
831 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
832 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
835 switch (which_alternative)
838 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
840 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
846 [(set_attr "type" "neon_logic<q>")]
849 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Alternative 1 ties the output to operand 1 and applies the
;; immediate form, printed by the immediate-output helper.
850 (define_insn "ior<mode>3"
851 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
852 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
853 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
856 switch (which_alternative)
859 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
861 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
867 [(set_attr "type" "neon_logic<q>")]
;; Vector exclusive-OR (EOR).
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
872 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
873 (match_operand:VDQ_I 2 "register_operand" "w")))]
875 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
876 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise complement (NOT).
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
881 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
883 "not\t%0.<Vbtype>, %1.<Vbtype>"
884 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of a vector (the rest comes from
;; operand 3, tied to the output).  Operand 2 arrives as a one-hot
;; mask; it is converted to the endian-adjusted lane number and then
;; back to a one-hot constant for the %p2 output modifier.
;; Alternatives: lane from SIMD reg (INS), from GP reg (INS), or
;; loaded from memory (LD1 single structure).
887 (define_insn "aarch64_simd_vec_set<mode>"
888 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
890 (vec_duplicate:VALL_F16
891 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
892 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
893 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
896 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
897 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
898 switch (which_alternative)
901 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
903 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
905 return "ld1\\t{%0.<Vetype>}[%p2], %1";
910 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Copy one lane of operand 3 into one lane of operand 1 (tied to the
;; output): INS Vd.Ts[dest_lane], Vn.Ts[src_lane].  The destination
;; lane mask and source lane index are both endian-adjusted.
913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
914 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
916 (vec_duplicate:VALL_F16
918 (match_operand:VALL_F16 3 "register_operand" "w")
920 [(match_operand:SI 4 "immediate_operand" "i")])))
921 (match_operand:VALL_F16 1 "register_operand" "0")
922 (match_operand:SI 2 "immediate_operand" "i")))]
925 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
926 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
927 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
929 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
931 [(set_attr "type" "neon_ins<q>")]
;; As above, but the source lane comes from a vector of the opposite
;; register width; its lane index is adjusted in <VSWAP_WIDTH>mode.
934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
935 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
936 (vec_merge:VALL_F16_NO_V2Q
937 (vec_duplicate:VALL_F16_NO_V2Q
939 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
941 [(match_operand:SI 4 "immediate_operand" "i")])))
942 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
943 (match_operand:SI 2 "immediate_operand" "i")))]
946 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
947 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
948 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
949 INTVAL (operands[4]));
951 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
953 [(set_attr "type" "neon_ins<q>")]
;; Extract the sign bit of each element: logical shift right by
;; (element width - 1) in the equivalent integer vector mode.
956 (define_expand "signbit<mode>2"
957 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
958 (use (match_operand:VDQSF 1 "register_operand"))]
961 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
962 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
964 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
966 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
;; Vector logical shift right by immediate (USHR); the shift amount is
;; a constant vector matched by aarch64_simd_rshift_imm.
971 (define_insn "aarch64_simd_lshr<mode>"
972 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
973 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
974 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
976 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
977 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector arithmetic shift right by immediate (SSHR).
980 (define_insn "aarch64_simd_ashr<mode>"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
982 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
983 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
985 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
986 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector shift left by immediate (SHL).
989 (define_insn "aarch64_simd_imm_shl<mode>"
990 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
991 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
992 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
994 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
995 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector shift left by a per-lane register amount (SSHL).
998 (define_insn "aarch64_simd_reg_sshl<mode>"
999 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1000 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1001 (match_operand:VDQ_I 2 "register_operand" "w")))]
1003 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1004 [(set_attr "type" "neon_shift_reg<q>")]
;; Unsigned shift by register (USHL): negative shift counts shift
;; right, so this is kept as an unspec rather than plain RTL shifts.
1007 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1008 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1009 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1010 (match_operand:VDQ_I 2 "register_operand" "w")]
1011 UNSPEC_ASHIFT_UNSIGNED))]
1013 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1014 [(set_attr "type" "neon_shift_reg<q>")]
;; Signed shift by register (SSHL); unspec for the same reason as the
;; unsigned variant above.
1017 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1018 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1019 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1020 (match_operand:VDQ_I 2 "register_operand" "w")]
1021 UNSPEC_ASHIFT_SIGNED))]
1023 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024 [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name expander: vector shift left by a scalar (SI) count.
1027 (define_expand "ashl<mode>3"
1028 [(match_operand:VDQ_I 0 "register_operand" "")
1029 (match_operand:VDQ_I 1 "register_operand" "")
1030 (match_operand:SI 2 "general_operand" "")]
1033 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
;; Constant count in [0, element width): use the SHL immediate pattern.
1036 if (CONST_INT_P (operands[2]))
1038 shift_amount = INTVAL (operands[2]);
1039 if (shift_amount >= 0 && shift_amount < bit_width)
1041 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1043 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
;; Out-of-range constant: force to a register and use the SSHL path.
1050 operands[2] = force_reg (SImode, operands[2]);
1053 else if (MEM_P (operands[2]))
1055 operands[2] = force_reg (SImode, operands[2]);
;; Register count: duplicate the scalar into every lane, then SSHL.
1058 if (REG_P (operands[2]))
1060 rtx tmp = gen_reg_rtx (<MODE>mode);
1061 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1062 convert_to_mode (<VEL>mode,
1065 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Standard-name expander: vector logical (unsigned) shift right by a
;; scalar (SI) count.
1074 (define_expand "lshr<mode>3"
1075 [(match_operand:VDQ_I 0 "register_operand" "")
1076 (match_operand:VDQ_I 1 "register_operand" "")
1077 (match_operand:SI 2 "general_operand" "")]
1080 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
;; Constant count in (0, element width]: use the USHR immediate pattern.
1083 if (CONST_INT_P (operands[2]))
1085 shift_amount = INTVAL (operands[2]);
1086 if (shift_amount > 0 && shift_amount <= bit_width)
1088 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1090 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1096 operands[2] = force_reg (SImode, operands[2]);
1098 else if (MEM_P (operands[2]))
1100 operands[2] = force_reg (SImode, operands[2]);
;; Register count: negate it (USHL with a negative count shifts right),
;; duplicate into every lane, and use the unsigned USHL pattern.
1103 if (REG_P (operands[2]))
1105 rtx tmp = gen_reg_rtx (SImode);
1106 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1107 emit_insn (gen_negsi2 (tmp, operands[2]));
1108 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1109 convert_to_mode (<VEL>mode,
1111 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Standard-name expander: vector arithmetic (signed) shift right by a
;; scalar (SI) count.  Mirrors lshr<mode>3 but uses the signed patterns.
1121 (define_expand "ashr<mode>3"
1122 [(match_operand:VDQ_I 0 "register_operand" "")
1123 (match_operand:VDQ_I 1 "register_operand" "")
1124 (match_operand:SI 2 "general_operand" "")]
1127 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
;; Constant count in (0, element width]: use the SSHR immediate pattern.
1130 if (CONST_INT_P (operands[2]))
1132 shift_amount = INTVAL (operands[2]);
1133 if (shift_amount > 0 && shift_amount <= bit_width)
1135 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1137 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1143 operands[2] = force_reg (SImode, operands[2]);
1145 else if (MEM_P (operands[2]))
1147 operands[2] = force_reg (SImode, operands[2]);
;; Register count: negate, duplicate into every lane, then the signed
;; SSHL pattern (negative counts shift right).
1150 if (REG_P (operands[2]))
1152 rtx tmp = gen_reg_rtx (SImode);
1153 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1154 emit_insn (gen_negsi2 (tmp, operands[2]));
1155 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1156 convert_to_mode (<VEL>mode,
1158 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: counts already per-lane, emit SSHL.
1168 (define_expand "vashl<mode>3"
1169 [(match_operand:VDQ_I 0 "register_operand" "")
1170 (match_operand:VDQ_I 1 "register_operand" "")
1171 (match_operand:VDQ_I 2 "register_operand" "")]
1174 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1179 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1180 ;; Negating individual lanes most certainly offsets the
1181 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the count vector,
;; then use the signed SSHL pattern.
1182 (define_expand "vashr<mode>3"
1183 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1184 (match_operand:VDQ_BHSI 1 "register_operand" "")
1185 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1188 rtx neg = gen_reg_rtx (<MODE>mode);
1189 emit (gen_neg<mode>2 (neg, operands[2]));
1190 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic shift right, routed through the scalar ashrdi3
;; pattern; a count of 64 is clamped to 63 (same all-sign-bits result).
1196 (define_expand "aarch64_ashr_simddi"
1197 [(match_operand:DI 0 "register_operand" "=w")
1198 (match_operand:DI 1 "register_operand" "w")
1199 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1202 /* An arithmetic shift right by 64 fills the result with copies of the sign
1203 bit, just like asr by 63 - however the standard pattern does not handle
1205 if (INTVAL (operands[2]) == 64)
1206 operands[2] = GEN_INT (63);
1207 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate the count vector, then
;; use the unsigned USHL pattern.
1212 (define_expand "vlshr<mode>3"
1213 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1214 (match_operand:VDQ_BHSI 1 "register_operand" "")
1215 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1218 rtx neg = gen_reg_rtx (<MODE>mode);
1219 emit (gen_neg<mode>2 (neg, operands[2]));
1220 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical shift right: a count of 64 is special-cased to a zero
;; move; other counts go through the scalar lshrdi3 pattern.
1225 (define_expand "aarch64_lshr_simddi"
1226 [(match_operand:DI 0 "register_operand" "=w")
1227 (match_operand:DI 1 "register_operand" "w")
1228 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1231 if (INTVAL (operands[2]) == 64)
1232 emit_move_insn (operands[0], const0_rtx);
1234 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1239 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift for 64-bit vectors.  On big-endian, lane order
;; within the register is reversed, so SHL produces the same lane
;; movement that USHR does on little-endian.
1240 (define_insn "vec_shr_<mode>"
1241 [(set (match_operand:VD 0 "register_operand" "=w")
1242 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1243 (match_operand:SI 2 "immediate_operand" "i")]
1247 if (BYTES_BIG_ENDIAN)
1248 return "shl %d0, %d1, %2";
1250 return "ushr %d0, %d1, %2";
1252 [(set_attr "type" "neon_shift_imm")]
;; Standard-name vec_set: operand 2 is a lane index; it is converted
;; here to the one-hot bitmask form that aarch64_simd_vec_set<mode>
;; expects, with operand 0 doubling as the merged-into source.
1255 (define_expand "vec_set<mode>"
1256 [(match_operand:VALL_F16 0 "register_operand" "+w")
1257 (match_operand:<VEL> 1 "register_operand" "w")
1258 (match_operand:SI 2 "immediate_operand" "")]
1261 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1262 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1263 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: op0 = op1 + op2 * op3 (MLA).
;; Operand 1 is tied to the output ("0" constraint).
1269 (define_insn "aarch64_mla<mode>"
1270 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1271 (plus:VDQ_BHSI (mult:VDQ_BHSI
1272 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1274 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1276 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1277 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from lane %2 of operand 1
;; (by-element form).  The lane number is remapped for endianness.
1280 (define_insn "*aarch64_mla_elt<mode>"
1281 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1284 (vec_duplicate:VDQHS
1286 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1287 (parallel [(match_operand:SI 2 "immediate_operand")])))
1288 (match_operand:VDQHS 3 "register_operand" "w"))
1289 (match_operand:VDQHS 4 "register_operand" "0")))]
1292 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1293 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1295 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane is taken from a <VSWAP_WIDTH>-mode vector
;; (the opposite-width counterpart of the destination mode).
1298 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1299 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1302 (vec_duplicate:VDQHS
1304 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1305 (parallel [(match_operand:SI 2 "immediate_operand")])))
1306 (match_operand:VDQHS 3 "register_operand" "w"))
1307 (match_operand:VDQHS 4 "register_operand" "0")))]
1310 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1311 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1313 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA with a scalar operand duplicated across all lanes; emitted as a
;; by-element MLA using lane 0.
1316 (define_insn "*aarch64_mla_elt_merge<mode>"
1317 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1319 (mult:VDQHS (vec_duplicate:VDQHS
1320 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1321 (match_operand:VDQHS 2 "register_operand" "w"))
1322 (match_operand:VDQHS 3 "register_operand" "0")))]
1324 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: op0 = op1 - op2 * op3 (MLS).
1328 (define_insn "aarch64_mls<mode>"
1329 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1330 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1331 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1332 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1334 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1335 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; By-element MLS (mirror of *aarch64_mla_elt<mode>).
1338 (define_insn "*aarch64_mls_elt<mode>"
1339 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1341 (match_operand:VDQHS 4 "register_operand" "0")
1343 (vec_duplicate:VDQHS
1345 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1346 (parallel [(match_operand:SI 2 "immediate_operand")])))
1347 (match_operand:VDQHS 3 "register_operand" "w"))))]
1350 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1351 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1353 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; By-element MLS with the lane taken from a <VSWAP_WIDTH>-mode vector.
1356 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1357 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1359 (match_operand:VDQHS 4 "register_operand" "0")
1361 (vec_duplicate:VDQHS
1363 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1364 (parallel [(match_operand:SI 2 "immediate_operand")])))
1365 (match_operand:VDQHS 3 "register_operand" "w"))))]
1368 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1369 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1371 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a scalar duplicated across all lanes; emitted as lane-0 MLS.
1374 (define_insn "*aarch64_mls_elt_merge<mode>"
1375 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1377 (match_operand:VDQHS 1 "register_operand" "0")
1378 (mult:VDQHS (vec_duplicate:VDQHS
1379 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1380 (match_operand:VDQHS 3 "register_operand" "w"))))]
1382 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1386 ;; Max/Min operations.
;; Signed/unsigned element-wise max/min (SMAX/SMIN/UMAX/UMIN).
1387 (define_insn "<su><maxmin><mode>3"
1388 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1389 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1390 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1392 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1393 [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no direct max/min instruction (VDQ_BHSI above excludes it),
;; so expand through a vector compare + vcond select instead.
1396 (define_expand "<su><maxmin>v2di3"
1397 [(set (match_operand:V2DI 0 "register_operand" "")
1398 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1399 (match_operand:V2DI 2 "register_operand" "")))]
1402 enum rtx_code cmp_operator;
1423 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1424 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1425 operands[2], cmp_fmt, operands[1], operands[2]))
1429 ;; Pairwise Integer Max/Min operations.
1430 (define_insn "aarch64_<maxmin_uns>p<mode>"
1431 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1433 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1436 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437 [(set_attr "type" "neon_minmax<q>")]
1440 ;; Pairwise FP Max/Min operations.
1441 (define_insn "aarch64_<maxmin_uns>p<mode>"
1442 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1443 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1444 (match_operand:VHSDF 2 "register_operand" "w")]
1447 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1448 [(set_attr "type" "neon_minmax<q>")]
1451 ;; vec_concat gives a new vector with the low elements from operand 1, and
1452 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1453 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1454 ;; What that means, is that the RTL descriptions of the below patterns
1455 ;; need to change depending on endianness.
1457 ;; Move to the low architectural bits of the register.
1458 ;; On little-endian this is { operand, zeroes }
1459 ;; On big-endian this is { zeroes, operand }
;; Little-endian internal variants, split across the VQ_NO2E and VQ_2E
;; mode iterators.
1461 (define_insn "move_lo_quad_internal_<mode>"
1462 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1464 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1465 (vec_duplicate:<VHALF> (const_int 0))))]
1466 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1471 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1472 (set_attr "length" "4")
1473 (set_attr "arch" "simd,fp,simd")]
1476 (define_insn "move_lo_quad_internal_<mode>"
1477 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1479 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1481 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1486 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1487 (set_attr "length" "4")
1488 (set_attr "arch" "simd,fp,simd")]
;; Big-endian internal variants: the vec_concat operand order is
;; swapped (zeros first) to express the same architectural result.
1491 (define_insn "move_lo_quad_internal_be_<mode>"
1492 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1494 (vec_duplicate:<VHALF> (const_int 0))
1495 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1496 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1501 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1502 (set_attr "length" "4")
1503 (set_attr "arch" "simd,fp,simd")]
1506 (define_insn "move_lo_quad_internal_be_<mode>"
1507 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1510 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1511 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1516 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1517 (set_attr "length" "4")
1518 (set_attr "arch" "simd,fp,simd")]
;; Entry point: dispatch to the internal pattern that matches the
;; target endianness.
1521 (define_expand "move_lo_quad_<mode>"
1522 [(match_operand:VQ 0 "register_operand")
1523 (match_operand:VQ 1 "register_operand")]
1526 if (BYTES_BIG_ENDIAN)
1527 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1529 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1534 ;; Move operand1 to the high architectural bits of the register, keeping
1535 ;; the low architectural bits of operand2.
1536 ;; For little-endian this is { operand2, operand1 }
1537 ;; For big-endian this is { operand1, operand2 }
;; Little-endian variant; both emit INS into the high D-lane.
1539 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1540 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1544 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1545 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1546 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1548 ins\\t%0.d[1], %1.d[0]
1550 [(set_attr "type" "neon_ins")]
;; Big-endian variant: vec_concat operand order swapped, same insn.
1553 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1554 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1556 (match_operand:<VHALF> 1 "register_operand" "w,r")
1559 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
1560 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1562 ins\\t%0.d[1], %1.d[0]
1564 [(set_attr "type" "neon_ins")]
;; Entry point: build the lo-half lane-selector parallel and dispatch
;; on endianness.
1567 (define_expand "move_hi_quad_<mode>"
1568 [(match_operand:VQ 0 "register_operand" "")
1569 (match_operand:<VHALF> 1 "register_operand" "")]
1572 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1573 if (BYTES_BIG_ENDIAN)
1574 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1577 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1582 ;; Narrowing operations.
;; Truncate each element to half width -> XTN.
1585 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1586 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1587 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1589 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1590 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit vectors: assemble them into one 128-bit temporary
;; (halves swapped on big-endian so lanes land correctly), then narrow.
1593 (define_expand "vec_pack_trunc_<mode>"
1594 [(match_operand:<VNARROWD> 0 "register_operand" "")
1595 (match_operand:VDN 1 "register_operand" "")
1596 (match_operand:VDN 2 "register_operand" "")]
1599 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1603 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack two 128-bit vectors with an XTN / XTN2 pair.  The output is
;; earlyclobber ("=&w") because it is written before both inputs have
;; been read across the two instructions.
1611 (define_insn "vec_pack_trunc_<mode>"
1612 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613 (vec_concat:<VNARROWQ2>
1614 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1618 if (BYTES_BIG_ENDIAN)
1619 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1621 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1623 [(set_attr "type" "multiple")
1624 (set_attr "length" "8")]
1627 ;; Widening operations.
;; Sign/zero-extend the low half of a 128-bit vector -> SXTL / UXTL.
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632 (match_operand:VQW 1 "register_operand" "w")
1633 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1636 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637 [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high half -> SXTL2 / UXTL2.
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 1 "register_operand" "w")
1644 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1647 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi/lo half lane-selector parallel
;; and emit the matching insn above.
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652 [(match_operand:<VWIDE> 0 "register_operand" "")
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand" "")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1675 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves -> SMLAL / UMLAL.
;; Accumulator (operand 1) is tied to the output.
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1687 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1689 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Same on the high halves -> SMLAL2 / UMLAL2.
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1703 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1705 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the low halves -> SMLSL / UMLSL.
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1712 (match_operand:<VWIDE> 1 "register_operand" "0")
1714 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715 (match_operand:VQW 2 "register_operand" "w")
1716 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718 (match_operand:VQW 4 "register_operand" "w")
1721 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Same on the high halves -> SMLSL2 / UMLSL2.
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1728 (match_operand:<VWIDE> 1 "register_operand" "0")
1730 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731 (match_operand:VQW 2 "register_operand" "w")
1732 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734 (match_operand:VQW 4 "register_operand" "w")
1737 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector (VD_BHSI) forms: whole-vector widening MLAL.
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1746 (match_operand:VD_BHSI 1 "register_operand" "w"))
1748 (match_operand:VD_BHSI 2 "register_operand" "w")))
1749 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1751 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector whole-vector widening MLSL.
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1758 (match_operand:<VWIDE> 1 "register_operand" "0")
1761 (match_operand:VD_BHSI 2 "register_operand" "w"))
1763 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1765 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves -> SMULL / UMULL.
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 1 "register_operand" "w")
1773 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775 (match_operand:VQW 2 "register_operand" "w")
1778 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: build the lo-half selector and emit SMULL/UMULL.
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783 [(match_operand:<VWIDE> 0 "register_operand" "")
1784 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1785 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1788 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves -> SMULL2 / UMULL2.
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799 (match_operand:VQW 1 "register_operand" "w")
1800 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1805 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the high-half widening multiply.
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810 [(match_operand:<VWIDE> 0 "register_operand" "")
1811 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1812 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1830 ;; Floating-point operations can raise an exception. Vectorizing such
1831 ;; operations is safe because of reasons explained below.
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature. In the
1835 ;; event of a floating-point exception being raised by vectorised
1837 ;; 1. If trapped floating-point exceptions are available, then a trap
1838 ;; will be taken when any lane raises an enabled exception. A trap
1839 ;; handler may determine which lane raised the exception.
1840 ;; 2. Alternatively a sticky exception flag is set in the
1841 ;; floating-point status register (FPSR). Software may explicitly
1842 ;; test the exception flags, in which case the tests will either
1843 ;; prevent vectorisation, allowing precise identification of the
1844 ;; failing operation, or if tested outside of vectorisable regions
1845 ;; then the specific operation and lane are not of interest.
1847 ;; FP arithmetic operations.
;; Element-wise FP add -> FADD.
1849 (define_insn "add<mode>3"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")))]
1854 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FP subtract -> FSUB.
1858 (define_insn "sub<mode>3"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861 (match_operand:VHSDF 2 "register_operand" "w")))]
1863 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Element-wise FP multiply -> FMUL.
1867 (define_insn "mul<mode>3"
1868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870 (match_operand:VHSDF 2 "register_operand" "w")))]
1872 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; FP division: first try the reciprocal-approximation expansion
;; (aarch64_emit_approx_div); if that declines, force operand 1 into a
;; register and fall through to the *div<mode>3 FDIV insn below.
1876 (define_expand "div<mode>3"
1877 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1878 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1879 (match_operand:VHSDF 2 "register_operand" "w")))]
1882 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1885 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Element-wise FP divide -> FDIV.
1888 (define_insn "*div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891 (match_operand:VHSDF 2 "register_operand" "w")))]
1893 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Element-wise FP negate -> FNEG.
1897 (define_insn "neg<mode>2"
1898 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1901 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Element-wise FP absolute value -> FABS.
1905 (define_insn "abs<mode>2"
1906 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1909 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: op0 = op1 * op2 + op3 -> FMLA; op3 is tied to
;; the output.
1913 (define_insn "fma<mode>4"
1914 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916 (match_operand:VHSDF 2 "register_operand" "w")
1917 (match_operand:VHSDF 3 "register_operand" "0")))]
1919 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a selected lane; the lane
;; number is remapped for endianness.
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924 [(set (match_operand:VDQF 0 "register_operand" "=w")
1928 (match_operand:VDQF 1 "register_operand" "<h_con>")
1929 (parallel [(match_operand:SI 2 "immediate_operand")])))
1930 (match_operand:VDQF 3 "register_operand" "w")
1931 (match_operand:VDQF 4 "register_operand" "0")))]
1934 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1937 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a <VSWAP_WIDTH>-mode vector.
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1943 (vec_duplicate:VDQSF
1945 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946 (parallel [(match_operand:SI 2 "immediate_operand")])))
1947 (match_operand:VDQSF 3 "register_operand" "w")
1948 (match_operand:VDQSF 4 "register_operand" "0")))]
1951 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1954 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a scalar operand duplicated across all lanes; lane 0 form.
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958 [(set (match_operand:VMUL 0 "register_operand" "=w")
1961 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962 (match_operand:VMUL 2 "register_operand" "w")
1963 (match_operand:VMUL 3 "register_operand" "0")))]
1965 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar-DF FMA from a selected V2DF lane -> by-element FMLA.
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970 [(set (match_operand:DF 0 "register_operand" "=w")
1973 (match_operand:V2DF 1 "register_operand" "w")
1974 (parallel [(match_operand:SI 2 "immediate_operand")]))
1975 (match_operand:DF 3 "register_operand" "w")
1976 (match_operand:DF 4 "register_operand" "0")))]
1979 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1982 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add: op0 = -op1 * op2 + op3 -> FMLS; op3 is
;; tied to the output.
1985 (define_insn "fnma<mode>4"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1988 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989 (match_operand:VHSDF 2 "register_operand" "w")
1990 (match_operand:VHSDF 3 "register_operand" "0")))]
1992 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with one multiplicand broadcast from a selected lane; lane is
;; remapped for endianness.
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997 [(set (match_operand:VDQF 0 "register_operand" "=w")
2000 (match_operand:VDQF 3 "register_operand" "w"))
2003 (match_operand:VDQF 1 "register_operand" "<h_con>")
2004 (parallel [(match_operand:SI 2 "immediate_operand")])))
2005 (match_operand:VDQF 4 "register_operand" "0")))]
2008 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2011 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a <VSWAP_WIDTH>-mode vector.
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2018 (match_operand:VDQSF 3 "register_operand" "w"))
2019 (vec_duplicate:VDQSF
2021 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022 (parallel [(match_operand:SI 2 "immediate_operand")])))
2023 (match_operand:VDQSF 4 "register_operand" "0")))]
2026 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2029 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar duplicated across all lanes; lane 0 form.
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033 [(set (match_operand:VMUL 0 "register_operand" "=w")
2036 (match_operand:VMUL 2 "register_operand" "w"))
2038 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039 (match_operand:VMUL 3 "register_operand" "0")))]
2041 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar-DF negated FMA from a selected V2DF lane -> by-element FMLS.
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046 [(set (match_operand:DF 0 "register_operand" "=w")
2049 (match_operand:V2DF 1 "register_operand" "w")
2050 (parallel [(match_operand:SI 2 "immediate_operand")]))
2052 (match_operand:DF 3 "register_operand" "w"))
2053 (match_operand:DF 4 "register_operand" "0")))]
2056 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2059 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector round-to-integral in FP format -> FRINT{Z,P,M,I,X,A,N}.
2064 (define_insn "<frint_pattern><mode>2"
2065 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2069 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070 [(set_attr "type" "neon_fp_round_<stype><q>")]
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP -> signed/unsigned integer with an explicit rounding mode.
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078 [(match_operand:VHSDF 1 "register_operand" "w")]
2081 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2085 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI with explicit rounding; needs the F16 instructions.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087 [(set (match_operand:HI 0 "register_operand" "=w")
2088 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2090 "TARGET_SIMD_F16INST"
2091 "fcvt<frint_suffix><su>\t%h0, %h1"
2092 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion -> FCVTZS / FCVTZU.
2095 (define_insn "<optab>_trunchfhi2"
2096 [(set (match_operand:HI 0 "register_operand" "=w")
2097 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098 "TARGET_SIMD_F16INST"
2099 "fcvtz<su>\t%h0, %h1"
2100 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF conversion -> SCVTF / UCVTF.
2103 (define_insn "<optab>hihf2"
2104 [(set (match_operand:HF 0 "register_operand" "=w")
2105 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106 "TARGET_SIMD_F16INST"
2107 "<su_optab>cvtf\t%h0, %h1"
2108 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine (x * 2^n) followed by FP->int conversion into a single
;; fixed-point FCVTZ{S,U} with #n fractional bits; the multiplier must
;; be a power of two whose exponent fits the element width.
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2115 (match_operand:VDQF 1 "register_operand" "w")
2116 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2119 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2122 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2124 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125 output_asm_insn (buf, operands);
2128 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders for FP->int conversions; the RTL templates
;; match the corresponding define_insns directly.
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134 [(match_operand:VHSDF 1 "register_operand")]
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142 [(match_operand:VHSDF 1 "register_operand")]
;; Round-towards-zero in FP format (ftrunc standard name).
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148 [(set (match_operand:VHSDF 0 "register_operand")
2149 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer -> FP conversion -> SCVTF / UCVTF.
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2157 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2159 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2167 ;; Float widening operations.
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171 (float_extend:<VWIDE> (vec_select:<VHALF>
2172 (match_operand:VQ_HSF 1 "register_operand" "w")
2173 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2176 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177 [(set_attr "type" "neon_fp_cvt_widen_s")]
2180 ;; Convert between fixed-point and floating-point (vector modes)
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184 (unspec:<VHSDF:FCVT_TARGET>
2185 [(match_operand:VHSDF 1 "register_operand" "w")
2186 (match_operand:SI 2 "immediate_operand" "i")]
2189 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195 (unspec:<VDQ_HSDI:FCVT_TARGET>
2196 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197 (match_operand:SI 2 "immediate_operand" "i")]
2200 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns their behavior is as required.
2213 (define_expand "vec_unpacks_lo_<mode>"
2214 [(match_operand:<VWIDE> 0 "register_operand" "")
2215 (match_operand:VQ_HSF 1 "register_operand" "")]
2218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227 (float_extend:<VWIDE> (vec_select:<VHALF>
2228 (match_operand:VQ_HSF 1 "register_operand" "w")
2229 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2232 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expand the standard "unpack high" float-widening pattern: select the
;; high architectural lanes of operand 1 (parallel built with 'true')
;; and float-extend them into the wide destination.
;; Note we must emit the *_hi_ insn (fcvtl2): its pattern requires a
;; vect_par_cnst_hi_half parallel, whereas the *_lo_ insn only matches
;; vect_par_cnst_lo_half and would fail to match the 'p' built here.
2236 (define_expand "vec_unpacks_hi_<mode>"
2237 [(match_operand:<VWIDE> 0 "register_operand" "")
2238 (match_operand:VQ_HSF 1 "register_operand" "")]
2241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen every element of a 64-bit float vector (VDF) to the next wider
;; float mode with FCVTL (low-half form; the whole D register is consumed).
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249 (float_extend:<VWIDE>
2250 (match_operand:VDF 1 "register_operand" "w")))]
2252 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253 [(set_attr "type" "neon_fp_cvt_widen_s")]
2256 ;; Float narrowing operations.
;; Narrow each element of a wide float vector into a 64-bit result with
;; FCVTN (writes the low half of the destination Q register).
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259 [(set (match_operand:VDF 0 "register_operand" "=w")
2261 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2263 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Little-endian form of the narrowing "high" step: FCVTN2 narrows
;; operand 2 into the upper half of the destination while operand 1
;; (tied to the destination via constraint "0") supplies the preserved
;; low half.  vec_concat order here is (low, high) for !BYTES_BIG_ENDIAN.
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2270 (match_operand:VDF 1 "register_operand" "0")
2272 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian counterpart of the _le pattern above: same FCVTN2
;; instruction, but the vec_concat operands are swapped so the RTL
;; lane numbering matches big-endian ordering.
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2282 (match_operand:<VWIDE> 2 "register_operand" "w"))
2283 (match_operand:VDF 1 "register_operand" "0")))]
2284 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Expander that picks the endian-correct FCVTN2 insn (_be or _le above)
;; at expand time and forwards the three operands unchanged.
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290 [(match_operand:<VDBL> 0 "register_operand" "=w")
2291 (match_operand:VDF 1 "register_operand" "0")
2292 (match_operand:<VWIDE> 2 "register_operand" "w")]
2295 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN narrows the "lo" input
;; into a V2SF temporary, then FCVTN2 (via truncate_hi_v4sf) narrows the
;; "hi" input into the upper half of the result.  Which RTL operand is
;; "lo"/"hi" is swapped for big-endian so architectural lane order is
;; preserved (cf. the endianness note above vec_unpacks_*).
2303 (define_expand "vec_pack_trunc_v2df"
2304 [(set (match_operand:V4SF 0 "register_operand")
2306 (float_truncate:V2SF
2307 (match_operand:V2DF 1 "register_operand"))
2308 (float_truncate:V2SF
2309 (match_operand:V2DF 2 "register_operand"))
2313 rtx tmp = gen_reg_rtx (V2SFmode);
2314 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2317 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319 tmp, operands[hi]));
;; Pack two scalar DFs into one V2SF: assemble a V2DF temporary from the
;; two inputs (endian-swapped like vec_pack_trunc_v2df above), then
;; narrow it with FCVTN.
2324 (define_expand "vec_pack_trunc_df"
2325 [(set (match_operand:V2SF 0 "register_operand")
2328 (match_operand:DF 1 "register_operand"))
2330 (match_operand:DF 2 "register_operand"))
/* The scratch must be V2DF: it is the destination of the two
   move_*_quad_v2df insns and the (wide) source of truncate_lo_v2sf.  */
2334 rtx tmp = gen_reg_rtx (V2DFmode);
2335 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2338 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2346 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2348 ;; a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2360 (define_insn "<su><maxmin><mode>3"
2361 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363 (match_operand:VHSDF 2 "register_operand" "w")))]
2365 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")]
2378 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2382 ;; 'across lanes' add.
2384 (define_expand "reduc_plus_scal_<mode>"
2385 [(match_operand:<VEL> 0 "register_operand" "=w")
2386 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2390 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391 rtx scratch = gen_reg_rtx (<MODE>mode);
2392 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2398 (define_insn "aarch64_faddp<mode>"
2399 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401 (match_operand:VHSDF 2 "register_operand" "w")]
2404 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409 [(set (match_operand:VDQV 0 "register_operand" "=w")
2410 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2413 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI add-reduction: a single ADDP with the source repeated sums the
;; two lanes; the scalar result lives in lane 0 of the destination
;; (both result lanes hold the sum).
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418 [(set (match_operand:V2SI 0 "register_operand" "=w")
2419 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2422 "addp\\t%0.2s, %1.2s, %1.2s"
2423 [(set_attr "type" "neon_reduc_add")]
2426 (define_insn "reduc_plus_scal_<mode>"
2427 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2431 "faddp\\t%<Vetype>0, %1.<Vtype>"
2432 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF add-reduction to scalar: two rounds of FADDP halve the number of
;; distinct partial sums (4 -> 2 -> 1), then the scalar is extracted from
;; lane 0, endian-corrected via aarch64_endian_lane_rtx.
2435 (define_expand "reduc_plus_scal_v4sf"
2436 [(set (match_operand:SF 0 "register_operand")
2437 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2441 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442 rtx scratch = gen_reg_rtx (V4SFmode);
2443 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading redundant sign bits, per element: CLS implements the
;; clrsb standard pattern directly.
2449 (define_insn "clrsb<mode>2"
2450 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2453 "cls\\t%0.<Vtype>, %1.<Vtype>"
2454 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros, per element (CLZ standard pattern).
2457 (define_insn "clz<mode>2"
2458 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2461 "clz\\t%0.<Vtype>, %1.<Vtype>"
2462 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte element: CNT only exists for byte vectors
;; (VB), hence the <Vbtype> operand views.
2465 (define_insn "popcount<mode>2"
2466 [(set (match_operand:VB 0 "register_operand" "=w")
2467 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2469 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470 [(set_attr "type" "neon_cnt<q>")]
2473 ;; 'across lanes' max and min ops.
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478 [(match_operand:<VEL> 0 "register_operand")
2479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2483 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484 rtx scratch = gen_reg_rtx (<MODE>mode);
2485 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2487 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494 [(match_operand:<VEL> 0 "register_operand")
2495 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2499 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500 rtx scratch = gen_reg_rtx (<MODE>mode);
2501 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2503 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2513 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514 [(set_attr "type" "neon_reduc_minmax<q>")]
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518 [(set (match_operand:V2SI 0 "register_operand" "=w")
2519 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2522 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523 [(set_attr "type" "neon_reduc_minmax")]
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2531 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2540 ;; Thus our BSL is of the form:
2541 ;; op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2545 ;; bsl mask, op1, op2
2546 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2547 ;; bit op0, op2, mask
2548 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2549 ;; bif op0, op1, mask
2551 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2560 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563 (match_dup:<V_INT_EQUIV> 3)
2567 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570 [(set_attr "type" "neon_bsl<q>")]
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first. The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2584 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587 (match_dup:<V_INT_EQUIV> 2)))]
2590 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593 [(set_attr "type" "neon_bsl<q>")]
2596 ;; DImode is special, we want to avoid computing operations which are
2597 ;; more naturally computed in general purpose registers in the vector
2598 ;; registers. If we do that, we need to move all three operands from general
2599 ;; purpose registers to vector registers, then back again. However, we
2600 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2601 ;; optimizations based on the component operations of a BSL.
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2611 (match_operand:DI 3 "register_operand" "w,0,w,r")
2612 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2618 bsl\\t%0.8b, %2.8b, %3.8b
2619 bit\\t%0.8b, %2.8b, %1.8b
2620 bif\\t%0.8b, %3.8b, %1.8b
2622 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2625 /* Split back to individual operations. If we're before reload, and
2626 able to create a temporary register, do so. If we're after reload,
2627 we've got an early-clobber destination register, so use that.
2628 Otherwise, we can't create pseudos and we can't yet guarantee that
2629 operands[0] is safe to write, so FAIL to split. */
2632 if (reload_completed)
2633 scratch = operands[0];
2634 else if (can_create_pseudo_p ())
2635 scratch = gen_reg_rtx (DImode);
2639 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2644 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645 (set_attr "length" "4,4,4,12")]
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2653 (match_operand:DI 3 "register_operand" "w,w,0,r")
2654 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2660 bsl\\t%0.8b, %3.8b, %2.8b
2661 bit\\t%0.8b, %3.8b, %1.8b
2662 bif\\t%0.8b, %2.8b, %1.8b
2664 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2667 /* Split back to individual operations. If we're before reload, and
2668 able to create a temporary register, do so. If we're after reload,
2669 we've got an early-clobber destination register, so use that.
2670 Otherwise, we can't create pseudos and we can't yet guarantee that
2671 operands[0] is safe to write, so FAIL to split. */
2674 if (reload_completed)
2675 scratch = operands[0];
2676 else if (can_create_pseudo_p ())
2677 scratch = gen_reg_rtx (DImode);
2681 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2686 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687 (set_attr "length" "4,4,4,12")]
;; Public BSL expander.  The internal BSL insn works on the integer
;; equivalent mode, so for float modes all three value operands are
;; re-viewed with gen_lowpart and the result is produced in a fresh
;; integer-mode temporary, then moved back as the original mode.  For
;; integer modes tmp aliases operands[0] and no copy is emitted.
2690 (define_expand "aarch64_simd_bsl<mode>"
2691 [(match_operand:VALLDIF 0 "register_operand")
2692 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2693 (match_operand:VALLDIF 2 "register_operand")
2694 (match_operand:VALLDIF 3 "register_operand")]
2697 /* We can't alias operands together if they have different modes. */
2698 rtx tmp = operands[0];
2699 if (FLOAT_MODE_P (<MODE>mode))
2701 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2702 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2703 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2705 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2706 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2710 if (tmp != operands[0])
2711 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2716 (define_expand "vcond_mask_<mode><v_int_equiv>"
2717 [(match_operand:VALLDI 0 "register_operand")
2718 (match_operand:VALLDI 1 "nonmemory_operand")
2719 (match_operand:VALLDI 2 "nonmemory_operand")
2720 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2723 /* If we have (a = (P) ? -1 : 0);
2724 Then we can simply move the generated mask (result must be int). */
2725 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2726 && operands[2] == CONST0_RTX (<MODE>mode))
2727 emit_move_insn (operands[0], operands[3]);
2728 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2729 else if (operands[1] == CONST0_RTX (<MODE>mode)
2730 && operands[2] == CONSTM1_RTX (<MODE>mode))
2731 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2734 if (!REG_P (operands[1]))
2735 operands[1] = force_reg (<MODE>mode, operands[1]);
2736 if (!REG_P (operands[2]))
2737 operands[2] = force_reg (<MODE>mode, operands[2]);
2738 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2739 operands[1], operands[2]));
2745 ;; Patterns comparing two vectors to produce a mask.
2747 (define_expand "vec_cmp<mode><mode>"
2748 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2749 (match_operator 1 "comparison_operator"
2750 [(match_operand:VSDQ_I_DI 2 "register_operand")
2751 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2754 rtx mask = operands[0];
2755 enum rtx_code code = GET_CODE (operands[1]);
2765 if (operands[3] == CONST0_RTX (<MODE>mode))
2770 if (!REG_P (operands[3]))
2771 operands[3] = force_reg (<MODE>mode, operands[3]);
2779 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2783 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2787 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2791 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2795 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2799 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2803 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2811 /* Handle NE as !EQ. */
2812 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2813 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2817 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2827 (define_expand "vec_cmp<mode><v_int_equiv>"
2828 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2829 (match_operator 1 "comparison_operator"
2830 [(match_operand:VDQF 2 "register_operand")
2831 (match_operand:VDQF 3 "nonmemory_operand")]))]
2834 int use_zero_form = 0;
2835 enum rtx_code code = GET_CODE (operands[1]);
2836 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2838 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2847 if (operands[3] == CONST0_RTX (<MODE>mode))
2854 if (!REG_P (operands[3]))
2855 operands[3] = force_reg (<MODE>mode, operands[3]);
2865 comparison = gen_aarch64_cmlt<mode>;
2870 std::swap (operands[2], operands[3]);
2874 comparison = gen_aarch64_cmgt<mode>;
2879 comparison = gen_aarch64_cmle<mode>;
2884 std::swap (operands[2], operands[3]);
2888 comparison = gen_aarch64_cmge<mode>;
2892 comparison = gen_aarch64_cmeq<mode>;
2910 /* All of the above must not raise any FP exceptions. Thus we first
2911 check each operand for NaNs and force any elements containing NaN to
2912 zero before using them in the compare.
2913 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2914 (cm<cc> (isnan (a) ? 0.0 : a,
2915 isnan (b) ? 0.0 : b))
2916 We use the following transformations for doing the comparisions:
2920 a UNLT b -> b GT a. */
2922 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2923 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2924 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2925 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2926 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2927 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2928 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2929 lowpart_subreg (<V_INT_EQUIV>mode,
2932 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2933 lowpart_subreg (<V_INT_EQUIV>mode,
2936 gcc_assert (comparison != NULL);
2937 emit_insn (comparison (operands[0],
2938 lowpart_subreg (<MODE>mode,
2939 tmp0, <V_INT_EQUIV>mode),
2940 lowpart_subreg (<MODE>mode,
2941 tmp1, <V_INT_EQUIV>mode)));
2942 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2952 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2953 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2959 a NE b -> ~(a EQ b) */
2960 gcc_assert (comparison != NULL);
2961 emit_insn (comparison (operands[0], operands[2], operands[3]));
2963 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2967 /* LTGT is not guranteed to not generate a FP exception. So let's
2968 go the faster way : ((a > b) || (b > a)). */
2969 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2970 operands[2], operands[3]));
2971 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2972 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2978 /* cmeq (a, a) & cmeq (b, b). */
2979 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2980 operands[2], operands[2]));
2981 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2982 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2984 if (code == UNORDERED)
2985 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2986 else if (code == UNEQ)
2988 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2989 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3000 (define_expand "vec_cmpu<mode><mode>"
3001 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3002 (match_operator 1 "comparison_operator"
3003 [(match_operand:VSDQ_I_DI 2 "register_operand")
3004 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3007 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3008 operands[2], operands[3]));
3012 (define_expand "vcond<mode><mode>"
3013 [(set (match_operand:VALLDI 0 "register_operand")
3014 (if_then_else:VALLDI
3015 (match_operator 3 "comparison_operator"
3016 [(match_operand:VALLDI 4 "register_operand")
3017 (match_operand:VALLDI 5 "nonmemory_operand")])
3018 (match_operand:VALLDI 1 "nonmemory_operand")
3019 (match_operand:VALLDI 2 "nonmemory_operand")))]
3022 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3023 enum rtx_code code = GET_CODE (operands[3]);
3025 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3026 it as well as switch operands 1/2 in order to avoid the additional
3030 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3031 operands[4], operands[5]);
3032 std::swap (operands[1], operands[2]);
3034 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3035 operands[4], operands[5]));
3036 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3037 operands[2], mask));
3042 (define_expand "vcond<v_cmp_mixed><mode>"
3043 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3044 (if_then_else:<V_cmp_mixed>
3045 (match_operator 3 "comparison_operator"
3046 [(match_operand:VDQF_COND 4 "register_operand")
3047 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3048 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3049 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3052 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3053 enum rtx_code code = GET_CODE (operands[3]);
3055 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3056 it as well as switch operands 1/2 in order to avoid the additional
3060 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3061 operands[4], operands[5]);
3062 std::swap (operands[1], operands[2]);
3064 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3065 operands[4], operands[5]));
3066 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3067 operands[0], operands[1],
3068 operands[2], mask));
3073 (define_expand "vcondu<mode><mode>"
3074 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3075 (if_then_else:VSDQ_I_DI
3076 (match_operator 3 "comparison_operator"
3077 [(match_operand:VSDQ_I_DI 4 "register_operand")
3078 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3079 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3080 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3083 rtx mask = gen_reg_rtx (<MODE>mode);
3084 enum rtx_code code = GET_CODE (operands[3]);
3086 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3087 it as well as switch operands 1/2 in order to avoid the additional
3091 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3092 operands[4], operands[5]);
3093 std::swap (operands[1], operands[2]);
3095 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3096 operands[4], operands[5]));
3097 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3098 operands[2], mask));
3102 (define_expand "vcondu<mode><v_cmp_mixed>"
3103 [(set (match_operand:VDQF 0 "register_operand")
3105 (match_operator 3 "comparison_operator"
3106 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3107 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3108 (match_operand:VDQF 1 "nonmemory_operand")
3109 (match_operand:VDQF 2 "nonmemory_operand")))]
3112 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3113 enum rtx_code code = GET_CODE (operands[3]);
3115 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3116 it as well as switch operands 1/2 in order to avoid the additional
3120 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3121 operands[4], operands[5]);
3122 std::swap (operands[1], operands[2]);
3124 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3126 operands[4], operands[5]));
3127 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3128 operands[2], mask));
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3134 ;; Lane extraction with sign extension to general purpose register.
3135 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3136 [(set (match_operand:GPI 0 "register_operand" "=r")
3139 (match_operand:VDQQH 1 "register_operand" "w")
3140 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3143 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3144 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3146 [(set_attr "type" "neon_to_gp<q>")]
3149 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3150 [(set (match_operand:GPI 0 "register_operand" "=r")
3153 (match_operand:VDQQH 1 "register_operand" "w")
3154 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3157 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3158 INTVAL (operands[2]));
3159 return "umov\\t%w0, %1.<Vetype>[%2]";
3161 [(set_attr "type" "neon_to_gp<q>")]
3164 ;; Lane extraction of a value, neither sign nor zero extension
3165 ;; is guaranteed so upper bits should be considered undefined.
3166 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
3167 (define_insn "aarch64_get_lane<mode>"
3168 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3170 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3171 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3174 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3175 switch (which_alternative)
3178 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3180 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3182 return "st1\\t{%1.<Vetype>}[%2], %0";
3187 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Combine two adjacent D-sized loads into one Q-register load: the
;; insn condition proves operand 2's address is exactly operand 1's
;; address plus the mode size, so the vec_concat of the two memory
;; operands is one contiguous 128-bit load.  Disabled under
;; STRICT_ALIGNMENT since the wider access may be under-aligned.
3190 (define_insn "load_pair_lanes<mode>"
3191 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3193 (match_operand:VDC 1 "memory_operand" "Utq")
3194 (match_operand:VDC 2 "memory_operand" "m")))]
3195 "TARGET_SIMD && !STRICT_ALIGNMENT
3196 && rtx_equal_p (XEXP (operands[2], 0),
3197 plus_constant (Pmode,
3198 XEXP (operands[1], 0),
3199 GET_MODE_SIZE (<MODE>mode)))"
3201 [(set_attr "type" "neon_load1_1reg_q")]
;; Store a vec_concat of two D-sized values as one paired store.  Two
;; alternatives: both halves in SIMD regs, or both in general regs
;; (the visible template is the GP-register STP form; the destination
;; predicate accepts STP-compatible addresses only).
3204 (define_insn "store_pair_lanes<mode>"
3205 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3207 (match_operand:VDC 1 "register_operand" "w, r")
3208 (match_operand:VDC 2 "register_operand" "w, r")))]
3212 stp\\t%x1, %x2, %y0"
3213 [(set_attr "type" "neon_stp, store_16")]
3216 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a D-register value with zero into a Q register: operand 2 is
;; constrained to the zero immediate, so only operand 1 is materialized
;; (move / from-GP / load, per the three alternatives).  Little-endian only;
;; the _be pattern below swaps the concatenation order for big-endian.
3219 (define_insn "@aarch64_combinez<mode>"
3220 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3222 (match_operand:VDC 1 "general_operand" "w,?r,m")
3223 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3224 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3229 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3230 (set_attr "arch" "simd,fp,simd")]
;; Big-endian counterpart of @aarch64_combinez: the zero (operand 2) and
;; the payload (operand 1) appear in swapped RTL order.
3233 (define_insn "@aarch64_combinez_be<mode>"
3234 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3236 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3237 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3238 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3243 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3244 (set_attr "arch" "simd,fp,simd")]
;; Expander: combine two D registers into one Q register, delegating the
;; endian-dependent work to aarch64_split_simd_combine.
3247 (define_expand "aarch64_combine<mode>"
3248 [(match_operand:<VDBL> 0 "register_operand")
3249 (match_operand:VDC 1 "register_operand")
3250 (match_operand:VDC 2 "register_operand")]
3253 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Expander: build the combined value as two half-moves — operand 1 into
;; the low quad, operand 2 into the high quad.
3259 (define_expand "@aarch64_simd_combine<mode>"
3260 [(match_operand:<VDBL> 0 "register_operand")
3261 (match_operand:VDC 1 "register_operand")
3262 (match_operand:VDC 2 "register_operand")]
3265 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3266 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3269 [(set_attr "type" "multiple")]
3272 ;; <su><addsub>l<q>.
;; Widening add/subtract-long on the HIGH halves of two Q registers
;; (vect_par_cnst_hi_half selects the upper lanes); prints the "l2" form.
3274 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3275 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3276 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3277 (match_operand:VQW 1 "register_operand" "w")
3278 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3279 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3280 (match_operand:VQW 2 "register_operand" "w")
3283 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3284 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same operation on the LOW halves (vect_par_cnst_lo_half); prints the
;; plain (non-"2") long form on the half-width register names.
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3290 (match_operand:VQW 1 "register_operand" "w")
3291 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3292 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3293 (match_operand:VQW 2 "register_operand" "w")
3296 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3297 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four expanders below build the hi-half lane-selector parallel and
;; forward to the corresponding *_hi_internal pattern above.
3301 (define_expand "aarch64_saddl2<mode>"
3302 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3303 (match_operand:VQW 1 "register_operand" "w")
3304 (match_operand:VQW 2 "register_operand" "w")]
3307 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3308 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned variant of saddl2 above.
3313 (define_expand "aarch64_uaddl2<mode>"
3314 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3315 (match_operand:VQW 1 "register_operand" "w")
3316 (match_operand:VQW 2 "register_operand" "w")]
3319 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3320 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Signed subtract-long, high halves.
3325 (define_expand "aarch64_ssubl2<mode>"
3326 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3327 (match_operand:VQW 1 "register_operand" "w")
3328 (match_operand:VQW 2 "register_operand" "w")]
3331 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3332 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Unsigned subtract-long, high halves.
3337 (define_expand "aarch64_usubl2<mode>"
3338 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3339 (match_operand:VQW 1 "register_operand" "w")
3340 (match_operand:VQW 2 "register_operand" "w")]
3343 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3344 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub-long on whole 64-bit (D) input vectors — no half
;; selection needed since both operands are already half-width.
3349 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3350 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3351 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3352 (match_operand:VD_BHSI 1 "register_operand" "w"))
3354 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3356 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3357 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3360 ;; <su><addsub>w<q>.
;; Widening sum of a full Q vector into a double-width accumulator:
;; one saddw on the low half (via the _internal pattern) followed by a
;; saddw2 on the high half.
3362 (define_expand "widen_ssum<mode>3"
3363 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3364 (plus:<VDBLW> (sign_extend:<VDBLW>
3365 (match_operand:VQW 1 "register_operand" ""))
3366 (match_operand:<VDBLW> 2 "register_operand" "")))]
3369 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3370 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3372 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3374 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; D-register (64-bit input) variant: a single saddw suffices.
3379 (define_expand "widen_ssum<mode>3"
3380 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3381 (plus:<VWIDE> (sign_extend:<VWIDE>
3382 (match_operand:VD_BHSI 1 "register_operand" ""))
3383 (match_operand:<VWIDE> 2 "register_operand" "")))]
3386 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterpart of the Q-register widen_ssum above (uaddw + uaddw2).
3390 (define_expand "widen_usum<mode>3"
3391 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3392 (plus:<VDBLW> (zero_extend:<VDBLW>
3393 (match_operand:VQW 1 "register_operand" ""))
3394 (match_operand:<VDBLW> 2 "register_operand" "")))]
3397 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3398 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3400 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3402 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned D-register variant: single uaddw.
3407 (define_expand "widen_usum<mode>3"
3408 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3409 (plus:<VWIDE> (zero_extend:<VWIDE>
3410 (match_operand:VD_BHSI 1 "register_operand" ""))
3411 (match_operand:<VWIDE> 2 "register_operand" "")))]
3414 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening subtract: wide accumulator minus extended narrow vector.
3418 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3419 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3420 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3422 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3424 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3425 [(set_attr "type" "neon_sub_widen")]
;; As above but subtracting the extended LOW half of a Q register.
3428 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3429 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3430 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3433 (match_operand:VQW 2 "register_operand" "w")
3434 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3436 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3437 [(set_attr "type" "neon_sub_widen")]
;; As above but on the HIGH half — prints the "subw2" form.
3440 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3441 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3442 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3445 (match_operand:VQW 2 "register_operand" "w")
3446 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3448 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3449 [(set_attr "type" "neon_sub_widen")]
;; Widening add: wide accumulator plus extended narrow (D) vector.
3452 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3453 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3456 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3458 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3459 [(set_attr "type" "neon_add_widen")]
;; Widening add of the extended LOW half of a Q register.
3462 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3463 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3467 (match_operand:VQW 2 "register_operand" "w")
3468 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3469 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3471 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3472 [(set_attr "type" "neon_add_widen")]
;; Widening add of the extended HIGH half — prints the "addw2" form.
3475 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3476 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3480 (match_operand:VQW 2 "register_operand" "w")
3481 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3482 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3484 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3485 [(set_attr "type" "neon_add_widen")]
;; The four expanders below construct the hi-half selector and forward
;; to the matching *w2_internal pattern above.
3488 (define_expand "aarch64_saddw2<mode>"
3489 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3490 (match_operand:<VWIDE> 1 "register_operand" "w")
3491 (match_operand:VQW 2 "register_operand" "w")]
3494 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3495 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Unsigned add-wide, high half.
3500 (define_expand "aarch64_uaddw2<mode>"
3501 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3502 (match_operand:<VWIDE> 1 "register_operand" "w")
3503 (match_operand:VQW 2 "register_operand" "w")]
3506 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3507 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Signed subtract-wide, high half.
3513 (define_expand "aarch64_ssubw2<mode>"
3514 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3515 (match_operand:<VWIDE> 1 "register_operand" "w")
3516 (match_operand:VQW 2 "register_operand" "w")]
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3520 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Unsigned subtract-wide, high half.
3525 (define_expand "aarch64_usubw2<mode>"
3526 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3527 (match_operand:<VWIDE> 1 "register_operand" "w")
3528 (match_operand:VQW 2 "register_operand" "w")]
3531 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3532 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3537 ;; <su><r>h<addsub>.
;; Standard-name average expanders.  _floor truncates toward -inf and
;; _ceil rounds; presumably these map onto the (rounding) halving-add
;; unspec used by the insn below — TODO confirm against the elided
;; unspec names.
3539 (define_expand "<u>avg<mode>3_floor"
3540 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3541 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3542 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Rounding (ceiling) average.
3547 (define_expand "<u>avg<mode>3_ceil"
3548 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3549 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3550 (match_operand:VDQ_BHSI 2 "register_operand")]
;; Halving add/subtract insn: prints s/u + optional rounding "r" +
;; "hadd"/"hsub" via the <sur> and <addsub> iterators.
3555 (define_insn "aarch64_<sur>h<addsub><mode>"
3556 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3557 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3558 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3561 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3562 [(set_attr "type" "neon_<addsub>_halve<q>")]
3565 ;; <r><addsub>hn<q>.
;; Add/subtract, take the high half of each element, narrowing to a
;; D-register result (optionally rounding, via <sur>).
3567 (define_insn "aarch64_<sur><addsub>hn<mode>"
3568 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3569 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3570 (match_operand:VQN 2 "register_operand" "w")]
3573 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3574 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Second-part variant: narrows into the HIGH half of the destination;
;; operand 1 (constraint "0") supplies the low half to be preserved.
3577 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3578 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3579 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3580 (match_operand:VQN 2 "register_operand" "w")
3581 (match_operand:VQN 3 "register_operand" "w")]
3584 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3585 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (PMUL) on byte vectors.
3590 (define_insn "aarch64_pmul<mode>"
3591 [(set (match_operand:VB 0 "register_operand" "=w")
3592 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3593 (match_operand:VB 2 "register_operand" "w")]
3596 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3597 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: floating-point multiply-extended (vector and scalar forms).
3602 (define_insn "aarch64_fmulx<mode>"
3603 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3605 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3606 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3609 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3610 [(set_attr "type" "neon_fp_mul_<stype>")]
3613 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane selected from a vector of the swapped width, then
;; broadcast (vec_duplicate); the lane index is endian-corrected.
3615 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3616 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3618 [(match_operand:VDQSF 1 "register_operand" "w")
3619 (vec_duplicate:VDQSF
3621 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3622 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3626 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3627 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3629 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3632 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-width vector.
3634 (define_insn "*aarch64_mulx_elt<mode>"
3635 [(set (match_operand:VDQF 0 "register_operand" "=w")
3637 [(match_operand:VDQF 1 "register_operand" "w")
3640 (match_operand:VDQF 2 "register_operand" "w")
3641 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3645 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3646 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3648 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX with a scalar broadcast to every lane — printed as lane [0].
3653 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3654 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3656 [(match_operand:VHSDF 1 "register_operand" "w")
3657 (vec_duplicate:VHSDF
3658 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3661 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3662 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3665 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3666 ;; vmulxd_lane_f64 == vmulx_lane_f64
3667 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar by one extracted vector lane.
3669 (define_insn "*aarch64_vgetfmulx<mode>"
3670 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3672 [(match_operand:<VEL> 1 "register_operand" "w")
3674 (match_operand:VDQF 2 "register_operand" "w")
3675 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3679 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3680 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3682 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/subtract (sqadd/uqadd/sqsub/uqsub via BINQOPS).
3686 (define_insn "aarch64_<su_optab><optab><mode>"
3687 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3688 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3689 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3691 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3692 [(set_attr "type" "neon_<optab><q>")]
3695 ;; suqadd and usqadd
;; Signed-plus-unsigned saturating accumulate; operand 1 ("0") is the
;; accumulator, so only operand 2 appears in the printed template.
3697 (define_insn "aarch64_<sur>qadd<mode>"
3698 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3699 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3700 (match_operand:VSDQ_I 2 "register_operand" "w")]
3703 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3704 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed-to-unsigned saturating extract-narrow.
3709 (define_insn "aarch64_sqmovun<mode>"
3710 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3711 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3714 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3715 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3718 ;; sqmovn and uqmovn
;; SQXTN/UQXTN: saturating extract-narrow keeping the sign domain.
3720 (define_insn "aarch64_<sur>qmovn<mode>"
3721 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3722 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3725 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3726 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary operation; the "s<optab>" mnemonic suggests
;; sqabs/sqneg — confirm against the (elided) iterator definition.
3731 (define_insn "aarch64_s<optab><mode>"
3732 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3734 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3736 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3737 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH / SQRDMULH: saturating (rounding) doubling multiply high.
3742 (define_insn "aarch64_sq<r>dmulh<mode>"
3743 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3745 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3746 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3749 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3750 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Lane variants: multiply by one lane of a (<VCOND>/<VCONQ>) vector,
;; with the lane index endian-corrected before printing.
3755 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3756 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3758 [(match_operand:VDQHS 1 "register_operand" "w")
3760 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3761 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3765 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3766 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3767 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As above, lane taken from a full (Q) vector.
3770 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3771 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3773 [(match_operand:VDQHS 1 "register_operand" "w")
3775 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3776 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3780 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3781 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3782 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) lane variant.
3785 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3786 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3788 [(match_operand:SD_HSI 1 "register_operand" "w")
3790 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3791 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3795 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3796 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3797 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar laneq variant (lane from a Q vector).
3800 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3801 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3803 [(match_operand:SD_HSI 1 "register_operand" "w")
3805 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3806 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3810 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3811 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3812 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH / SQRDMLSH: saturating rounding doubling multiply
;; accumulate/subtract high.  Operand 1 ("0") is the accumulator.
3817 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3818 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3820 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3821 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3822 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3825 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3826 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3829 ;; sqrdml[as]h_lane.
;; Vector lane variant; lane index endian-corrected before printing.
3831 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3832 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3834 [(match_operand:VDQHS 1 "register_operand" "0")
3835 (match_operand:VDQHS 2 "register_operand" "w")
3837 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3838 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3842 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3844 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3846 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane variant.
3849 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3850 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3852 [(match_operand:SD_HSI 1 "register_operand" "0")
3853 (match_operand:SD_HSI 2 "register_operand" "w")
3855 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3856 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3860 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3862 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3864 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3867 ;; sqrdml[as]h_laneq.
;; Vector laneq variant (lane taken from a full Q vector, <VCONQ>).
3869 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3870 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3872 [(match_operand:VDQHS 1 "register_operand" "0")
3873 (match_operand:VDQHS 2 "register_operand" "w")
3875 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3876 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3880 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3882 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3884 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq variant.
3887 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3888 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3890 [(match_operand:SD_HSI 1 "register_operand" "0")
3891 (match_operand:SD_HSI 2 "register_operand" "w")
3893 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3894 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3898 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3900 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3902 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL / SQDMLSL: saturating doubling multiply accumulate/subtract
;; long.  Operand 1 ("0") is the wide accumulator; operands 2 and 3 are
;; sign-extended, multiplied and doubled before accumulation.
3907 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3908 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3910 (match_operand:<VWIDE> 1 "register_operand" "0")
3913 (sign_extend:<VWIDE>
3914 (match_operand:VSD_HSI 2 "register_operand" "w"))
3915 (sign_extend:<VWIDE>
3916 (match_operand:VSD_HSI 3 "register_operand" "w")))
3919 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3920 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Lane form: second multiplicand is one lane of a <VCOND> vector,
;; duplicated to all lanes; lane index endian-corrected.
3925 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3926 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3928 (match_operand:<VWIDE> 1 "register_operand" "0")
3931 (sign_extend:<VWIDE>
3932 (match_operand:VD_HSI 2 "register_operand" "w"))
3933 (sign_extend:<VWIDE>
3934 (vec_duplicate:VD_HSI
3936 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3937 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3942 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3944 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3946 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Laneq form: lane taken from a full Q vector (<VCONQ>).
3949 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3950 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3952 (match_operand:<VWIDE> 1 "register_operand" "0")
3955 (sign_extend:<VWIDE>
3956 (match_operand:VD_HSI 2 "register_operand" "w"))
3957 (sign_extend:<VWIDE>
3958 (vec_duplicate:VD_HSI
3960 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3961 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3966 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3968 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3970 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane form — no vec_duplicate, operand 2 is a scalar.
3973 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3974 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3976 (match_operand:<VWIDE> 1 "register_operand" "0")
3979 (sign_extend:<VWIDE>
3980 (match_operand:SD_HSI 2 "register_operand" "w"))
3981 (sign_extend:<VWIDE>
3983 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3984 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3989 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3991 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3993 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar laneq form.
3996 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3997 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3999 (match_operand:<VWIDE> 1 "register_operand" "0")
4002 (sign_extend:<VWIDE>
4003 (match_operand:SD_HSI 2 "register_operand" "w"))
4004 (sign_extend:<VWIDE>
4006 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4007 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4012 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4014 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4016 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: second multiplicand is a scalar broadcast to all lanes,
;; printed as lane [0].
4021 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4022 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (match_operand:<VWIDE> 1 "register_operand" "0")
4027 (sign_extend:<VWIDE>
4028 (match_operand:VD_HSI 2 "register_operand" "w"))
4029 (sign_extend:<VWIDE>
4030 (vec_duplicate:VD_HSI
4031 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4034 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4035 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2 / SQDMLSL2: as sqdml[as]l but operating on the HIGH halves
;; of two Q registers (vect_par_cnst_hi_half selectors).
4040 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4041 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4043 (match_operand:<VWIDE> 1 "register_operand" "0")
4046 (sign_extend:<VWIDE>
4048 (match_operand:VQ_HSI 2 "register_operand" "w")
4049 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4050 (sign_extend:<VWIDE>
4052 (match_operand:VQ_HSI 3 "register_operand" "w")
4056 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4057 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: build the hi-half selector and forward to the accumulate
;; (_internal) pattern above.
4060 (define_expand "aarch64_sqdmlal2<mode>"
4061 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (match_operand:<VWIDE> 1 "register_operand" "w")
4063 (match_operand:VQ_HSI 2 "register_operand" "w")
4064 (match_operand:VQ_HSI 3 "register_operand" "w")]
4067 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4068 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4069 operands[2], operands[3], p));
;; Subtract counterpart of the expander above.
4073 (define_expand "aarch64_sqdmlsl2<mode>"
4074 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4075 (match_operand:<VWIDE> 1 "register_operand" "w")
4076 (match_operand:VQ_HSI 2 "register_operand" "w")
4077 (match_operand:VQ_HSI 3 "register_operand" "w")]
4080 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4081 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4082 operands[2], operands[3], p));
;; SQDMLAL2/SQDMLSL2 lane forms: high half of operand 2 multiplied by a
;; duplicated lane of operand 3; lane index endian-corrected.
4088 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4089 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4091 (match_operand:<VWIDE> 1 "register_operand" "0")
4094 (sign_extend:<VWIDE>
4096 (match_operand:VQ_HSI 2 "register_operand" "w")
4097 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4098 (sign_extend:<VWIDE>
4099 (vec_duplicate:<VHALF>
4101 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4102 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4107 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4109 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4111 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Laneq form: lane taken from a full Q vector (<VCONQ>).
4114 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4117 (match_operand:<VWIDE> 1 "register_operand" "0")
4120 (sign_extend:<VWIDE>
4122 (match_operand:VQ_HSI 2 "register_operand" "w")
4123 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4124 (sign_extend:<VWIDE>
4125 (vec_duplicate:<VHALF>
4127 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4128 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4133 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4135 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4137 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; The four expanders below build the hi-half selector and forward to
;; the matching *_internal pattern above.
4140 (define_expand "aarch64_sqdmlal2_lane<mode>"
4141 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4142 (match_operand:<VWIDE> 1 "register_operand" "w")
4143 (match_operand:VQ_HSI 2 "register_operand" "w")
4144 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4145 (match_operand:SI 4 "immediate_operand" "i")]
4148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4149 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4150 operands[2], operands[3],
;; Accumulate, lane from Q vector.
4155 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4156 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4157 (match_operand:<VWIDE> 1 "register_operand" "w")
4158 (match_operand:VQ_HSI 2 "register_operand" "w")
4159 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4160 (match_operand:SI 4 "immediate_operand" "i")]
4163 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4164 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4165 operands[2], operands[3],
;; Subtract, lane from D-width vector.
4170 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4171 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4172 (match_operand:<VWIDE> 1 "register_operand" "w")
4173 (match_operand:VQ_HSI 2 "register_operand" "w")
4174 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4175 (match_operand:SI 4 "immediate_operand" "i")]
4178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4179 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4180 operands[2], operands[3],
;; Subtract, lane from Q vector.
4185 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4186 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4187 (match_operand:<VWIDE> 1 "register_operand" "w")
4188 (match_operand:VQ_HSI 2 "register_operand" "w")
4189 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4190 (match_operand:SI 4 "immediate_operand" "i")]
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4195 operands[2], operands[3],
;; SQDMLAL2/SQDMLSL2 _n forms: high half of operand 2 multiplied by a
;; scalar (operand 3) broadcast to all lanes, printed as lane [0].
4200 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4201 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4203 (match_operand:<VWIDE> 1 "register_operand" "0")
4206 (sign_extend:<VWIDE>
4208 (match_operand:VQ_HSI 2 "register_operand" "w")
4209 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4210 (sign_extend:<VWIDE>
4211 (vec_duplicate:<VHALF>
4212 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4215 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4216 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: hi-half selector, then forward to the _n_internal pattern.
4219 (define_expand "aarch64_sqdmlal2_n<mode>"
4220 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4221 (match_operand:<VWIDE> 1 "register_operand" "w")
4222 (match_operand:VQ_HSI 2 "register_operand" "w")
4223 (match_operand:<VEL> 3 "register_operand" "w")]
4226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4227 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4228 operands[2], operands[3],
;; Subtract counterpart of the expander above.
4233 (define_expand "aarch64_sqdmlsl2_n<mode>"
4234 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4235 (match_operand:<VWIDE> 1 "register_operand" "w")
4236 (match_operand:VQ_HSI 2 "register_operand" "w")
4237 (match_operand:<VEL> 3 "register_operand" "w")]
4240 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4241 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4242 operands[2], operands[3],
;; SQDMULL: saturating doubling multiply long (no accumulator).
4249 (define_insn "aarch64_sqdmull<mode>"
4250 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4253 (sign_extend:<VWIDE>
4254 (match_operand:VSD_HSI 1 "register_operand" "w"))
4255 (sign_extend:<VWIDE>
4256 (match_operand:VSD_HSI 2 "register_operand" "w")))
4259 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4260 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Lane form: second multiplicand is a duplicated lane of a <VCOND>
;; vector; lane index endian-corrected before printing.
4265 (define_insn "aarch64_sqdmull_lane<mode>"
4266 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4269 (sign_extend:<VWIDE>
4270 (match_operand:VD_HSI 1 "register_operand" "w"))
4271 (sign_extend:<VWIDE>
4272 (vec_duplicate:VD_HSI
4274 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4275 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4280 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4281 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4283 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Laneq form: lane from a full Q vector (<VCONQ>).
4286 (define_insn "aarch64_sqdmull_laneq<mode>"
4287 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4290 (sign_extend:<VWIDE>
4291 (match_operand:VD_HSI 1 "register_operand" "w"))
4292 (sign_extend:<VWIDE>
4293 (vec_duplicate:VD_HSI
4295 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4296 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4301 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4302 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4304 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane form — no vec_duplicate needed.
4307 (define_insn "aarch64_sqdmull_lane<mode>"
4308 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4311 (sign_extend:<VWIDE>
4312 (match_operand:SD_HSI 1 "register_operand" "w"))
4313 (sign_extend:<VWIDE>
4315 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4316 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4321 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4322 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4324 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar laneq form.
4327 (define_insn "aarch64_sqdmull_laneq<mode>"
4328 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4331 (sign_extend:<VWIDE>
4332 (match_operand:SD_HSI 1 "register_operand" "w"))
4333 (sign_extend:<VWIDE>
4335 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4336 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4341 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4342 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4344 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: second multiplicand is a broadcast scalar, printed as [0].
4349 (define_insn "aarch64_sqdmull_n<mode>"
4350 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4353 (sign_extend:<VWIDE>
4354 (match_operand:VD_HSI 1 "register_operand" "w"))
4355 (sign_extend:<VWIDE>
4356 (vec_duplicate:VD_HSI
4357 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4361 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4362 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; NOTE(review): gaps in the fused line numbering below (e.g. 4370 -> 4373)
;; show that some lines of these patterns were dropped during extraction;
;; the surviving text is preserved byte-for-byte and only comments added.

;; SQDMULL2: operates on the high halves (vect_par_cnst_hi_half selections)
;; of 128-bit VQ_HSI inputs; the expanders below build the hi-half parallel
;; with aarch64_simd_vect_par_cnst_half and forward to these _internal insns.
4369 (define_insn "aarch64_sqdmull2<mode>_internal"
4370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4373 (sign_extend:<VWIDE>
4375 (match_operand:VQ_HSI 1 "register_operand" "w")
4376 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4377 (sign_extend:<VWIDE>
4379 (match_operand:VQ_HSI 2 "register_operand" "w")
4384 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4385 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expander wrapping the _internal insn with the hi-half lane selector.
4388 (define_expand "aarch64_sqdmull2<mode>"
4389 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4390 (match_operand:VQ_HSI 1 "register_operand" "w")
4391 (match_operand:VQ_HSI 2 "register_operand" "w")]
4394 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4395 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; SQDMULL2 by lane (<VCOND> source); big-endian lane remap applies.
4402 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4403 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4406 (sign_extend:<VWIDE>
4408 (match_operand:VQ_HSI 1 "register_operand" "w")
4409 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4410 (sign_extend:<VWIDE>
4411 (vec_duplicate:<VHALF>
4413 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4414 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4419 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4420 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4422 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; SQDMULL2 by lane from a 128-bit (<VCONQ>) register.
4425 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4429 (sign_extend:<VWIDE>
4431 (match_operand:VQ_HSI 1 "register_operand" "w")
4432 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4433 (sign_extend:<VWIDE>
4434 (vec_duplicate:<VHALF>
4436 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4437 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4442 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4443 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4445 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expanders for the by-lane forms: build the hi-half parallel, then emit
;; the corresponding _internal insn.
4448 (define_expand "aarch64_sqdmull2_lane<mode>"
4449 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4450 (match_operand:VQ_HSI 1 "register_operand" "w")
4451 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4452 (match_operand:SI 3 "immediate_operand" "i")]
4455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4456 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4457 operands[2], operands[3],

4462 (define_expand "aarch64_sqdmull2_laneq<mode>"
4463 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4464 (match_operand:VQ_HSI 1 "register_operand" "w")
4465 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4466 (match_operand:SI 3 "immediate_operand" "i")]
4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4471 operands[2], operands[3],

;; SQDMULL2 by duplicated scalar element (template addresses lane [0]).
4478 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4479 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4482 (sign_extend:<VWIDE>
4484 (match_operand:VQ_HSI 1 "register_operand" "w")
4485 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4486 (sign_extend:<VWIDE>
4487 (vec_duplicate:<VHALF>
4488 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4492 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4493 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

4496 (define_expand "aarch64_sqdmull2_n<mode>"
4497 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4498 (match_operand:VQ_HSI 1 "register_operand" "w")
4499 (match_operand:<VEL> 2 "register_operand" "w")]
4502 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4503 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; Vector shift by (signed) register amount: SSHL/USHL and rounding forms,
;; selected via the <sur> iterator.
4510 (define_insn "aarch64_<sur>shl<mode>"
4511 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4513 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4514 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4517 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4518 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) shift by register: SQSHL/UQSHL/SQRSHL/...
4524 (define_insn "aarch64_<sur>q<r>shl<mode>"
4525 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4527 [(match_operand:VSDQ_I 1 "register_operand" "w")
4528 (match_operand:VSDQ_I 2 "register_operand" "w")]
4531 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4532 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift-left by immediate; a shift equal to the element width is
;; the dedicated SHLL encoding, otherwise <sur>SHLL is used.
4537 (define_insn "aarch64_<sur>shll_n<mode>"
4538 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4539 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4541 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4545 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4546 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4548 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4550 [(set_attr "type" "neon_shift_imm_long")]

;; High-half variant of the widening shift-left (SHLL2 / <sur>SHLL2).
4555 (define_insn "aarch64_<sur>shll2_n<mode>"
4556 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4557 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4558 (match_operand:SI 2 "immediate_operand" "i")]
4562 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4563 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4565 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4567 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (<sur> selects signed/unsigned/rounding).
4572 (define_insn "aarch64_<sur>shr_n<mode>"
4573 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4574 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4576 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4579 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4580 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift-right-and-accumulate: operand 1 is tied to the destination ("0").
4585 (define_insn "aarch64_<sur>sra_n<mode>"
4586 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4587 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4588 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4590 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4593 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4594 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (SLI/SRI); destination is read-modify-write.
4599 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4600 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4601 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4602 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4604 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4607 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4608 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift-left by immediate (SQSHL/UQSHL/SQSHLU).
4613 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4614 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4615 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4617 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4620 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4621 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift-right-narrow by immediate.
4627 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4628 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4629 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4631 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4634 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4635 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

4639 ;; cm(eq|ge|gt|lt|le)
4640 ;; Note, we have constraints for Dz and Z as different expanders
4641 ;; have different ideas of what should be passed to this pattern.

;; Integer vector compare: two alternatives, register-register and
;; compare-against-zero (the ZDz-constrained second alternative).
4643 (define_insn "aarch64_cm<optab><mode>"
4644 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4646 (COMPARISONS:<V_INT_EQUIV>
4647 (match_operand:VDQ_I 1 "register_operand" "w,w")
4648 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4652 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4653 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4654 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DI-mode compare that may land in either register file; post-reload it is
;; split to a flag-setting compare + cstore when the operands ended up in
;; general registers, otherwise to the CC-free SIMD pattern below.
4657 (define_insn_and_split "aarch64_cm<optab>di"
4658 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4661 (match_operand:DI 1 "register_operand" "w,w,r")
4662 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4664 (clobber (reg:CC CC_REGNUM))]
4667 "&& reload_completed"
4668 [(set (match_operand:DI 0 "register_operand")
4671 (match_operand:DI 1 "register_operand")
4672 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4675 /* If we are in the general purpose register file,
4676 we split to a sequence of comparison and store. */
4677 if (GP_REGNUM_P (REGNO (operands[0]))
4678 && GP_REGNUM_P (REGNO (operands[1])))
4680 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4681 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4682 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4683 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4686 /* Otherwise, we expand to a similar pattern which does not
4687 clobber CC_REGNUM. */
4689 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload SIMD-register form of the DI compare (no CC clobber).
4692 (define_insn "*aarch64_cm<optab>di"
4693 [(set (match_operand:DI 0 "register_operand" "=w,w")
4696 (match_operand:DI 1 "register_operand" "w,w")
4697 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4699 "TARGET_SIMD && reload_completed"
4701 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4702 cm<optab>\t%d0, %d1, #0"
4703 [(set_attr "type" "neon_compare, neon_compare_zero")]

;; Unsigned vector compares (UCOMPARISONS); no compare-with-zero alternative.
4708 (define_insn "aarch64_cm<optab><mode>"
4709 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4711 (UCOMPARISONS:<V_INT_EQUIV>
4712 (match_operand:VDQ_I 1 "register_operand" "w")
4713 (match_operand:VDQ_I 2 "register_operand" "w")
4716 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4717 [(set_attr "type" "neon_compare<q>")]

;; Unsigned DI compare-and-split, mirroring the signed version above but
;; using plain CCmode for the GP-register path.
4720 (define_insn_and_split "aarch64_cm<optab>di"
4721 [(set (match_operand:DI 0 "register_operand" "=w,r")
4724 (match_operand:DI 1 "register_operand" "w,r")
4725 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4727 (clobber (reg:CC CC_REGNUM))]
4730 "&& reload_completed"
4731 [(set (match_operand:DI 0 "register_operand")
4734 (match_operand:DI 1 "register_operand")
4735 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4738 /* If we are in the general purpose register file,
4739 we split to a sequence of comparison and store. */
4740 if (GP_REGNUM_P (REGNO (operands[0]))
4741 && GP_REGNUM_P (REGNO (operands[1])))
4743 machine_mode mode = CCmode;
4744 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4745 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4746 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4749 /* Otherwise, we expand to a similar pattern which does not
4750 clobber CC_REGNUM. */
4752 [(set_attr "type" "neon_compare,multiple")]

;; Post-reload SIMD-register form of the unsigned DI compare.
4755 (define_insn "*aarch64_cm<optab>di"
4756 [(set (match_operand:DI 0 "register_operand" "=w")
4759 (match_operand:DI 1 "register_operand" "w")
4760 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4762 "TARGET_SIMD && reload_completed"
4763 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4764 [(set_attr "type" "neon_compare")]
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

4769 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4770 ;; we don't have any insns using ne, and aarch64_vcond outputs
4771 ;; not (neg (eq (and x y) 0))
4772 ;; which is rewritten by simplify_rtx as
4773 ;; plus (eq (and x y) 0) -1.

;; CMTST: matches the plus (eq (and x y) 0) -1 shape described above.
4775 (define_insn "aarch64_cmtst<mode>"
4776 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4780 (match_operand:VDQ_I 1 "register_operand" "w")
4781 (match_operand:VDQ_I 2 "register_operand" "w"))
4782 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4783 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4786 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4787 [(set_attr "type" "neon_tst<q>")]

;; DI-mode CMTST; split after reload to AND + NE-compare + cstore when the
;; operands are in general registers.
4790 (define_insn_and_split "aarch64_cmtstdi"
4791 [(set (match_operand:DI 0 "register_operand" "=w,r")
4795 (match_operand:DI 1 "register_operand" "w,r")
4796 (match_operand:DI 2 "register_operand" "w,r"))
4798 (clobber (reg:CC CC_REGNUM))]
4801 "&& reload_completed"
4802 [(set (match_operand:DI 0 "register_operand")
4806 (match_operand:DI 1 "register_operand")
4807 (match_operand:DI 2 "register_operand"))
4810 /* If we are in the general purpose register file,
4811 we split to a sequence of comparison and store. */
4812 if (GP_REGNUM_P (REGNO (operands[0]))
4813 && GP_REGNUM_P (REGNO (operands[1])))
4815 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4816 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4817 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4818 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4819 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4822 /* Otherwise, we expand to a similar pattern which does not
4823 clobber CC_REGNUM. */
4825 [(set_attr "type" "neon_tst,multiple")]

;; Post-reload SIMD-register form of the DI CMTST.
4828 (define_insn "*aarch64_cmtstdi"
4829 [(set (match_operand:DI 0 "register_operand" "=w")
4833 (match_operand:DI 1 "register_operand" "w")
4834 (match_operand:DI 2 "register_operand" "w"))
4837 "cmtst\t%d0, %d1, %d2"
4838 [(set_attr "type" "neon_tst")]

4841 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point compares; second alternative compares against +0.0 (YDz).
4843 (define_insn "aarch64_cm<optab><mode>"
4844 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4846 (COMPARISONS:<V_INT_EQUIV>
4847 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4848 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4852 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4853 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4854 [(set_attr "type" "neon_fp_compare_<stype><q>")]

4858 ;; Note we can also handle what would be fac(le|lt) by
4859 ;; generating fac(ge|gt).

;; FACGE/FACGT: absolute-value floating-point compares.
4861 (define_insn "aarch64_fac<optab><mode>"
4862 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4864 (FAC_COMPARISONS:<V_INT_EQUIV>
4866 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4868 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4871 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4872 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; ADDP: pairwise addition on 64-bit integer vectors.
4877 (define_insn "aarch64_addp<mode>"
4878 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4880 [(match_operand:VD_BHSI 1 "register_operand" "w")
4881 (match_operand:VD_BHSI 2 "register_operand" "w")]
4884 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4885 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar ADDP reducing a V2DI input to DI.
4888 (define_insn "aarch64_addpdi"
4889 [(set (match_operand:DI 0 "register_operand" "=w")
4891 [(match_operand:V2DI 1 "register_operand" "w")]
4895 [(set_attr "type" "neon_reduc_add")]

;; sqrt expander: tries the approximate-sqrt expansion first (depending on
;; aarch64_emit_approx_sqrt), otherwise falls through to the FSQRT insn.
4900 (define_expand "sqrt<mode>2"
4901 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4902 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4905 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

;; Plain FSQRT instruction pattern.
4909 (define_insn "*sqrt<mode>2"
4910 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4911 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4913 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4914 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

4917 ;; Patterns for vector struct loads and stores.

;; LD2: load a 2-register (OImode) structure from memory.
4919 (define_insn "aarch64_simd_ld2<mode>"
4920 [(set (match_operand:OI 0 "register_operand" "=w")
4921 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4922 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4925 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4926 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one element pair and replicate across all lanes.
4929 (define_insn "aarch64_simd_ld2r<mode>"
4930 [(set (match_operand:OI 0 "register_operand" "=w")
4931 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4932 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4935 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4936 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 single-lane: operand 2 ("0") supplies the unmodified lanes; lane
;; index is endian-remapped before emission.
4939 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4940 [(set (match_operand:OI 0 "register_operand" "=w")
4941 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4942 (match_operand:OI 2 "register_operand" "0")
4943 (match_operand:SI 3 "immediate_operand" "i")
4944 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4948 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4949 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4951 [(set_attr "type" "neon_load2_one_lane")]

;; vec_load_lanes expander: on big-endian, load into a temp and permute the
;; register list with a TBL-based reversal to match GCC's lane numbering.
4954 (define_expand "vec_load_lanesoi<mode>"
4955 [(set (match_operand:OI 0 "register_operand" "=w")
4956 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4957 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961 if (BYTES_BIG_ENDIAN)
4963 rtx tmp = gen_reg_rtx (OImode);
4964 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4965 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4966 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4969 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store a 2-register structure to memory.
4973 (define_insn "aarch64_simd_st2<mode>"
4974 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4975 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4976 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4979 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4980 [(set_attr "type" "neon_store2_2reg<q>")]

4983 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4984 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4985 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4986 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4987 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4988 (match_operand:SI 2 "immediate_operand" "i")]
4992 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4993 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4995 [(set_attr "type" "neon_store2_one_lane<q>")]

;; vec_store_lanes expander: big-endian permutes the register list first.
4998 (define_expand "vec_store_lanesoi<mode>"
4999 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5000 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
5001 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005 if (BYTES_BIG_ENDIAN)
5007 rtx tmp = gen_reg_rtx (OImode);
5008 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5009 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5010 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5013 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; LD3/ST3 patterns: 3-register (CImode) structure loads and stores,
;; mirroring the LD2/ST2 group above.
5017 (define_insn "aarch64_simd_ld3<mode>"
5018 [(set (match_operand:CI 0 "register_operand" "=w")
5019 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5020 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5023 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5024 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD3R: load one element triple and replicate to all lanes.
5027 (define_insn "aarch64_simd_ld3r<mode>"
5028 [(set (match_operand:CI 0 "register_operand" "=w")
5029 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5030 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5033 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5034 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; LD3 single-lane with endian lane remap; operand 2 supplies other lanes.
5037 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5038 [(set (match_operand:CI 0 "register_operand" "=w")
5039 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5040 (match_operand:CI 2 "register_operand" "0")
5041 (match_operand:SI 3 "immediate_operand" "i")
5042 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5046 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5047 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5049 [(set_attr "type" "neon_load3_one_lane")]

;; Big-endian path: ld3 into a temp, then reverse the register list.
5052 (define_expand "vec_load_lanesci<mode>"
5053 [(set (match_operand:CI 0 "register_operand" "=w")
5054 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5055 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5059 if (BYTES_BIG_ENDIAN)
5061 rtx tmp = gen_reg_rtx (CImode);
5062 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5063 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5064 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5067 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

5071 (define_insn "aarch64_simd_st3<mode>"
5072 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5073 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5074 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5077 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5078 [(set_attr "type" "neon_store3_3reg<q>")]

5081 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5082 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5083 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5084 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5085 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5086 (match_operand:SI 2 "immediate_operand" "i")]
5090 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5091 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5093 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Big-endian path: reverse the register list, then st3.
5096 (define_expand "vec_store_lanesci<mode>"
5097 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5098 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5099 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5103 if (BYTES_BIG_ENDIAN)
5105 rtx tmp = gen_reg_rtx (CImode);
5106 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5107 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5108 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5111 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; LD4/ST4 patterns: 4-register (XImode) structure loads and stores,
;; mirroring the LD2/LD3 groups above.
5115 (define_insn "aarch64_simd_ld4<mode>"
5116 [(set (match_operand:XI 0 "register_operand" "=w")
5117 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5118 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5121 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5122 [(set_attr "type" "neon_load4_4reg<q>")]

;; LD4R: load one element quadruple and replicate to all lanes.
5125 (define_insn "aarch64_simd_ld4r<mode>"
5126 [(set (match_operand:XI 0 "register_operand" "=w")
5127 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5128 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5131 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5132 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; LD4 single-lane with endian lane remap; operand 2 supplies other lanes.
5135 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5136 [(set (match_operand:XI 0 "register_operand" "=w")
5137 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5138 (match_operand:XI 2 "register_operand" "0")
5139 (match_operand:SI 3 "immediate_operand" "i")
5140 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5145 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5147 [(set_attr "type" "neon_load4_one_lane")]

;; Big-endian path: ld4 into a temp, then reverse the register list.
5150 (define_expand "vec_load_lanesxi<mode>"
5151 [(set (match_operand:XI 0 "register_operand" "=w")
5152 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5153 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5157 if (BYTES_BIG_ENDIAN)
5159 rtx tmp = gen_reg_rtx (XImode);
5160 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5161 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5162 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5165 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

5169 (define_insn "aarch64_simd_st4<mode>"
5170 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5171 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5172 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5175 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5176 [(set_attr "type" "neon_store4_4reg<q>")]

5179 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5180 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5181 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5182 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5183 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5184 (match_operand:SI 2 "immediate_operand" "i")]
5188 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5189 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5191 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Big-endian path: reverse the register list, then st4.
5194 (define_expand "vec_store_lanesxi<mode>"
5195 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5196 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5197 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5201 if (BYTES_BIG_ENDIAN)
5203 rtx tmp = gen_reg_rtx (XImode);
5204 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5205 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5206 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5209 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; Reverse each vector in a struct register list via per-register TBL
;; (one aarch64_tbl1v16qi per constituent V16QI register after reload).
5213 (define_insn_and_split "aarch64_rev_reglist<mode>"
5214 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5216 [(match_operand:VSTRUCT 1 "register_operand" "w")
5217 (match_operand:V16QI 2 "register_operand" "w")]
5218 UNSPEC_REV_REGLIST))]
5221 "&& reload_completed"
5225 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5226 for (i = 0; i < nregs; i++)
5228 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5229 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5230 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5234 [(set_attr "type" "neon_tbl1_q")
5235 (set_attr "length" "<insn_count>")]

5238 ;; Reload patterns for AdvSIMD register list operands.

;; mov expander for struct modes: when pseudos are available and the
;; destination is not a register, force the source into one.
5240 (define_expand "mov<mode>"
5241 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5242 (match_operand:VSTRUCT 1 "general_operand" ""))]
5245 if (can_create_pseudo_p ())
5247 if (GET_CODE (operands[0]) != REG)
5248 operands[1] = force_reg (<MODE>mode, operands[1]);
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; LD1 x3: wrap the address register in a CImode MEM and emit the insn.
5253 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5254 [(match_operand:CI 0 "register_operand" "=w")
5255 (match_operand:DI 1 "register_operand" "r")
5256 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5260 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));

;; LD1 of three consecutive registers.
5264 (define_insn "aarch64_ld1_x3_<mode>"
5265 [(set (match_operand:CI 0 "register_operand" "=w")
5267 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5268 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5270 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5271 [(set_attr "type" "neon_load1_3reg<q>")]

;; ST1 x2: wrap the address register in an OImode MEM and emit the insn.
5274 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5275 [(match_operand:DI 0 "register_operand" "")
5276 (match_operand:OI 1 "register_operand" "")
5277 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5280 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5281 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));

;; ST1 of two consecutive registers.
5285 (define_insn "aarch64_st1_x2_<mode>"
5286 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5288 [(match_operand:OI 1 "register_operand" "w")
5289 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5291 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5292 [(set_attr "type" "neon_store1_2reg<q>")]

;; ST1 x3: wrap the address register in a CImode MEM and emit the insn.
5295 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5296 [(match_operand:DI 0 "register_operand" "")
5297 (match_operand:CI 1 "register_operand" "")
5298 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5301 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5302 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));

;; ST1 of three consecutive registers.
5306 (define_insn "aarch64_st1_x3_<mode>"
5307 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5309 [(match_operand:CI 1 "register_operand" "w")
5310 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5312 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5313 [(set_attr "type" "neon_store1_3reg<q>")]
;; NOTE(review): numbering gaps below indicate lines dropped by extraction;
;; text kept byte-for-byte, comments only added.

;; Little-endian struct-mode move: reg-reg (split into multiple moves per
;; the "multiple" type/length attrs), st1 to memory, or ld1 from memory.
5316 (define_insn "*aarch64_mov<mode>"
5317 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5318 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5319 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5320 && (register_operand (operands[0], <MODE>mode)
5321 || register_operand (operands[1], <MODE>mode))"
5324 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5325 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5326 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5327 neon_load<nregs>_<nregs>reg_q")
5328 (set_attr "length" "<insn_count>,4,4")]

;; Big-endian single-register LD1/ST1 (element-wise, endian-safe).
5331 (define_insn "aarch64_be_ld1<mode>"
5332 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5333 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5334 "aarch64_simd_struct_operand" "Utv")]
5337 "ld1\\t{%0<Vmtype>}, %1"
5338 [(set_attr "type" "neon_load1_1reg<q>")]

5341 (define_insn "aarch64_be_st1<mode>"
5342 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5343 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5346 "st1\\t{%1<Vmtype>}, %0"
5347 [(set_attr "type" "neon_store1_1reg<q>")]

;; Big-endian OI/CI/XI moves; register-register forms are later split
;; (see the define_split bodies below in the file).
5350 (define_insn "*aarch64_be_movoi"
5351 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5352 (match_operand:OI 1 "general_operand" " w,w,m"))]
5353 "TARGET_SIMD && BYTES_BIG_ENDIAN
5354 && (register_operand (operands[0], OImode)
5355 || register_operand (operands[1], OImode))"
5360 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5361 (set_attr "length" "8,4,4")]

5364 (define_insn "*aarch64_be_movci"
5365 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5366 (match_operand:CI 1 "general_operand" " w,w,o"))]
5367 "TARGET_SIMD && BYTES_BIG_ENDIAN
5368 && (register_operand (operands[0], CImode)
5369 || register_operand (operands[1], CImode))"
5371 [(set_attr "type" "multiple")
5372 (set_attr "length" "12,4,4")]

5375 (define_insn "*aarch64_be_movxi"
5376 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5377 (match_operand:XI 1 "general_operand" " w,w,o"))]
5378 "TARGET_SIMD && BYTES_BIG_ENDIAN
5379 && (register_operand (operands[0], XImode)
5380 || register_operand (operands[1], XImode))"
5382 [(set_attr "type" "multiple")
5383 (set_attr "length" "16,4,4")]
;; NOTE(review): the "(define_split" header line of the first split below
;; (and other interior lines — see the numbering gaps) was dropped by the
;; extraction; the surviving text is preserved byte-for-byte.

;; Split reg-reg OI moves into two TImode register moves after reload.
5387 [(set (match_operand:OI 0 "register_operand")
5388 (match_operand:OI 1 "register_operand"))]
5389 "TARGET_SIMD && reload_completed"
5392 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; Split CI moves: reg-reg becomes three TImode moves; on big-endian the
;; memory form becomes an OImode move plus a V16QI move of the last part.
5397 [(set (match_operand:CI 0 "nonimmediate_operand")
5398 (match_operand:CI 1 "general_operand"))]
5399 "TARGET_SIMD && reload_completed"
5402 if (register_operand (operands[0], CImode)
5403 && register_operand (operands[1], CImode))
5405 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5408 else if (BYTES_BIG_ENDIAN)
5410 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5411 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5412 emit_move_insn (gen_lowpart (V16QImode,
5413 simplify_gen_subreg (TImode, operands[0],
5415 gen_lowpart (V16QImode,
5416 simplify_gen_subreg (TImode, operands[1],

;; Split XI moves: reg-reg becomes four TImode moves; on big-endian the
;; memory form becomes two OImode moves (offsets 0 and 32).
5425 [(set (match_operand:XI 0 "nonimmediate_operand")
5426 (match_operand:XI 1 "general_operand"))]
5427 "TARGET_SIMD && reload_completed"
5430 if (register_operand (operands[0], XImode)
5431 && register_operand (operands[1], XImode))
5433 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5436 else if (BYTES_BIG_ENDIAN)
5438 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5439 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5440 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5441 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Structure-load builtin expanders and the ldN/ld1 insns they emit.
;; NOTE(review): extraction is missing interior lines (conditions, UNSPEC
;; names, closing parens); code left byte-identical throughout.
;;
;; ldNr (load-and-replicate): wrap the address register in a BLKmode MEM,
;; size it from the element size, then emit the real simd_ldNr pattern.
5448 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5449 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5450 (match_operand:DI 1 "register_operand" "w")
5451 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5454 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5455 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5458 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; ldN_dreg insns: structured loads of 64-bit (D-register) vectors.  Each
;; has a VD variant using ldN and a DX variant using ld1 on a .1d layout.
5463 (define_insn "aarch64_ld2<mode>_dreg"
5464 [(set (match_operand:OI 0 "register_operand" "=w")
5465 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5466 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5469 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5470 [(set_attr "type" "neon_load2_2reg<q>")]
5473 (define_insn "aarch64_ld2<mode>_dreg"
5474 [(set (match_operand:OI 0 "register_operand" "=w")
5475 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5476 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5479 "ld1\\t{%S0.1d - %T0.1d}, %1"
5480 [(set_attr "type" "neon_load1_2reg<q>")]
5483 (define_insn "aarch64_ld3<mode>_dreg"
5484 [(set (match_operand:CI 0 "register_operand" "=w")
5485 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5486 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5489 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5490 [(set_attr "type" "neon_load3_3reg<q>")]
5493 (define_insn "aarch64_ld3<mode>_dreg"
5494 [(set (match_operand:CI 0 "register_operand" "=w")
5495 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5496 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5499 "ld1\\t{%S0.1d - %U0.1d}, %1"
5500 [(set_attr "type" "neon_load1_3reg<q>")]
5503 (define_insn "aarch64_ld4<mode>_dreg"
5504 [(set (match_operand:XI 0 "register_operand" "=w")
5505 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5506 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5509 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5510 [(set_attr "type" "neon_load4_4reg<q>")]
5513 (define_insn "aarch64_ld4<mode>_dreg"
5514 [(set (match_operand:XI 0 "register_operand" "=w")
5515 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5516 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5519 "ld1\\t{%S0.1d - %V0.1d}, %1"
5520 [(set_attr "type" "neon_load1_4reg<q>")]
;; D-reg structure load expander: BLKmode MEM sized nregs * 8 bytes, then
;; dispatch to the matching _dreg insn above.
5523 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5524 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5525 (match_operand:DI 1 "register_operand" "r")
5526 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5529 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5530 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5532 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; ld1: a plain move on little-endian; a dedicated be_ld1 pattern when
;; BYTES_BIG_ENDIAN so lane numbering is preserved.
5536 (define_expand "aarch64_ld1<VALL_F16:mode>"
5537 [(match_operand:VALL_F16 0 "register_operand")
5538 (match_operand:DI 1 "register_operand")]
5541 machine_mode mode = <VALL_F16:MODE>mode;
5542 rtx mem = gen_rtx_MEM (mode, operands[1]);
5544 if (BYTES_BIG_ENDIAN)
5545 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5547 emit_move_insn (operands[0], mem);
;; Q-reg structure load expander: MEM in the tuple mode itself.
5551 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5552 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5553 (match_operand:DI 1 "register_operand" "r")
5554 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5557 machine_mode mode = <VSTRUCT:MODE>mode;
5558 rtx mem = gen_rtx_MEM (mode, operands[1]);
5560 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; ld1x2: two consecutive vectors loaded as one OImode value.
5564 (define_expand "aarch64_ld1x2<VQ:mode>"
5565 [(match_operand:OI 0 "register_operand" "=w")
5566 (match_operand:DI 1 "register_operand" "r")
5567 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5570 machine_mode mode = OImode;
5571 rtx mem = gen_rtx_MEM (mode, operands[1]);
5573 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5577 (define_expand "aarch64_ld1x2<VDC:mode>"
5578 [(match_operand:OI 0 "register_operand" "=w")
5579 (match_operand:DI 1 "register_operand" "r")
5580 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5583 machine_mode mode = OImode;
5584 rtx mem = gen_rtx_MEM (mode, operands[1]);
5586 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
;; Lane load: operand 3 is a lane index, range-checked against nunits
;; before emitting the load-lanes pattern (operand 2 is the merge input).
5591 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5592 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5593 (match_operand:DI 1 "register_operand" "w")
5594 (match_operand:VSTRUCT 2 "register_operand" "0")
5595 (match_operand:SI 3 "immediate_operand" "i")
5596 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5600 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5603 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5604 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5605 operands[0], mem, operands[2], operands[3]));
5609 ;; Expanders for builtins to extract vector registers from large
5610 ;; opaque integer modes.
;; get_dreg: pull part N out of a tuple; goes via a Q-sized (VDBL) temp
;; at byte offset part * 16, then narrows to the D-reg mode.
5614 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5615 [(match_operand:VDC 0 "register_operand" "=w")
5616 (match_operand:VSTRUCT 1 "register_operand" "w")
5617 (match_operand:SI 2 "immediate_operand" "i")]
5620 int part = INTVAL (operands[2]);
5621 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5622 int offset = part * 16;
5624 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5625 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; get_qreg: direct SUBREG extraction at byte offset part * 16.
5631 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5632 [(match_operand:VQ 0 "register_operand" "=w")
5633 (match_operand:VSTRUCT 1 "register_operand" "w")
5634 (match_operand:SI 2 "immediate_operand" "i")]
5637 int part = INTVAL (operands[2]);
5638 int offset = part * 16;
5640 emit_move_insn (operands[0],
5641 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
;; Permutation support: the generic vec_perm expander plus the TBL/TBX
;; table-lookup insns it (and the intrinsics) can map onto.
;; NOTE(review): extraction is missing interior lines (conditions, UNSPEC
;; names, closing parens); code left byte-identical throughout.
5645 ;; Permuted-store expanders for neon intrinsics.
5647 ;; Permute instructions
;; Standard vec_perm: delegate everything to aarch64_expand_vec_perm.
5651 (define_expand "vec_perm<mode>"
5652 [(match_operand:VB 0 "register_operand")
5653 (match_operand:VB 1 "register_operand")
5654 (match_operand:VB 2 "register_operand")
5655 (match_operand:VB 3 "register_operand")]
5658 aarch64_expand_vec_perm (operands[0], operands[1],
5659 operands[2], operands[3], <nunits>);
;; One-source-register table lookup.
5663 (define_insn "aarch64_tbl1<mode>"
5664 [(set (match_operand:VB 0 "register_operand" "=w")
5665 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5666 (match_operand:VB 2 "register_operand" "w")]
5669 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5670 [(set_attr "type" "neon_tbl1<q>")]
5673 ;; Two source registers.
5675 (define_insn "aarch64_tbl2v16qi"
5676 [(set (match_operand:V16QI 0 "register_operand" "=w")
5677 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5678 (match_operand:V16QI 2 "register_operand" "w")]
5681 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5682 [(set_attr "type" "neon_tbl2_q")]
5685 (define_insn "aarch64_tbl3<mode>"
5686 [(set (match_operand:VB 0 "register_operand" "=w")
5687 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5688 (match_operand:VB 2 "register_operand" "w")]
5691 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5692 [(set_attr "type" "neon_tbl3")]
;; TBX variants keep operand 1 live ("0" constraint) as the fallback value
;; for out-of-range indices.
5695 (define_insn "aarch64_tbx4<mode>"
5696 [(set (match_operand:VB 0 "register_operand" "=w")
5697 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5698 (match_operand:OI 2 "register_operand" "w")
5699 (match_operand:VB 3 "register_operand" "w")]
5702 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5703 [(set_attr "type" "neon_tbl4")]
5706 ;; Three source registers.
5708 (define_insn "aarch64_qtbl3<mode>"
5709 [(set (match_operand:VB 0 "register_operand" "=w")
5710 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5711 (match_operand:VB 2 "register_operand" "w")]
5714 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5715 [(set_attr "type" "neon_tbl3")]
5718 (define_insn "aarch64_qtbx3<mode>"
5719 [(set (match_operand:VB 0 "register_operand" "=w")
5720 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5721 (match_operand:CI 2 "register_operand" "w")
5722 (match_operand:VB 3 "register_operand" "w")]
5725 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5726 [(set_attr "type" "neon_tbl3")]
5729 ;; Four source registers.
5731 (define_insn "aarch64_qtbl4<mode>"
5732 [(set (match_operand:VB 0 "register_operand" "=w")
5733 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5734 (match_operand:VB 2 "register_operand" "w")]
5737 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5738 [(set_attr "type" "neon_tbl4")]
5741 (define_insn "aarch64_qtbx4<mode>"
5742 [(set (match_operand:VB 0 "register_operand" "=w")
5743 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5744 (match_operand:XI 2 "register_operand" "w")
5745 (match_operand:VB 3 "register_operand" "w")]
5748 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5749 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into one OImode tuple; split back into the
;; underlying moves after reload.
5752 (define_insn_and_split "aarch64_combinev16qi"
5753 [(set (match_operand:OI 0 "register_operand" "=w")
5754 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5755 (match_operand:V16QI 2 "register_operand" "w")]
5759 "&& reload_completed"
5762 aarch64_split_combinev16qi (operands);
5765 [(set_attr "type" "multiple")]
5768 ;; This instruction's pattern is generated directly by
5769 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5770 ;; need corresponding changes there.
5771 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5772 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5773 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5774 (match_operand:VALL_F16 2 "register_operand" "w")]
5777 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5778 [(set_attr "type" "neon_permute<q>")]
5781 ;; This instruction's pattern is generated directly by
5782 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5783 ;; need corresponding changes there.  Note that the immediate (third)
5784 ;; operand is a lane index not a byte index.
5785 (define_insn "aarch64_ext<mode>"
5786 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5787 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5788 (match_operand:VALL_F16 2 "register_operand" "w")
5789 (match_operand:SI 3 "immediate_operand" "i")]
5793 operands[3] = GEN_INT (INTVAL (operands[3])
5794 * GET_MODE_UNIT_SIZE (<MODE>mode));
5795 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5797 [(set_attr "type" "neon_ext<q>")]
5800 ;; This instruction's pattern is generated directly by
5801 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5802 ;; need corresponding changes there.
5803 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5804 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5805 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5808 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5809 [(set_attr "type" "neon_rev<q>")]
;; Structure-store insns and expanders (mirror images of the ldN patterns
;; above), plus the set_qreg insert expander and vec_init.
;; NOTE(review): extraction is missing interior lines (conditions, UNSPEC
;; names, closing parens); code left byte-identical throughout.
;;
;; stN_dreg: structured stores of 64-bit vectors; VD variants use stN,
;; DX variants use st1 on a .1d layout.
5812 (define_insn "aarch64_st2<mode>_dreg"
5813 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5814 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5815 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5818 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5819 [(set_attr "type" "neon_store2_2reg")]
5822 (define_insn "aarch64_st2<mode>_dreg"
5823 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5824 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5825 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5828 "st1\\t{%S1.1d - %T1.1d}, %0"
5829 [(set_attr "type" "neon_store1_2reg")]
5832 (define_insn "aarch64_st3<mode>_dreg"
5833 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5834 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5835 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5838 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5839 [(set_attr "type" "neon_store3_3reg")]
5842 (define_insn "aarch64_st3<mode>_dreg"
5843 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5844 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5845 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5848 "st1\\t{%S1.1d - %U1.1d}, %0"
5849 [(set_attr "type" "neon_store1_3reg")]
5852 (define_insn "aarch64_st4<mode>_dreg"
5853 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5854 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5855 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5859 [(set_attr "type" "neon_store4_4reg")]
5862 (define_insn "aarch64_st4<mode>_dreg"
5863 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5864 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5865 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868 "st1\\t{%S1.1d - %V1.1d}, %0"
5869 [(set_attr "type" "neon_store1_4reg")]
;; D-reg structure store expander: BLKmode MEM sized nregs * 8 bytes.
5872 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5873 [(match_operand:DI 0 "register_operand" "r")
5874 (match_operand:VSTRUCT 1 "register_operand" "w")
5875 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5878 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5879 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5881 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Q-reg structure store expander: MEM in the tuple mode itself.
5885 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5886 [(match_operand:DI 0 "register_operand" "r")
5887 (match_operand:VSTRUCT 1 "register_operand" "w")
5888 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5891 machine_mode mode = <VSTRUCT:MODE>mode;
5892 rtx mem = gen_rtx_MEM (mode, operands[0]);
5894 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Lane store: operand 2 is a lane index; MEM sized to one element.
5898 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5899 [(match_operand:DI 0 "register_operand" "r")
5900 (match_operand:VSTRUCT 1 "register_operand" "w")
5901 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5902 (match_operand:SI 2 "immediate_operand")]
5905 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5906 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5909 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5910 mem, operands[1], operands[2]));
;; st1: plain move on little-endian, dedicated be_st1 pattern otherwise.
5914 (define_expand "aarch64_st1<VALL_F16:mode>"
5915 [(match_operand:DI 0 "register_operand")
5916 (match_operand:VALL_F16 1 "register_operand")]
5919 machine_mode mode = <VALL_F16:MODE>mode;
5920 rtx mem = gen_rtx_MEM (mode, operands[0]);
5922 if (BYTES_BIG_ENDIAN)
5923 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5925 emit_move_insn (mem, operands[1]);
5929 ;; Expander for builtins to insert vector registers into large
5930 ;; opaque integer modes.
5932 ;; Q-register list.  We don't need a D-reg inserter as we zero
5933 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; set_qreg: copy the whole tuple then overwrite part N (byte offset
;; part * 16) via a SUBREG store.
5935 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5936 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5937 (match_operand:VSTRUCT 1 "register_operand" "0")
5938 (match_operand:VQ 2 "register_operand" "w")
5939 (match_operand:SI 3 "immediate_operand" "i")]
5942 int part = INTVAL (operands[3]);
5943 int offset = part * 16;
5945 emit_move_insn (operands[0], operands[1]);
5946 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5951 ;; Standard pattern name vec_init<mode><Vel>.
5953 (define_expand "vec_init<mode><Vel>"
5954 [(match_operand:VALL_F16 0 "register_operand" "")
5955 (match_operand 1 "" "")]
5958 aarch64_expand_vector_init (operands[0], operands[1]);
;; vec_init from two half-width vectors.
5962 (define_expand "vec_init<mode><Vhalf>"
5963 [(match_operand:VQ_NO2E 0 "register_operand" "")
5964 (match_operand 1 "" "")]
5967 aarch64_expand_vector_init (operands[0], operands[1]);
;; ld1r (load one element, replicate to all lanes), ld1 x2 loads, the
;; floating-point reciprocal estimate/step insns, and vec_extract.
;; NOTE(review): extraction is missing interior lines (conditions, UNSPEC
;; names, closing parens); code left byte-identical throughout.
5971 (define_insn "*aarch64_simd_ld1r<mode>"
5972 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5973 (vec_duplicate:VALL_F16
5974 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5976 "ld1r\\t{%0.<Vtype>}, %1"
5977 [(set_attr "type" "neon_load1_all_lanes")]
;; ld1 of two consecutive vectors into an OImode pair (VQ and VDC forms).
5980 (define_insn "aarch64_simd_ld1<mode>_x2"
5981 [(set (match_operand:OI 0 "register_operand" "=w")
5982 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5983 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5986 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5987 [(set_attr "type" "neon_load1_2reg<q>")]
5990 (define_insn "aarch64_simd_ld1<mode>_x2"
5991 [(set (match_operand:OI 0 "register_operand" "=w")
5992 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5993 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5996 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5997 [(set_attr "type" "neon_load1_2reg<q>")]
;; Reciprocal estimate / exponent / step insns (vector and scalar FP).
6001 (define_insn "@aarch64_frecpe<mode>"
6002 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6004 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6007 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6008 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
6011 (define_insn "aarch64_frecpx<mode>"
6012 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6013 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6016 "frecpx\t%<s>0, %<s>1"
6017 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
6020 (define_insn "@aarch64_frecps<mode>"
6021 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6023 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6024 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6027 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6028 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate.
6031 (define_insn "aarch64_urecpe<mode>"
6032 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6033 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6036 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6037 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6039 ;; Standard pattern name vec_extract<mode><Vel>.
;; vec_extract: delegate to the get_lane pattern.
6041 (define_expand "vec_extract<mode><Vel>"
6042 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6043 (match_operand:VALL_F16 1 "register_operand" "")
6044 (match_operand:SI 2 "immediate_operand" "")]
6048 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES crypto extension patterns (gated on TARGET_AES).
;; NOTE(review): extraction is missing interior lines (UNSPEC names,
;; closing parens); code left byte-identical throughout.
;;
;; aese/aesd: operand 1 is commutative with 2 ("%0") and tied to the
;; destination -- the instruction XORs the two inputs internally.
6054 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6055 [(set (match_operand:V16QI 0 "register_operand" "=w")
6056 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6057 (match_operand:V16QI 2 "register_operand" "w")]
6059 "TARGET_SIMD && TARGET_AES"
6060 "aes<aes_op>\\t%0.16b, %2.16b"
6061 [(set_attr "type" "crypto_aese")]
;; Combine patterns: fold an explicit XOR with zero feeding aese/aesd into
;; the plain instruction (zero operand in either position).
6064 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6065 [(set (match_operand:V16QI 0 "register_operand" "=w")
6066 (unspec:V16QI [(xor:V16QI
6067 (match_operand:V16QI 1 "register_operand" "%0")
6068 (match_operand:V16QI 2 "register_operand" "w"))
6069 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6071 "TARGET_SIMD && TARGET_AES"
6072 "aes<aes_op>\\t%0.16b, %2.16b"
6073 [(set_attr "type" "crypto_aese")]
6076 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6077 [(set (match_operand:V16QI 0 "register_operand" "=w")
6078 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6079 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6080 (match_operand:V16QI 2 "register_operand" "w"))]
6082 "TARGET_SIMD && TARGET_AES"
6083 "aes<aes_op>\\t%0.16b, %2.16b"
6084 [(set_attr "type" "crypto_aese")]
6087 ;; When AES/AESMC fusion is enabled we want the register allocation to
6091 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; aesmc/aesimc: the tied "0,w" alternative is only enabled when the CPU
;; fuses AES+AESMC, steering the allocator toward the fusible form.
6093 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6094 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6095 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6097 "TARGET_SIMD && TARGET_AES"
6098 "aes<aesmc_op>\\t%0.16b, %1.16b"
6099 [(set_attr "type" "crypto_aesmc")
6100 (set_attr_alternative "enabled"
6101 [(if_then_else (match_test
6102 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6103 (const_string "yes" )
6104 (const_string "no"))
6105 (const_string "yes")])]
6108 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6109 ;; and enforce the register dependency without scheduling or register
6110 ;; allocation messing up the order or introducing moves inbetween.
6111 ;;  Mash the two together during combine.
6113 (define_insn "*aarch64_crypto_aese_fused"
6114 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6117 [(match_operand:V16QI 1 "register_operand" "0")
6118 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
6120 "TARGET_SIMD && TARGET_AES
6121 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6122 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6123 [(set_attr "type" "crypto_aese")
6124 (set_attr "length" "8")]
6127 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6128 ;; and enforce the register dependency without scheduling or register
6129 ;; allocation messing up the order or introducing moves inbetween.
6130 ;;  Mash the two together during combine.
6132 (define_insn "*aarch64_crypto_aesd_fused"
6133 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6136 [(match_operand:V16QI 1 "register_operand" "0")
6137 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6139 "TARGET_SIMD && TARGET_AES
6140 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6141 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6142 [(set_attr "type" "crypto_aese")
6143 (set_attr "length" "8")]
;; SHA-1 / SHA-256 (TARGET_SHA2) and SHA-512 (TARGET_SHA3) crypto insns.
;; NOTE(review): extraction is missing interior lines (UNSPEC names,
;; some output templates, closing parens); code left byte-identical.
6148 (define_insn "aarch64_crypto_sha1hsi"
6149 [(set (match_operand:SI 0 "register_operand" "=w")
6150 (unspec:SI [(match_operand:SI 1
6151 "register_operand" "w")]
6153 "TARGET_SIMD && TARGET_SHA2"
6155 [(set_attr "type" "crypto_sha1_fast")]
;; sha1h on lane 0 of a V4SI -- little-endian lane numbering.
6158 (define_insn "aarch64_crypto_sha1hv4si"
6159 [(set (match_operand:SI 0 "register_operand" "=w")
6160 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6161 (parallel [(const_int 0)]))]
6163 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6165 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian twin: the same architectural lane is index 3.
6168 (define_insn "aarch64_be_crypto_sha1hv4si"
6169 [(set (match_operand:SI 0 "register_operand" "=w")
6170 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6171 (parallel [(const_int 3)]))]
6173 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6175 [(set_attr "type" "crypto_sha1_fast")]
6178 (define_insn "aarch64_crypto_sha1su1v4si"
6179 [(set (match_operand:V4SI 0 "register_operand" "=w")
6180 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6181 (match_operand:V4SI 2 "register_operand" "w")]
6183 "TARGET_SIMD && TARGET_SHA2"
6184 "sha1su1\\t%0.4s, %2.4s"
6185 [(set_attr "type" "crypto_sha1_fast")]
6188 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6189 [(set (match_operand:V4SI 0 "register_operand" "=w")
6190 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6191 (match_operand:SI 2 "register_operand" "w")
6192 (match_operand:V4SI 3 "register_operand" "w")]
6194 "TARGET_SIMD && TARGET_SHA2"
6195 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6196 [(set_attr "type" "crypto_sha1_slow")]
6199 (define_insn "aarch64_crypto_sha1su0v4si"
6200 [(set (match_operand:V4SI 0 "register_operand" "=w")
6201 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6202 (match_operand:V4SI 2 "register_operand" "w")
6203 (match_operand:V4SI 3 "register_operand" "w")]
6205 "TARGET_SIMD && TARGET_SHA2"
6206 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6207 [(set_attr "type" "crypto_sha1_xor")]
;; SHA-256 hash-update and schedule-update insns; operand 1 is tied to the
;; destination throughout.
6212 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6213 [(set (match_operand:V4SI 0 "register_operand" "=w")
6214 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6215 (match_operand:V4SI 2 "register_operand" "w")
6216 (match_operand:V4SI 3 "register_operand" "w")]
6218 "TARGET_SIMD && TARGET_SHA2"
6219 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6220 [(set_attr "type" "crypto_sha256_slow")]
6223 (define_insn "aarch64_crypto_sha256su0v4si"
6224 [(set (match_operand:V4SI 0 "register_operand" "=w")
6225 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6226 (match_operand:V4SI 2 "register_operand" "w")]
6228 "TARGET_SIMD && TARGET_SHA2"
6229 "sha256su0\\t%0.4s, %2.4s"
6230 [(set_attr "type" "crypto_sha256_fast")]
6233 (define_insn "aarch64_crypto_sha256su1v4si"
6234 [(set (match_operand:V4SI 0 "register_operand" "=w")
6235 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6236 (match_operand:V4SI 2 "register_operand" "w")
6237 (match_operand:V4SI 3 "register_operand" "w")]
6239 "TARGET_SIMD && TARGET_SHA2"
6240 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6241 [(set_attr "type" "crypto_sha256_slow")]
;; SHA-512 insns operate on V2DI and require TARGET_SHA3.
6246 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6247 [(set (match_operand:V2DI 0 "register_operand" "=w")
6248 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6249 (match_operand:V2DI 2 "register_operand" "w")
6250 (match_operand:V2DI 3 "register_operand" "w")]
6252 "TARGET_SIMD && TARGET_SHA3"
6253 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6254 [(set_attr "type" "crypto_sha512")]
6257 (define_insn "aarch64_crypto_sha512su0qv2di"
6258 [(set (match_operand:V2DI 0 "register_operand" "=w")
6259 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6260 (match_operand:V2DI 2 "register_operand" "w")]
6262 "TARGET_SIMD && TARGET_SHA3"
6263 "sha512su0\\t%0.2d, %2.2d"
6264 [(set_attr "type" "crypto_sha512")]
6267 (define_insn "aarch64_crypto_sha512su1qv2di"
6268 [(set (match_operand:V2DI 0 "register_operand" "=w")
6269 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6270 (match_operand:V2DI 2 "register_operand" "w")
6271 (match_operand:V2DI 3 "register_operand" "w")]
6273 "TARGET_SIMD && TARGET_SHA3"
6274 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6275 [(set_attr "type" "crypto_sha512")]
;; SHA-3 bit-manipulation insns (eor3/rax1/xar/bcax, TARGET_SHA3) and the
;; SM3/SM4 Chinese-crypto insns (TARGET_SM4).
;; NOTE(review): extraction is missing interior lines (rtx operators such
;; as the xor/rotate wrappers, UNSPEC names, closing parens); code left
;; byte-identical throughout.
6280 (define_insn "eor3q<mode>4"
6281 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6284 (match_operand:VQ_I 2 "register_operand" "w")
6285 (match_operand:VQ_I 3 "register_operand" "w"))
6286 (match_operand:VQ_I 1 "register_operand" "w")))]
6287 "TARGET_SIMD && TARGET_SHA3"
6288 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6289 [(set_attr "type" "crypto_sha3")]
6292 (define_insn "aarch64_rax1qv2di"
6293 [(set (match_operand:V2DI 0 "register_operand" "=w")
6296 (match_operand:V2DI 2 "register_operand" "w")
6298 (match_operand:V2DI 1 "register_operand" "w")))]
6299 "TARGET_SIMD && TARGET_SHA3"
6300 "rax1\\t%0.2d, %1.2d, %2.2d"
6301 [(set_attr "type" "crypto_sha3")]
;; xar: XOR then rotate by an immediate (Usd constraint on operand 3).
6304 (define_insn "aarch64_xarqv2di"
6305 [(set (match_operand:V2DI 0 "register_operand" "=w")
6308 (match_operand:V2DI 1 "register_operand" "%w")
6309 (match_operand:V2DI 2 "register_operand" "w"))
6310 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6311 "TARGET_SIMD && TARGET_SHA3"
6312 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6313 [(set_attr "type" "crypto_sha3")]
;; bcax: bit-clear (operand 3 inverted, ANDed with operand 2) then XOR.
6316 (define_insn "bcaxq<mode>4"
6317 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6320 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6321 (match_operand:VQ_I 2 "register_operand" "w"))
6322 (match_operand:VQ_I 1 "register_operand" "w")))]
6323 "TARGET_SIMD && TARGET_SHA3"
6324 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6325 [(set_attr "type" "crypto_sha3")]
;; SM3 hash insns.
6330 (define_insn "aarch64_sm3ss1qv4si"
6331 [(set (match_operand:V4SI 0 "register_operand" "=w")
6332 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6333 (match_operand:V4SI 2 "register_operand" "w")
6334 (match_operand:V4SI 3 "register_operand" "w")]
6336 "TARGET_SIMD && TARGET_SM4"
6337 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6338 [(set_attr "type" "crypto_sm3")]
;; sm3tt: operand 4 is a 2-bit immediate lane selector (Ui2).
6342 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6343 [(set (match_operand:V4SI 0 "register_operand" "=w")
6344 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6345 (match_operand:V4SI 2 "register_operand" "w")
6346 (match_operand:V4SI 3 "register_operand" "w")
6347 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6349 "TARGET_SIMD && TARGET_SM4"
6350 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6351 [(set_attr "type" "crypto_sm3")]
6354 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6355 [(set (match_operand:V4SI 0 "register_operand" "=w")
6356 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6357 (match_operand:V4SI 2 "register_operand" "w")
6358 (match_operand:V4SI 3 "register_operand" "w")]
6360 "TARGET_SIMD && TARGET_SM4"
6361 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6362 [(set_attr "type" "crypto_sm3")]
;; SM4 cipher insns.
6367 (define_insn "aarch64_sm4eqv4si"
6368 [(set (match_operand:V4SI 0 "register_operand" "=w")
6369 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6370 (match_operand:V4SI 2 "register_operand" "w")]
6372 "TARGET_SIMD && TARGET_SM4"
6373 "sm4e\\t%0.4s, %2.4s"
6374 [(set_attr "type" "crypto_sm4")]
6377 (define_insn "aarch64_sm4ekeyqv4si"
6378 [(set (match_operand:V4SI 0 "register_operand" "=w")
6379 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6380 (match_operand:V4SI 2 "register_operand" "w")]
6382 "TARGET_SIMD && TARGET_SM4"
6383 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6384 [(set_attr "type" "crypto_sm4")]
6389 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6390 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6392 [(match_operand:VDQSF 1 "register_operand" "0")
6393 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6394 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6398 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6399 <nunits> * 2, false);
6400 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6401 <nunits> * 2, false);
6403 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6412 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6413 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6415 [(match_operand:VDQSF 1 "register_operand" "0")
6416 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6417 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6421 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6422 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6424 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
;; Concrete full-vector FMLAL/FMLSL insns.  Each selects the low or high
;; half of the two half-precision inputs (operands 2 and 3) through the
;; vect_par_cnst_lo/hi_half parallels (operands 4 and 5), multiplies with
;; widening to SF, and accumulates into operand 1, which is tied to the
;; destination (constraint "0").  The "2"-suffixed mnemonics
;; (fmlal2/fmlsl2) are the high-half forms.
;; NOTE(review): some interior RTL lines and the insn conditions are not
;; visible in this extract; code left byte-identical, comments only.

;; FMLAL, low half: widening multiply-add of the low-half vec_selects.
6432 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6433 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6436 (vec_select:<VFMLA_SEL_W>
6437 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6438 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6440 (vec_select:<VFMLA_SEL_W>
6441 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6442 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6443 (match_operand:VDQSF 1 "register_operand" "0")))]
6445 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6446 [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL, low half: as above but with a negation (note the extra closing
;; paren on line 6456, consistent with a neg wrapped around the first
;; vec_select), giving a widening multiply-subtract.
6449 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6450 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6454 (vec_select:<VFMLA_SEL_W>
6455 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6456 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6458 (vec_select:<VFMLA_SEL_W>
6459 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6460 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6461 (match_operand:VDQSF 1 "register_operand" "0")))]
6463 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6464 [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2: same as the _low insn but selecting the high halves.
6467 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6468 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6471 (vec_select:<VFMLA_SEL_W>
6472 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6473 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6475 (vec_select:<VFMLA_SEL_W>
6476 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6477 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6478 (match_operand:VDQSF 1 "register_operand" "0")))]
6480 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6481 [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2: high-half widening multiply-subtract.
6484 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6485 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6489 (vec_select:<VFMLA_SEL_W>
6490 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6491 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6493 (vec_select:<VFMLA_SEL_W>
6494 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6495 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6496 (match_operand:VDQSF 1 "register_operand" "0")))]
6498 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6499 [(set_attr "type" "neon_fp_mul_s")]
;; Lane-indexed FMLAL/FMLSL, V2SF result, 64-bit (V4HF) operands.
;; Operand 4 of each expander is a lane index checked by aarch64_imm2
;; (so 0..3, matching V4HF's four lanes); aarch64_endian_lane_rtx
;; converts it for big-endian lane numbering.  In the insns, operand 3
;; uses constraint "x" — a more restricted SIMD register class than "w"
;; (presumably the low registers required by lane-indexed HF multiplies;
;; confirm against the aarch64 constraint definitions).
;; NOTE(review): interior lines (unspecs, conditions, emit_insn tails,
;; parts of the fma/neg RTL) are missing from this extract; the visible
;; code is left byte-identical.

;; Expander: low-half, lane form (p1 built with high == false).
6502 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6503 [(set (match_operand:V2SF 0 "register_operand" "")
6504 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6505 (match_operand:V4HF 2 "register_operand" "")
6506 (match_operand:V4HF 3 "register_operand" "")
6507 (match_operand:SI 4 "aarch64_imm2" "")]
6511 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6512 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6514 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],

;; Expander: high-half, lane form (p1 built with high == true).
6523 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6524 [(set (match_operand:V2SF 0 "register_operand" "")
6525 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6526 (match_operand:V4HF 2 "register_operand" "")
6527 (match_operand:V4HF 3 "register_operand" "")
6528 (match_operand:SI 4 "aarch64_imm2" "")]
6532 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6533 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6535 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],

;; Insn: FMLAL by lane, low half.
6543 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6544 [(set (match_operand:V2SF 0 "register_operand" "=w")
6548 (match_operand:V4HF 2 "register_operand" "w")
6549 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6553 (match_operand:V4HF 3 "register_operand" "x")
6554 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6555 (match_operand:V2SF 1 "register_operand" "0")))]
6557 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6558 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL by lane, low half.
6561 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6562 [(set (match_operand:V2SF 0 "register_operand" "=w")
6567 (match_operand:V4HF 2 "register_operand" "w")
6568 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6572 (match_operand:V4HF 3 "register_operand" "x")
6573 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6574 (match_operand:V2SF 1 "register_operand" "0")))]
6576 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6577 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLAL2 by lane, high half.
6580 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6581 [(set (match_operand:V2SF 0 "register_operand" "=w")
6585 (match_operand:V4HF 2 "register_operand" "w")
6586 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6590 (match_operand:V4HF 3 "register_operand" "x")
6591 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6592 (match_operand:V2SF 1 "register_operand" "0")))]
6594 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6595 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL2 by lane, high half.
6598 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6599 [(set (match_operand:V2SF 0 "register_operand" "=w")
6604 (match_operand:V4HF 2 "register_operand" "w")
6605 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6609 (match_operand:V4HF 3 "register_operand" "x")
6610 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6611 (match_operand:V2SF 1 "register_operand" "0")))]
6613 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6614 [(set_attr "type" "neon_fp_mul_s")]
;; Lane-indexed FMLAL/FMLSL, quad forms: V4SF result, V8HF data operand,
;; lane taken from a V8HF register (laneq).  The lane index (operand 4)
;; is checked by aarch64_lane_imm3 (0..7, matching V8HF's eight lanes)
;; and converted with aarch64_endian_lane_rtx for big-endian ordering.
;; Operand 3 in the insns uses the restricted "x" SIMD constraint, as in
;; the v2sf lane patterns above.
;; NOTE(review): interior lines (unspecs, conditions, emit_insn tails)
;; are missing from this extract; visible code left byte-identical.

;; Expander: quad, laneq, low half (p1 with high == false).
6617 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6618 [(set (match_operand:V4SF 0 "register_operand" "")
6619 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6620 (match_operand:V8HF 2 "register_operand" "")
6621 (match_operand:V8HF 3 "register_operand" "")
6622 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6626 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6627 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6629 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],

;; Expander: quad, laneq, high half (p1 with high == true).
6637 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6638 [(set (match_operand:V4SF 0 "register_operand" "")
6639 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6640 (match_operand:V8HF 2 "register_operand" "")
6641 (match_operand:V8HF 3 "register_operand" "")
6642 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6646 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6647 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6649 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],

;; Insn: FMLAL (quad) by laneq, low half.
6657 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6658 [(set (match_operand:V4SF 0 "register_operand" "=w")
6662 (match_operand:V8HF 2 "register_operand" "w")
6663 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6667 (match_operand:V8HF 3 "register_operand" "x")
6668 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6669 (match_operand:V4SF 1 "register_operand" "0")))]
6671 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6672 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL (quad) by laneq, low half.
6675 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6676 [(set (match_operand:V4SF 0 "register_operand" "=w")
6681 (match_operand:V8HF 2 "register_operand" "w")
6682 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6686 (match_operand:V8HF 3 "register_operand" "x")
6687 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6688 (match_operand:V4SF 1 "register_operand" "0")))]
6690 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6691 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLAL2 (quad) by laneq, high half.
6694 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6695 [(set (match_operand:V4SF 0 "register_operand" "=w")
6699 (match_operand:V8HF 2 "register_operand" "w")
6700 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6704 (match_operand:V8HF 3 "register_operand" "x")
6705 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6706 (match_operand:V4SF 1 "register_operand" "0")))]
6708 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6709 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL2 (quad) by laneq, high half.
6712 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6713 [(set (match_operand:V4SF 0 "register_operand" "=w")
6718 (match_operand:V8HF 2 "register_operand" "w")
6719 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6723 (match_operand:V8HF 3 "register_operand" "x")
6724 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6725 (match_operand:V4SF 1 "register_operand" "0")))]
6727 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6728 [(set_attr "type" "neon_fp_mul_s")]
;; Mixed-width laneq forms: V2SF result with 64-bit (V4HF) data operand 2
;; but the lane taken from a full 128-bit V8HF register (operand 3).
;; Hence the half-selection parallel is built over V4HFmode while the
;; lane index (aarch64_lane_imm3, 0..7) is endian-adjusted in V8HFmode.
;; NOTE(review): interior lines (unspecs, conditions, emit_insn tails)
;; are missing from this extract; visible code left byte-identical.

;; Expander: laneq, low half of the V4HF data operand.
6731 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6732 [(set (match_operand:V2SF 0 "register_operand" "")
6733 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6734 (match_operand:V4HF 2 "register_operand" "")
6735 (match_operand:V8HF 3 "register_operand" "")
6736 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6740 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6741 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6743 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],

;; Expander: laneq, high half of the V4HF data operand.
6752 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6753 [(set (match_operand:V2SF 0 "register_operand" "")
6754 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6755 (match_operand:V4HF 2 "register_operand" "")
6756 (match_operand:V8HF 3 "register_operand" "")
6757 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6761 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6762 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6764 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],

;; Insn: FMLAL by laneq, low half.
6773 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6774 [(set (match_operand:V2SF 0 "register_operand" "=w")
6778 (match_operand:V4HF 2 "register_operand" "w")
6779 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6783 (match_operand:V8HF 3 "register_operand" "x")
6784 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6785 (match_operand:V2SF 1 "register_operand" "0")))]
6787 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6788 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL by laneq, low half.
6791 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6792 [(set (match_operand:V2SF 0 "register_operand" "=w")
6797 (match_operand:V4HF 2 "register_operand" "w")
6798 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6802 (match_operand:V8HF 3 "register_operand" "x")
6803 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6804 (match_operand:V2SF 1 "register_operand" "0")))]
6806 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6807 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLAL2 by laneq, high half.
6810 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6811 [(set (match_operand:V2SF 0 "register_operand" "=w")
6815 (match_operand:V4HF 2 "register_operand" "w")
6816 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6820 (match_operand:V8HF 3 "register_operand" "x")
6821 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6822 (match_operand:V2SF 1 "register_operand" "0")))]
6824 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6825 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL2 by laneq, high half.
6828 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6829 [(set (match_operand:V2SF 0 "register_operand" "=w")
6834 (match_operand:V4HF 2 "register_operand" "w")
6835 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6839 (match_operand:V8HF 3 "register_operand" "x")
6840 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6841 (match_operand:V2SF 1 "register_operand" "0")))]
6843 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6844 [(set_attr "type" "neon_fp_mul_s")]
;; Mirror of the previous family: V4SF result with 128-bit (V8HF) data
;; operand 2, but the lane taken from a 64-bit V4HF register (operand 3).
;; The half-selection parallel is built over V8HFmode while the lane
;; index (aarch64_imm2, 0..3) is endian-adjusted in V4HFmode.
;; NOTE(review): interior lines (unspecs, conditions, emit_insn tails)
;; are missing from this extract; visible code left byte-identical.

;; Expander: quad data, V4HF lane, low half.
6847 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6848 [(set (match_operand:V4SF 0 "register_operand" "")
6849 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6850 (match_operand:V8HF 2 "register_operand" "")
6851 (match_operand:V4HF 3 "register_operand" "")
6852 (match_operand:SI 4 "aarch64_imm2" "")]
6856 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6857 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6859 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],

;; Expander: quad data, V4HF lane, high half.
6867 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6868 [(set (match_operand:V4SF 0 "register_operand" "")
6869 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6870 (match_operand:V8HF 2 "register_operand" "")
6871 (match_operand:V4HF 3 "register_operand" "")
6872 (match_operand:SI 4 "aarch64_imm2" "")]
6876 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6877 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6879 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],

;; Insn: FMLAL (quad) by V4HF lane, low half.
6887 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6888 [(set (match_operand:V4SF 0 "register_operand" "=w")
6892 (match_operand:V8HF 2 "register_operand" "w")
6893 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6897 (match_operand:V4HF 3 "register_operand" "x")
6898 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6899 (match_operand:V4SF 1 "register_operand" "0")))]
6901 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6902 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL (quad) by V4HF lane, low half.
6905 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6906 [(set (match_operand:V4SF 0 "register_operand" "=w")
6911 (match_operand:V8HF 2 "register_operand" "w")
6912 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6916 (match_operand:V4HF 3 "register_operand" "x")
6917 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6918 (match_operand:V4SF 1 "register_operand" "0")))]
6920 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6921 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLAL2 (quad) by V4HF lane, high half.
6924 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6925 [(set (match_operand:V4SF 0 "register_operand" "=w")
6929 (match_operand:V8HF 2 "register_operand" "w")
6930 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6934 (match_operand:V4HF 3 "register_operand" "x")
6935 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6936 (match_operand:V4SF 1 "register_operand" "0")))]
6938 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6939 [(set_attr "type" "neon_fp_mul_s")]

;; Insn: FMLSL2 (quad) by V4HF lane, high half.
6942 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6943 [(set (match_operand:V4SF 0 "register_operand" "=w")
6948 (match_operand:V8HF 2 "register_operand" "w")
6949 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6953 (match_operand:V4HF 3 "register_operand" "x")
6954 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6955 (match_operand:V4SF 1 "register_operand" "0")))]
6957 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6958 [(set_attr "type" "neon_fp_mul_s")]
;; Polynomial (carry-less) multiply, crypto/AES extension.  Both insns
;; produce a 128-bit (TI) result and are gated on TARGET_SIMD &&
;; TARGET_AES.  PMULL multiplies two 64-bit (DI) values; PMULL2
;; multiplies the high 64-bit lanes of two V2DI vectors.
;; NOTE(review): the unspec tag line of each pattern (presumably an
;; UNSPEC_PMULL-style tag) is missing from this extract; the visible
;; code is left byte-identical.
6963 (define_insn "aarch64_crypto_pmulldi"
6964 [(set (match_operand:TI 0 "register_operand" "=w")
6965 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6966 (match_operand:DI 2 "register_operand" "w")]
6968 "TARGET_SIMD && TARGET_AES"
6969 "pmull\\t%0.1q, %1.1d, %2.1d"
6970 [(set_attr "type" "crypto_pmull")]

;; High-half variant: pmull2 reads lane 1 of each V2DI input.
6973 (define_insn "aarch64_crypto_pmullv2di"
6974 [(set (match_operand:TI 0 "register_operand" "=w")
6975 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6976 (match_operand:V2DI 2 "register_operand" "w")]
6978 "TARGET_SIMD && TARGET_AES"
6979 "pmull2\\t%0.1q, %1.2d, %2.2d"
6980 [(set_attr "type" "crypto_pmull")]