1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
23 (match_operand:VALL_F16 1 "general_operand"))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand")
43 (match_operand:VALL 1 "general_operand"))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")
306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
423 ;; fact that their usage need to guarantee that the source vectors are
424 ;; contiguous. It would be wrong to describe the operation without being able
425 ;; to describe the permute that is also required, but even if that is done
426 ;; the permute would have been created as a LOAD_LANES which means the values
427 ;; in the registers are in the wrong order.
428 (define_insn "aarch64_fcadd<rot><mode>"
429 [(set (match_operand:VHSDF 0 "register_operand" "=w")
430 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431 (match_operand:VHSDF 2 "register_operand" "w")]
434 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435 [(set_attr "type" "neon_fcadd")]
438 (define_insn "aarch64_fcmla<rot><mode>"
439 [(set (match_operand:VHSDF 0 "register_operand" "=w")
440 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442 (match_operand:VHSDF 3 "register_operand" "w")]
445 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446 [(set_attr "type" "neon_fcmla")]
450 (define_insn "aarch64_fcmla_lane<rot><mode>"
451 [(set (match_operand:VHSDF 0 "register_operand" "=w")
452 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454 (match_operand:VHSDF 3 "register_operand" "w")
455 (match_operand:SI 4 "const_int_operand" "n")]
459 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
462 [(set_attr "type" "neon_fcmla")]
465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
466 [(set (match_operand:V4HF 0 "register_operand" "=w")
467 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469 (match_operand:V8HF 3 "register_operand" "w")
470 (match_operand:SI 4 "const_int_operand" "n")]
474 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
477 [(set_attr "type" "neon_fcmla")]
480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
481 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484 (match_operand:<VHALF> 3 "register_operand" "w")
485 (match_operand:SI 4 "const_int_operand" "n")]
489 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
491 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
494 [(set_attr "type" "neon_fcmla")]
497 ;; These instructions map to the __builtins for the Dot Product operations.
498 (define_insn "aarch64_<sur>dot<vsi2qi>"
499 [(set (match_operand:VS 0 "register_operand" "=w")
500 (plus:VS (match_operand:VS 1 "register_operand" "0")
501 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502 (match_operand:<VSI2QI> 3 "register_operand" "w")]
505 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506 [(set_attr "type" "neon_dot<q>")]
509 ;; These expands map to the Dot Product optab the vectorizer checks for.
510 ;; The auto-vectorizer expects a dot product builtin that also does an
511 ;; accumulation into the provided register.
512 ;; Given the following pattern
514 ;; for (i=0; i<len; i++) {
520 ;; This can be auto-vectorized to
521 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
523 ;; given enough iterations. However the vectorizer can keep unrolling the loop
524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
529 (define_expand "<sur>dot_prod<vsi2qi>"
530 [(set (match_operand:VS 0 "register_operand")
531 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532 (match_operand:<VSI2QI> 2 "register_operand")]
534 (match_operand:VS 3 "register_operand")))]
538 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
540 emit_insn (gen_rtx_SET (operands[0], operands[3]));
544 ;; These instructions map to the __builtins for the Dot Product
545 ;; indexed operations.
546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547 [(set (match_operand:VS 0 "register_operand" "=w")
548 (plus:VS (match_operand:VS 1 "register_operand" "0")
549 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550 (match_operand:V8QI 3 "register_operand" "<h_con>")
551 (match_operand:SI 4 "immediate_operand" "i")]
555 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
558 [(set_attr "type" "neon_dot<q>")]
561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562 [(set (match_operand:VS 0 "register_operand" "=w")
563 (plus:VS (match_operand:VS 1 "register_operand" "0")
564 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565 (match_operand:V16QI 3 "register_operand" "<h_con>")
566 (match_operand:SI 4 "immediate_operand" "i")]
570 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
573 [(set_attr "type" "neon_dot<q>")]
576 (define_expand "copysign<mode>3"
577 [(match_operand:VHSDF 0 "register_operand")
578 (match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 "TARGET_FLOAT && TARGET_SIMD"
582 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
585 emit_move_insn (v_bitmask,
586 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587 HOST_WIDE_INT_M1U << bits));
588 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589 operands[2], operands[1]));
594 (define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
599 (match_operand:VMUL 1 "register_operand" "<h_con>")
600 (parallel [(match_operand:SI 2 "immediate_operand")])))
601 (match_operand:VMUL 3 "register_operand" "w")))]
604 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
607 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612 (mult:VMUL_CHANGE_NLANES
613 (vec_duplicate:VMUL_CHANGE_NLANES
615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616 (parallel [(match_operand:SI 2 "immediate_operand")])))
617 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
620 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
623 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
630 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631 (match_operand:VMUL 2 "register_operand" "w")))]
633 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
637 (define_insn "@aarch64_rsqrte<mode>"
638 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
642 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
645 (define_insn "@aarch64_rsqrts<mode>"
646 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
651 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
654 (define_expand "rsqrt<mode>2"
655 [(set (match_operand:VALLF 0 "register_operand")
656 (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
660 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
664 (define_insn "*aarch64_mul3_elt_to_64v2df"
665 [(set (match_operand:DF 0 "register_operand" "=w")
668 (match_operand:V2DF 1 "register_operand" "w")
669 (parallel [(match_operand:SI 2 "immediate_operand")]))
670 (match_operand:DF 3 "register_operand" "w")))]
673 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
676 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
679 (define_insn "neg<mode>2"
680 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
683 "neg\t%0.<Vtype>, %1.<Vtype>"
684 [(set_attr "type" "neon_neg<q>")]
687 (define_insn "abs<mode>2"
688 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
691 "abs\t%0.<Vtype>, %1.<Vtype>"
692 [(set_attr "type" "neon_abs<q>")]
695 ;; The intrinsic version of integer ABS must not be allowed to
696 ;; combine with any operation with an integerated ABS step, such
698 (define_insn "aarch64_abs<mode>"
699 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
701 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
704 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705 [(set_attr "type" "neon_abs<q>")]
708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709 ;; This isn't accurate as ABS treats always its input as a signed value.
710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
713 (define_insn "aarch64_<su>abd<mode>_3"
714 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
717 (match_operand:VDQ_BHSI 1 "register_operand" "w")
718 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
723 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724 [(set_attr "type" "neon_abd<q>")]
727 (define_insn "aarch64_<sur>abdl2<mode>_3"
728 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730 (match_operand:VDQV_S 2 "register_operand" "w")]
733 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734 [(set_attr "type" "neon_abd<q>")]
737 (define_insn "aarch64_<sur>abal<mode>_4"
738 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740 (match_operand:VDQV_S 2 "register_operand" "w")
741 (match_operand:<VDBLW> 3 "register_operand" "0")]
744 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745 [(set_attr "type" "neon_arith_acc<q>")]
748 (define_insn "aarch64_<sur>adalp<mode>_3"
749 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751 (match_operand:<VDBLW> 2 "register_operand" "0")]
754 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755 [(set_attr "type" "neon_reduc_add<q>")]
758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
760 ;; reduction of the difference into a V4SI vector and accumulate that into
761 ;; operand 3 before copying that into the result operand 0.
762 ;; Perform that with a sequence of:
763 ;; UABDL2 tmp.8h, op1.16b, op2.16b
764 ;; UABAL tmp.8h, op1.16b, op2.16b
765 ;; UADALP op3.4s, tmp.8h
766 ;; MOV op0, op3 // should be eliminated in later passes.
768 ;; For TARGET_DOTPROD we do:
769 ;; MOV tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
770 ;; UABD tmp2.16b, op1.16b, op2.16b
771 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
772 ;; MOV op0, op3 // RA will tie the operands of UDOT appropriately.
774 ;; The signed version just uses the signed variants of the above instructions
775 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
778 (define_expand "<sur>sadv16qi"
779 [(use (match_operand:V4SI 0 "register_operand"))
780 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
781 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
782 (use (match_operand:V4SI 3 "register_operand"))]
787 rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
788 rtx abd = gen_reg_rtx (V16QImode);
789 emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
790 emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
794 rtx reduc = gen_reg_rtx (V8HImode);
795 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
797 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
798 operands[2], reduc));
799 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
801 emit_move_insn (operands[0], operands[3]);
806 (define_insn "aba<mode>_3"
807 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
808 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
809 (match_operand:VDQ_BHSI 1 "register_operand" "w")
810 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
811 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
813 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
814 [(set_attr "type" "neon_arith_acc<q>")]
817 (define_insn "fabd<mode>3"
818 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
821 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
822 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
824 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
825 [(set_attr "type" "neon_fp_abd_<stype><q>")]
828 ;; For AND (vector, register) and BIC (vector, immediate)
829 (define_insn "and<mode>3"
830 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
831 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
832 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
835 switch (which_alternative)
838 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
840 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
846 [(set_attr "type" "neon_logic<q>")]
849 ;; For ORR (vector, register) and ORR (vector, immediate)
850 (define_insn "ior<mode>3"
851 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
852 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
853 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
856 switch (which_alternative)
859 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
861 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
867 [(set_attr "type" "neon_logic<q>")]
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
872 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
873 (match_operand:VDQ_I 2 "register_operand" "w")))]
875 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
876 [(set_attr "type" "neon_logic<q>")]
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
881 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
883 "not\t%0.<Vbtype>, %1.<Vbtype>"
884 [(set_attr "type" "neon_logic<q>")]
887 (define_insn "aarch64_simd_vec_set<mode>"
888 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
890 (vec_duplicate:VALL_F16
891 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
892 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
893 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
896 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
897 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
898 switch (which_alternative)
901 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
903 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
905 return "ld1\\t{%0.<Vetype>}[%p2], %1";
910 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
913 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
914 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
916 (vec_duplicate:VALL_F16
918 (match_operand:VALL_F16 3 "register_operand" "w")
920 [(match_operand:SI 4 "immediate_operand" "i")])))
921 (match_operand:VALL_F16 1 "register_operand" "0")
922 (match_operand:SI 2 "immediate_operand" "i")))]
925 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
926 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
927 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
929 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
931 [(set_attr "type" "neon_ins<q>")]
934 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
935 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
936 (vec_merge:VALL_F16_NO_V2Q
937 (vec_duplicate:VALL_F16_NO_V2Q
939 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
941 [(match_operand:SI 4 "immediate_operand" "i")])))
942 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
943 (match_operand:SI 2 "immediate_operand" "i")))]
946 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
947 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
948 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
949 INTVAL (operands[4]));
951 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
953 [(set_attr "type" "neon_ins<q>")]
956 (define_expand "signbit<mode>2"
957 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
958 (use (match_operand:VDQSF 1 "register_operand"))]
961 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
962 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
964 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
966 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
;; Vector shifts: immediate-shift and register-shift define_insns.
;; NOTE(review): this chunk appears to have lines elided by extraction
;; (insn condition strings and some closing brackets are not visible);
;; the leading numbers embedded in each line are part of the extracted text.

;; Vector logical shift right by immediate -> USHR.
971 (define_insn "aarch64_simd_lshr<mode>"
972 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
973 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
974 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
976 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
977 [(set_attr "type" "neon_shift_imm<q>")]

;; Vector arithmetic shift right by immediate -> SSHR.
980 (define_insn "aarch64_simd_ashr<mode>"
981 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
982 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
983 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
985 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
986 [(set_attr "type" "neon_shift_imm<q>")]

;; Vector shift left by immediate -> SHL.
989 (define_insn "aarch64_simd_imm_shl<mode>"
990 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
991 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
992 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
994 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
995 [(set_attr "type" "neon_shift_imm<q>")]

;; Vector shift left by a per-lane register amount -> SSHL.
;; Maps RTL ashift directly; negative lane counts are handled by the
;; unspec patterns below instead.
998 (define_insn "aarch64_simd_reg_sshl<mode>"
999 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1000 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1001 (match_operand:VDQ_I 2 "register_operand" "w")))]
1003 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1004 [(set_attr "type" "neon_shift_reg<q>")]

;; USHL with a register shift amount, expressed as an unspec because a
;; negative lane value shifts right (no single RTL code covers that).
1007 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
1008 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1009 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1010 (match_operand:VDQ_I 2 "register_operand" "w")]
1011 UNSPEC_ASHIFT_UNSIGNED))]
1013 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1014 [(set_attr "type" "neon_shift_reg<q>")]

;; SSHL with a register shift amount; unspec for the same reason as above.
1017 (define_insn "aarch64_simd_reg_shl<mode>_signed"
1018 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1019 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1020 (match_operand:VDQ_I 2 "register_operand" "w")]
1021 UNSPEC_ASHIFT_SIGNED))]
1023 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1024 [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name shift expanders.  Each takes a scalar SI shift count:
;; a constant in range is duplicated into a vector and fed to the
;; immediate-shift insn; otherwise the count is forced to a register,
;; duplicated across lanes, and the register-shift (SSHL/USHL) form is
;; used.  For the right shifts the count is negated first, since
;; SSHL/USHL shift right when the lane value is negative.
;; NOTE(review): several interior lines (else-branches, closing braces,
;; trailing call arguments) are elided in this extract.

;; Left shift.  Note the in-range test is >= 0 && < bit_width, unlike
;; the right-shift expanders below which accept 1..bit_width.
1027 (define_expand "ashl<mode>3"
1028 [(match_operand:VDQ_I 0 "register_operand")
1029 (match_operand:VDQ_I 1 "register_operand")
1030 (match_operand:SI 2 "general_operand")]
1033 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1036 if (CONST_INT_P (operands[2]))
1038 shift_amount = INTVAL (operands[2]);
1039 if (shift_amount >= 0 && shift_amount < bit_width)
1041 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1043 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1050 operands[2] = force_reg (SImode, operands[2]);
1053 else if (MEM_P (operands[2]))
1055 operands[2] = force_reg (SImode, operands[2]);
1058 if (REG_P (operands[2]))
1060 rtx tmp = gen_reg_rtx (<MODE>mode);
1061 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1062 convert_to_mode (<VEL>mode,
1065 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Logical (zero-filling) right shift.  Register counts are negated
;; (gen_negsi2) before duplication so USHL shifts right.
1074 (define_expand "lshr<mode>3"
1075 [(match_operand:VDQ_I 0 "register_operand")
1076 (match_operand:VDQ_I 1 "register_operand")
1077 (match_operand:SI 2 "general_operand")]
1080 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1083 if (CONST_INT_P (operands[2]))
1085 shift_amount = INTVAL (operands[2]);
1086 if (shift_amount > 0 && shift_amount <= bit_width)
1088 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1090 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1096 operands[2] = force_reg (SImode, operands[2]);
1098 else if (MEM_P (operands[2]))
1100 operands[2] = force_reg (SImode, operands[2]);
1103 if (REG_P (operands[2]))
1105 rtx tmp = gen_reg_rtx (SImode);
1106 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1107 emit_insn (gen_negsi2 (tmp, operands[2]));
1108 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1109 convert_to_mode (<VEL>mode,
1111 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],

;; Arithmetic (sign-filling) right shift; same structure as lshr<mode>3
;; but emits SSHR / signed register shift.
1121 (define_expand "ashr<mode>3"
1122 [(match_operand:VDQ_I 0 "register_operand")
1123 (match_operand:VDQ_I 1 "register_operand")
1124 (match_operand:SI 2 "general_operand")]
1127 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1130 if (CONST_INT_P (operands[2]))
1132 shift_amount = INTVAL (operands[2]);
1133 if (shift_amount > 0 && shift_amount <= bit_width)
1135 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1137 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1143 operands[2] = force_reg (SImode, operands[2]);
1145 else if (MEM_P (operands[2]))
1147 operands[2] = force_reg (SImode, operands[2]);
1150 if (REG_P (operands[2]))
1152 rtx tmp = gen_reg_rtx (SImode);
1153 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1154 emit_insn (gen_negsi2 (tmp, operands[2]));
1155 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1156 convert_to_mode (<VEL>mode,
1158 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift expanders (shift count is itself a vector),
;; plus the 64-bit scalar "simddi" intrinsic expanders.

;; Vector shift left by vector: maps straight onto SSHL.
1168 (define_expand "vashl<mode>3"
1169 [(match_operand:VDQ_I 0 "register_operand")
1170 (match_operand:VDQ_I 1 "register_operand")
1171 (match_operand:VDQ_I 2 "register_operand")]
1174 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

1179 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1180 ;; Negating individual lanes most certainly offsets the
1181 ;; gain from vectorization.
;; Vector arithmetic shift right by vector: negate the counts, then SSHL.
1182 (define_expand "vashr<mode>3"
1183 [(match_operand:VDQ_BHSI 0 "register_operand")
1184 (match_operand:VDQ_BHSI 1 "register_operand")
1185 (match_operand:VDQ_BHSI 2 "register_operand")]
1188 rtx neg = gen_reg_rtx (<MODE>mode);
1189 emit (gen_neg<mode>2 (neg, operands[2]));
1190 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],

;; Scalar DI arithmetic shift right intrinsic: an asr by 64 is
;; equivalent to asr by 63 (result is all sign-bit copies), which the
;; standard ashrdi3 pattern does not accept, so clamp it here.
1196 (define_expand "aarch64_ashr_simddi"
1197 [(match_operand:DI 0 "register_operand")
1198 (match_operand:DI 1 "register_operand")
1199 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1202 /* An arithmetic shift right by 64 fills the result with copies of the sign
1203 bit, just like asr by 63 - however the standard pattern does not handle
1205 if (INTVAL (operands[2]) == 64)
1206 operands[2] = GEN_INT (63);
1207 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));

;; Vector logical shift right by vector: negate the counts, then USHL.
1212 (define_expand "vlshr<mode>3"
1213 [(match_operand:VDQ_BHSI 0 "register_operand")
1214 (match_operand:VDQ_BHSI 1 "register_operand")
1215 (match_operand:VDQ_BHSI 2 "register_operand")]
1218 rtx neg = gen_reg_rtx (<MODE>mode);
1219 emit (gen_neg<mode>2 (neg, operands[2]));
1220 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],

;; Scalar DI logical shift right intrinsic: lsr by 64 yields zero, which
;; lshrdi3 cannot express, so emit a zero move for that case.
1225 (define_expand "aarch64_lshr_simddi"
1226 [(match_operand:DI 0 "register_operand")
1227 (match_operand:DI 1 "register_operand")
1228 (match_operand:SI 2 "aarch64_shift_imm64_di")]
1231 if (INTVAL (operands[2]) == 64)
1232 emit_move_insn (operands[0], const0_rtx)
1234 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1239 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper).  On big-endian
;; the architectural "right" is the other direction, hence SHL there.
1240 (define_insn "vec_shr_<mode>"
1241 [(set (match_operand:VD 0 "register_operand" "=w")
1242 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1243 (match_operand:SI 2 "immediate_operand" "i")]
1247 if (BYTES_BIG_ENDIAN)
1248 return "shl %d0, %d1, %2";
1250 return "ushr %d0, %d1, %2";
1252 [(set_attr "type" "neon_shift_imm")]

;; Insert scalar operand 1 into lane INTVAL(operands[2]) of vector
;; operand 0.  The lane index is converted to the one-hot mask that
;; aarch64_simd_vec_set expects.
1255 (define_expand "vec_set<mode>"
1256 [(match_operand:VALL_F16 0 "register_operand")
1257 (match_operand:<VEL> 1 "register_operand")
1258 (match_operand:SI 2 "immediate_operand")]
1261 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1262 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1263 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate (MLA) and multiply-subtract (MLS)
;; patterns, including lane ("elt") and duplicated-scalar ("merge")
;; combiner variants.  Operand 1/4 is tied to the destination ("0")
;; because MLA/MLS read-modify-write the accumulator.

;; d = acc + a * b  -> MLA (vector x vector).
1269 (define_insn "aarch64_mla<mode>"
1270 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1271 (plus:VDQ_BHSI (mult:VDQ_BHSI
1272 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1273 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1274 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1276 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1277 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLA with one multiplicand broadcast from a lane of a same-width vector.
1280 (define_insn "*aarch64_mla_elt<mode>"
1281 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1284 (vec_duplicate:VDQHS
1286 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1287 (parallel [(match_operand:SI 2 "immediate_operand")])))
1288 (match_operand:VDQHS 3 "register_operand" "w"))
1289 (match_operand:VDQHS 4 "register_operand" "0")))]
;; Lane number is remapped for endianness before printing.
1292 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1293 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1295 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; As above, but the lane comes from the opposite-width vector mode.
1298 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1299 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1302 (vec_duplicate:VDQHS
1304 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1305 (parallel [(match_operand:SI 2 "immediate_operand")])))
1306 (match_operand:VDQHS 3 "register_operand" "w"))
1307 (match_operand:VDQHS 4 "register_operand" "0")))]
1310 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1311 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1313 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLA with one multiplicand duplicated from a scalar register: use
;; lane 0 of the by-element form.
1316 (define_insn "*aarch64_mla_elt_merge<mode>"
1317 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1319 (mult:VDQHS (vec_duplicate:VDQHS
1320 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1321 (match_operand:VDQHS 2 "register_operand" "w"))
1322 (match_operand:VDQHS 3 "register_operand" "0")))]
1324 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1325 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; d = acc - a * b  -> MLS (vector x vector).
1328 (define_insn "aarch64_mls<mode>"
1329 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1330 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1331 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1332 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1334 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1335 [(set_attr "type" "neon_mla_<Vetype><q>")]

;; MLS by-lane variant (same-width vector source).
1338 (define_insn "*aarch64_mls_elt<mode>"
1339 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1341 (match_operand:VDQHS 4 "register_operand" "0")
1343 (vec_duplicate:VDQHS
1345 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1346 (parallel [(match_operand:SI 2 "immediate_operand")])))
1347 (match_operand:VDQHS 3 "register_operand" "w"))))]
1350 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1351 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1353 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLS by-lane variant (opposite-width vector source).
1356 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1357 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1359 (match_operand:VDQHS 4 "register_operand" "0")
1361 (vec_duplicate:VDQHS
1363 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1364 (parallel [(match_operand:SI 2 "immediate_operand")])))
1365 (match_operand:VDQHS 3 "register_operand" "w"))))]
1368 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1369 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1371 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]

;; MLS with a duplicated scalar multiplicand: lane 0 by-element form.
1374 (define_insn "*aarch64_mls_elt_merge<mode>"
1375 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1377 (match_operand:VDQHS 1 "register_operand" "0")
1378 (mult:VDQHS (vec_duplicate:VDQHS
1379 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1380 (match_operand:VDQHS 3 "register_operand" "w"))))]
1382 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1383 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1386 ;; Max/Min operations.
;; Signed/unsigned SMAX/SMIN/UMAX/UMIN on byte/half/word lanes.
1387 (define_insn "<su><maxmin><mode>3"
1388 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1389 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1390 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1392 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1393 [(set_attr "type" "neon_minmax<q>")]

;; V2DI max/min: no direct instruction exists, so synthesise it with a
;; compare and a vcond (vector conditional select).
1396 (define_expand "<su><maxmin>v2di3"
1397 [(set (match_operand:V2DI 0 "register_operand")
1398 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1399 (match_operand:V2DI 2 "register_operand")))]
1402 enum rtx_code cmp_operator;
;; NOTE(review): the switch selecting cmp_operator is elided in this
;; extract (original lines 1403-1422).
1423 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1424 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1425 operands[2], cmp_fmt, operands[1], operands[2]));

1429 ;; Pairwise Integer Max/Min operations.
1430 (define_insn "aarch64_<maxmin_uns>p<mode>"
1431 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1433 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1436 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437 [(set_attr "type" "neon_minmax<q>")]

1440 ;; Pairwise FP Max/Min operations.
1441 (define_insn "aarch64_<maxmin_uns>p<mode>"
1442 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1443 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1444 (match_operand:VHSDF 2 "register_operand" "w")]
1447 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1448 [(set_attr "type" "neon_minmax<q>")]
1451 ;; vec_concat gives a new vector with the low elements from operand 1, and
1452 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1453 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1454 ;; What that means, is that the RTL descriptions of the below patterns
1455 ;; need to change depending on endianness.

1457 ;; Move to the low architectural bits of the register.
1458 ;; On little-endian this is { operand, zeroes }
1459 ;; On big-endian this is { zeroes, operand }

;; Little-endian, non-2-element quad modes.
1461 (define_insn "move_lo_quad_internal_<mode>"
1462 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1464 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1465 (vec_duplicate:<VHALF> (const_int 0))))]
1466 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
;; NOTE(review): the three-alternative output template is elided here.
1471 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1472 (set_attr "length" "4")
1473 (set_attr "arch" "simd,fp,simd")]

;; Little-endian, 2-element quad modes (the zero half is a const, not a
;; vec_duplicate).
1476 (define_insn "move_lo_quad_internal_<mode>"
1477 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1479 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1481 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1486 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1487 (set_attr "length" "4")
1488 (set_attr "arch" "simd,fp,simd")]

;; Big-endian counterpart of the first pattern: zeroes first in the
;; vec_concat.
1491 (define_insn "move_lo_quad_internal_be_<mode>"
1492 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1494 (vec_duplicate:<VHALF> (const_int 0))
1495 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1496 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1501 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1502 (set_attr "length" "4")
1503 (set_attr "arch" "simd,fp,simd")]

;; Big-endian, 2-element quad modes.
1506 (define_insn "move_lo_quad_internal_be_<mode>"
1507 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1510 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1511 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1516 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1517 (set_attr "length" "4")
1518 (set_attr "arch" "simd,fp,simd")]

;; Dispatch to the endian-appropriate internal pattern above.
1521 (define_expand "move_lo_quad_<mode>"
1522 [(match_operand:VQ 0 "register_operand")
1523 (match_operand:VQ 1 "register_operand")]
1526 if (BYTES_BIG_ENDIAN)
1527 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1529 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));

1534 ;; Move operand1 to the high architectural bits of the register, keeping
1535 ;; the low architectural bits of operand2.
1536 ;; For little-endian this is { operand2, operand1 }
1537 ;; For big-endian this is { operand1, operand2 }

;; Little-endian high-half insert; emits INS %0.d[1], %1.d[0].
1539 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1540 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1544 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1545 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1546 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1548 ins\\t%0.d[1], %1.d[0]
1550 [(set_attr "type" "neon_ins")]

;; Big-endian high-half insert.
1553 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1554 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1556 (match_operand:<VHALF> 1 "register_operand" "w,r")
1559 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1560 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1562 ins\\t%0.d[1], %1.d[0]
1564 [(set_attr "type" "neon_ins")]

;; Dispatch expander: build the lo-half lane-selection parallel and pick
;; the endian-appropriate insn.
1567 (define_expand "move_hi_quad_<mode>"
1568 [(match_operand:VQ 0 "register_operand")
1569 (match_operand:<VHALF> 1 "register_operand")]
1572 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1573 if (BYTES_BIG_ENDIAN)
1574 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1577 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1582 ;; Narrowing operations.

;; Truncate each lane to half width -> XTN.
1585 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1586 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1587 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1589 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1590 [(set_attr "type" "neon_shift_imm_narrow_q")]

;; Pack two 64-bit vectors into one narrowed 128-bit result: assemble
;; the double-width temporary with move_lo/hi_quad (swapping which input
;; is "lo" on big-endian), then XTN it.
1593 (define_expand "vec_pack_trunc_<mode>"
1594 [(match_operand:<VNARROWD> 0 "register_operand")
1595 (match_operand:VDN 1 "register_operand")
1596 (match_operand:VDN 2 "register_operand")]
1599 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1600 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1601 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1603 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1604 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1605 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));

;; Pack two 128-bit vectors: XTN into the low half, XTN2 into the high
;; half (order swapped for big-endian).  "=&w" earlyclobber keeps the
;; destination distinct from the inputs across the two instructions.
1611 (define_insn "vec_pack_trunc_<mode>"
1612 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1613 (vec_concat:<VNARROWQ2>
1614 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1615 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1618 if (BYTES_BIG_ENDIAN)
1619 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1621 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1623 [(set_attr "type" "multiple")
1624 (set_attr "length" "8")]
1627 ;; Widening operations.

;; Sign/zero-extend the low half of a 128-bit vector -> SXTL/UXTL.
1629 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1630 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1631 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1632 (match_operand:VQW 1 "register_operand" "w")
1633 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1636 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1637 [(set_attr "type" "neon_shift_imm_long")]

;; Sign/zero-extend the high half -> SXTL2/UXTL2.
1640 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1641 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1642 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1643 (match_operand:VQW 1 "register_operand" "w")
1644 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1647 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1648 [(set_attr "type" "neon_shift_imm_long")]

;; Standard-name expander: build the hi-half lane parallel and emit the
;; insn above.
1651 (define_expand "vec_unpack<su>_hi_<mode>"
1652 [(match_operand:<VWIDE> 0 "register_operand")
1653 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1656 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1657 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],

;; Standard-name expander for the low half.
1663 (define_expand "vec_unpack<su>_lo_<mode>"
1664 [(match_operand:<VWIDE> 0 "register_operand")
1665 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1668 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1669 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1675 ;; Widening arithmetic.

;; Widening multiply-accumulate on the low halves -> SMLAL/UMLAL.
1677 (define_insn "*aarch64_<su>mlal_lo<mode>"
1678 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1687 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1689 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1690 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-accumulate on the high halves -> SMLAL2/UMLAL2.
1693 (define_insn "*aarch64_<su>mlal_hi<mode>"
1694 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1703 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1705 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1706 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract on the low halves -> SMLSL/UMLSL.
1709 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1710 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1712 (match_operand:<VWIDE> 1 "register_operand" "0")
1714 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1715 (match_operand:VQW 2 "register_operand" "w")
1716 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1717 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1718 (match_operand:VQW 4 "register_operand" "w")
1721 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1722 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply-subtract on the high halves -> SMLSL2/UMLSL2.
1725 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1726 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1728 (match_operand:<VWIDE> 1 "register_operand" "0")
1730 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1731 (match_operand:VQW 2 "register_operand" "w")
1732 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1733 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1734 (match_operand:VQW 4 "register_operand" "w")
1737 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1738 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; 64-bit-source widening multiply-accumulate (whole D register).
1741 (define_insn "*aarch64_<su>mlal<mode>"
1742 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1746 (match_operand:VD_BHSI 1 "register_operand" "w"))
1748 (match_operand:VD_BHSI 2 "register_operand" "w")))
1749 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1751 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1752 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; 64-bit-source widening multiply-subtract (whole D register).
1755 (define_insn "*aarch64_<su>mlsl<mode>"
1756 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1758 (match_operand:<VWIDE> 1 "register_operand" "0")
1761 (match_operand:VD_BHSI 2 "register_operand" "w"))
1763 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1765 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1766 [(set_attr "type" "neon_mla_<Vetype>_long")]

;; Widening multiply of the low halves -> SMULL/UMULL.
1769 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1770 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1771 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1772 (match_operand:VQW 1 "register_operand" "w")
1773 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1774 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1775 (match_operand:VQW 2 "register_operand" "w")
1778 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1779 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Standard-name expander for the low-half widening multiply.
1782 (define_expand "vec_widen_<su>mult_lo_<mode>"
1783 [(match_operand:<VWIDE> 0 "register_operand")
1784 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1785 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1788 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1789 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],

;; Widening multiply of the high halves -> SMULL2/UMULL2.
1796 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1797 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1798 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1799 (match_operand:VQW 1 "register_operand" "w")
1800 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1801 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1802 (match_operand:VQW 2 "register_operand" "w")
1805 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1806 [(set_attr "type" "neon_mul_<Vetype>_long")]

;; Standard-name expander for the high-half widening multiply.
1809 (define_expand "vec_widen_<su>mult_hi_<mode>"
1810 [(match_operand:<VWIDE> 0 "register_operand")
1811 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1812 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1816 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1824 ;; FP vector operations.
1825 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1826 ;; double-precision (64-bit) floating-point data types and arithmetic as
1827 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1828 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1830 ;; Floating-point operations can raise an exception. Vectorizing such
1831 ;; operations are safe because of reasons explained below.
1833 ;; ARMv8 permits an extension to enable trapped floating-point
1834 ;; exception handling, however this is an optional feature. In the
1835 ;; event of a floating-point exception being raised by vectorised
1837 ;; 1. If trapped floating-point exceptions are available, then a trap
1838 ;; will be taken when any lane raises an enabled exception. A trap
1839 ;; handler may determine which lane raised the exception.
1840 ;; 2. Alternatively a sticky exception flag is set in the
1841 ;; floating-point status register (FPSR). Software may explicitly
1842 ;; test the exception flags, in which case the tests will either
1843 ;; prevent vectorisation, allowing precise identification of the
1844 ;; failing operation, or if tested outside of vectorisable regions
1845 ;; then the specific operation and lane are not of interest.
1847 ;; FP arithmetic operations.
;; Basic FP vector arithmetic over half/single/double lane modes.

;; Vector FP add -> FADD.
1849 (define_insn "add<mode>3"
1850 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1851 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1852 (match_operand:VHSDF 2 "register_operand" "w")))]
1854 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1855 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Vector FP subtract -> FSUB.
1858 (define_insn "sub<mode>3"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1861 (match_operand:VHSDF 2 "register_operand" "w")))]
1863 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1864 [(set_attr "type" "neon_fp_addsub_<stype><q>")]

;; Vector FP multiply -> FMUL.
1867 (define_insn "mul<mode>3"
1868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1869 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1870 (match_operand:VHSDF 2 "register_operand" "w")))]
1872 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1873 [(set_attr "type" "neon_fp_mul_<stype><q>")]

;; Vector FP divide expander: try the reciprocal-approximation sequence
;; first; if not applicable, fall through to the FDIV insn below.
1876 (define_expand "div<mode>3"
1877 [(set (match_operand:VHSDF 0 "register_operand")
1878 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1879 (match_operand:VHSDF 2 "register_operand")))]
1882 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1885 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Vector FP divide insn -> FDIV.
1888 (define_insn "*div<mode>3"
1889 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1890 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1891 (match_operand:VHSDF 2 "register_operand" "w")))]
1893 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1894 [(set_attr "type" "neon_fp_div_<stype><q>")]

;; Vector FP negate -> FNEG.
1897 (define_insn "neg<mode>2"
1898 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1899 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1901 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1902 [(set_attr "type" "neon_fp_neg_<stype><q>")]

;; Vector FP absolute value -> FABS.
1905 (define_insn "abs<mode>2"
1906 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1907 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1909 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1910 [(set_attr "type" "neon_fp_abs_<stype><q>")]

;; Fused multiply-add -> FMLA (accumulator operand 3 tied to dest).
1913 (define_insn "fma<mode>4"
1914 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916 (match_operand:VHSDF 2 "register_operand" "w")
1917 (match_operand:VHSDF 3 "register_operand" "0")))]
1919 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1920 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA/FMLS by-element combiner patterns: one multiplicand is a lane of
;; a vector, a duplicated scalar, or (for the *_to_64v2df forms) a lane
;; of a V2DF feeding a scalar DF fma.  Lane numbers are endian-remapped
;; before printing.

;; FMLA with a lane from a same-width vector.
1923 (define_insn "*aarch64_fma4_elt<mode>"
1924 [(set (match_operand:VDQF 0 "register_operand" "=w")
1928 (match_operand:VDQF 1 "register_operand" "<h_con>")
1929 (parallel [(match_operand:SI 2 "immediate_operand")])))
1930 (match_operand:VDQF 3 "register_operand" "w")
1931 (match_operand:VDQF 4 "register_operand" "0")))]
1934 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1935 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1937 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLA with a lane from the opposite-width vector mode.
1940 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1941 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1943 (vec_duplicate:VDQSF
1945 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1946 (parallel [(match_operand:SI 2 "immediate_operand")])))
1947 (match_operand:VDQSF 3 "register_operand" "w")
1948 (match_operand:VDQSF 4 "register_operand" "0")))]
1951 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1952 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1954 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLA with a duplicated scalar register: lane 0 by-element form.
1957 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1958 [(set (match_operand:VMUL 0 "register_operand" "=w")
1961 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1962 (match_operand:VMUL 2 "register_operand" "w")
1963 (match_operand:VMUL 3 "register_operand" "0")))]
1965 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1966 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF fma where one input is a selected lane of a V2DF.
1969 (define_insn "*aarch64_fma4_elt_to_64v2df"
1970 [(set (match_operand:DF 0 "register_operand" "=w")
1973 (match_operand:V2DF 1 "register_operand" "w")
1974 (parallel [(match_operand:SI 2 "immediate_operand")]))
1975 (match_operand:DF 3 "register_operand" "w")
1976 (match_operand:DF 4 "register_operand" "0")))]
1979 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1980 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1982 [(set_attr "type" "neon_fp_mla_d_scalar_q")]

;; Fused multiply-subtract (negated first multiplicand) -> FMLS.
1985 (define_insn "fnma<mode>4"
1986 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1988 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1989 (match_operand:VHSDF 2 "register_operand" "w")
1990 (match_operand:VHSDF 3 "register_operand" "0")))]
1992 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1993 [(set_attr "type" "neon_fp_mla_<stype><q>")]

;; FMLS with a lane from a same-width vector.
1996 (define_insn "*aarch64_fnma4_elt<mode>"
1997 [(set (match_operand:VDQF 0 "register_operand" "=w")
2000 (match_operand:VDQF 3 "register_operand" "w"))
2003 (match_operand:VDQF 1 "register_operand" "<h_con>")
2004 (parallel [(match_operand:SI 2 "immediate_operand")])))
2005 (match_operand:VDQF 4 "register_operand" "0")))]
2008 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2009 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2011 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLS with a lane from the opposite-width vector mode.
2014 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2015 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2018 (match_operand:VDQSF 3 "register_operand" "w"))
2019 (vec_duplicate:VDQSF
2021 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2022 (parallel [(match_operand:SI 2 "immediate_operand")])))
2023 (match_operand:VDQSF 4 "register_operand" "0")))]
2026 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2027 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
2029 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]

;; FMLS with a duplicated scalar register: lane 0 by-element form.
2032 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2033 [(set (match_operand:VMUL 0 "register_operand" "=w")
2036 (match_operand:VMUL 2 "register_operand" "w"))
2038 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2039 (match_operand:VMUL 3 "register_operand" "0")))]
2041 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2042 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]

;; Scalar DF fnma with a V2DF lane input.
2045 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2046 [(set (match_operand:DF 0 "register_operand" "=w")
2049 (match_operand:V2DF 1 "register_operand" "w")
2050 (parallel [(match_operand:SI 2 "immediate_operand")]))
2052 (match_operand:DF 3 "register_operand" "w"))
2053 (match_operand:DF 4 "register_operand" "0")))]
2056 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2057 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2059 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2062 ;; Vector versions of the floating-point frint patterns.
2063 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2064 (define_insn "<frint_pattern><mode>2"
2065 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2066 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2069 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2070 [(set_attr "type" "neon_fp_round_<stype><q>")]
2073 ;; Vector versions of the fcvt standard patterns.
2074 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round (per <frint_suffix>) then convert to signed/unsigned integer
;; lanes with a single FCVT<rounding><S|U> instruction.
2075 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2076 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2077 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2078 [(match_operand:VHSDF 1 "register_operand" "w")]
2081 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2082 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2085 ;; HF Scalar variants of related SIMD instructions.
;; These scalar half-float forms live in the SIMD register file ("=w")
;; and are gated on TARGET_SIMD_F16INST.
2086 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2087 [(set (match_operand:HI 0 "register_operand" "=w")
2088 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2090 "TARGET_SIMD_F16INST"
2091 "fcvt<frint_suffix><su>\t%h0, %h1"
2092 [(set_attr "type" "neon_fp_to_int_s")]
2095 (define_insn "<optab>_trunchfhi2"
2096 [(set (match_operand:HI 0 "register_operand" "=w")
2097 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2098 "TARGET_SIMD_F16INST"
2099 "fcvtz<su>\t%h0, %h1"
2100 [(set_attr "type" "neon_fp_to_int_s")]
2103 (define_insn "<optab>hihf2"
2104 [(set (match_operand:HF 0 "register_operand" "=w")
2105 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2106 "TARGET_SIMD_F16INST"
2107 "<su_optab>cvtf\t%h0, %h1"
2108 [(set_attr "type" "neon_int_to_fp_s")]
;; Fold a multiply by a power of two into the conversion: emitted as a
;; single FCVTZ[SU] with an immediate fractional-bits (#fbits) operand,
;; validated by aarch64_vec_fpconst_pow_of_2 below.
2111 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2112 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2113 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2115 (match_operand:VDQF 1 "register_operand" "w")
2116 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2119 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2120 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2122 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2124 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2125 output_asm_insn (buf, operands);
2128 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Expanders exposing the unspec-based conversions above under the
;; standard optab pattern names (fix/fixuns, fix_trunc, ftrunc).
2131 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2132 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2133 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2134 [(match_operand:VHSDF 1 "register_operand")]
2139 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2140 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2141 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2142 [(match_operand:VHSDF 1 "register_operand")]
2147 (define_expand "ftrunc<VHSDF:mode>2"
2148 [(set (match_operand:VHSDF 0 "register_operand")
2149 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer -> FP conversion on every lane: SCVTF/UCVTF.
2154 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2155 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2157 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2159 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2160 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2163 ;; Conversions between vectors of floats and doubles.
2164 ;; Contains a mix of patterns to match standard pattern names
2165 ;; and those for intrinsics.
2167 ;; Float widening operations.
2169 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2170 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2171 (float_extend:<VWIDE> (vec_select:<VHALF>
2172 (match_operand:VQ_HSF 1 "register_operand" "w")
2173 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2176 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2177 [(set_attr "type" "neon_fp_cvt_widen_s")]
2180 ;; Convert between fixed-point and floating-point (vector modes)
2182 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2183 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2184 (unspec:<VHSDF:FCVT_TARGET>
2185 [(match_operand:VHSDF 1 "register_operand" "w")
2186 (match_operand:SI 2 "immediate_operand" "i")]
2189 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2190 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2193 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2194 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2195 (unspec:<VDQ_HSDI:FCVT_TARGET>
2196 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2197 (match_operand:SI 2 "immediate_operand" "i")]
2200 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2201 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2204 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2205 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2206 ;; the meaning of HI and LO changes depending on the target endianness.
2207 ;; While elsewhere we map the higher numbered elements of a vector to
2208 ;; the lower architectural lanes of the vector, for these patterns we want
2209 ;; to always treat "hi" as referring to the higher architectural lanes.
2210 ;; Consequently, while the patterns below look inconsistent with our
2211 ;; other big-endian patterns their behavior is as required.
;; Widen the LOW architectural half: build the lo-half lane selector
;; (third argument false) and emit the _lo_ insn (FCVTL).
2213 (define_expand "vec_unpacks_lo_<mode>"
2214 [(match_operand:<VWIDE> 0 "register_operand")
2215 (match_operand:VQ_HSF 1 "register_operand")]
2218 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2219 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; FCVTL2: float-extend the HIGH half of the source register
;; (matched via the vect_par_cnst_hi_half lane selector).
2225 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2226 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2227 (float_extend:<VWIDE> (vec_select:<VHALF>
2228 (match_operand:VQ_HSF 1 "register_operand" "w")
2229 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2232 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2233 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen the HIGH architectural half of operand 1 (see the endianness
;; note above these patterns): build the hi-half lane selector (third
;; argument true) and emit the _hi_ insn (FCVTL2).
;; Fix: this previously emitted gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; whose vect_par_cnst_lo_half predicate cannot match the hi-half
;; selector built here.
2236 (define_expand "vec_unpacks_hi_<mode>"
2237 [(match_operand:<VWIDE> 0 "register_operand")
2238 (match_operand:VQ_HSF 1 "register_operand")]
2241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2242 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole 64-bit float vector (float_extend VDF -> VWIDE) with FCVTL.
2247 (define_insn "aarch64_float_extend_lo_<Vwide>"
2248 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2249 (float_extend:<VWIDE>
2250 (match_operand:VDF 1 "register_operand" "w")))]
2252 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2253 [(set_attr "type" "neon_fp_cvt_widen_s")]
2256 ;; Float narrowing operations.
2258 (define_insn "aarch64_float_truncate_lo_<mode>"
2259 [(set (match_operand:VDF 0 "register_operand" "=w")
2261 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2263 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2264 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2 writes the high half of the destination while keeping the low
;; half (operand 1, tied with "0").  The _le/_be insns differ only in
;; vec_concat operand order and the BYTES_BIG_ENDIAN condition.
2267 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2268 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2270 (match_operand:VDF 1 "register_operand" "0")
2272 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2273 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2274 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2275 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2278 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2279 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2282 (match_operand:<VWIDE> 2 "register_operand" "w"))
2283 (match_operand:VDF 1 "register_operand" "0")))]
2284 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2285 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2286 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Dispatch to the _le or _be form depending on target endianness.
2289 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2290 [(match_operand:<VDBL> 0 "register_operand")
2291 (match_operand:VDF 1 "register_operand")
2292 (match_operand:<VWIDE> 2 "register_operand")]
2295 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2296 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2297 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2298 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Narrow two V2DF vectors into one V4SF: FCVTN fills one half of a
;; V2SF temporary, FCVTN2 fills the other half of the result; which
;; input feeds lo vs. hi is endian-dependent.
2303 (define_expand "vec_pack_trunc_v2df"
2304 [(set (match_operand:V4SF 0 "register_operand")
2306 (float_truncate:V2SF
2307 (match_operand:V2DF 1 "register_operand"))
2308 (float_truncate:V2SF
2309 (match_operand:V2DF 2 "register_operand"))
2313 rtx tmp = gen_reg_rtx (V2SFmode);
2314 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2315 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2317 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2318 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2319 tmp, operands[hi]));
;; Pack two DF scalars into one V2SF: assemble them into a V2DF
;; temporary (low/high quad moves, order swapped for big-endian), then
;; narrow the whole vector with FCVTN.
;; Fix: the temporary must be V2DFmode -- gen_move_lo_quad_v2df and
;; gen_move_hi_quad_v2df operate on a V2DF register, and it is the
;; input of the v2df->v2sf truncation; it previously used V2SFmode.
2324 (define_expand "vec_pack_trunc_df"
2325 [(set (match_operand:V2SF 0 "register_operand")
2328 (match_operand:DF 1 "register_operand"))
2330 (match_operand:DF 2 "register_operand"))
2334 rtx tmp = gen_reg_rtx (V2DFmode);
2335 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2336 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2338 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2339 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2340 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2346 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2348 ;; a = (b < c) ? b : c;
2349 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2350 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2353 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2354 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2355 ;; operand will be returned when both operands are zero (i.e. they may not
2356 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2357 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2360 (define_insn "<su><maxmin><mode>3"
2361 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2362 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2363 (match_operand:VHSDF 2 "register_operand" "w")))]
2365 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2366 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2369 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2370 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2371 ;; which implement the IEEE fmax ()/fmin () functions.
2372 (define_insn "<maxmin_uns><mode>3"
2373 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2374 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2375 (match_operand:VHSDF 2 "register_operand" "w")]
2378 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2379 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2382 ;; 'across lanes' add.
;; Reduce into a scratch vector, then extract lane 0 (endian-corrected
;; via aarch64_endian_lane_rtx) as the scalar result.
2384 (define_expand "reduc_plus_scal_<mode>"
2385 [(match_operand:<VEL> 0 "register_operand")
2386 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2390 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2391 rtx scratch = gen_reg_rtx (<MODE>mode);
2392 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2393 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2398 (define_insn "aarch64_faddp<mode>"
2399 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2400 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2401 (match_operand:VHSDF 2 "register_operand" "w")]
2404 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2405 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2408 (define_insn "aarch64_reduc_plus_internal<mode>"
2409 [(set (match_operand:VDQV 0 "register_operand" "=w")
2410 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2413 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2414 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV form; use pairwise ADDP with both inputs the same.
2417 (define_insn "aarch64_reduc_plus_internalv2si"
2418 [(set (match_operand:V2SI 0 "register_operand" "=w")
2419 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2422 "addp\\t%0.2s, %1.2s, %1.2s"
2423 [(set_attr "type" "neon_reduc_add")]
2426 (define_insn "reduc_plus_scal_<mode>"
2427 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2428 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2431 "faddp\\t%<Vetype>0, %1.<Vtype>"
2432 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two pairwise FADDP steps, then extract lane 0.
2435 (define_expand "reduc_plus_scal_v4sf"
2436 [(set (match_operand:SF 0 "register_operand")
2437 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2441 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2442 rtx scratch = gen_reg_rtx (V4SFmode);
2443 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2444 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2445 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Per-lane bit-counting operations: CLS (count leading sign bits),
;; CLZ (count leading zeros), CNT (population count, byte vectors only).
2449 (define_insn "clrsb<mode>2"
2450 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2451 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2453 "cls\\t%0.<Vtype>, %1.<Vtype>"
2454 [(set_attr "type" "neon_cls<q>")]
2457 (define_insn "clz<mode>2"
2458 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2459 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2461 "clz\\t%0.<Vtype>, %1.<Vtype>"
2462 [(set_attr "type" "neon_cls<q>")]
2465 (define_insn "popcount<mode>2"
2466 [(set (match_operand:VB 0 "register_operand" "=w")
2467 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2469 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2470 [(set_attr "type" "neon_cnt<q>")]
2473 ;; 'across lanes' max and min ops.
2475 ;; Template for outputting a scalar, so we can create __builtins which can be
2476 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2477 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2478 [(match_operand:<VEL> 0 "register_operand")
2479 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2483 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2484 rtx scratch = gen_reg_rtx (<MODE>mode);
2485 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2487 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2492 ;; Likewise for integer cases, signed and unsigned.
2493 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2494 [(match_operand:<VEL> 0 "register_operand")
2495 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2499 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2500 rtx scratch = gen_reg_rtx (<MODE>mode);
2501 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2503 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lane max/min insns: the "...v" reduction form for most modes;
;; V2SI again falls back to the pairwise form with duplicated input.
2508 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2509 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2510 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2513 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2514 [(set_attr "type" "neon_reduc_minmax<q>")]
2517 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2518 [(set (match_operand:V2SI 0 "register_operand" "=w")
2519 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2522 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2523 [(set_attr "type" "neon_reduc_minmax")]
2526 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2527 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2528 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2531 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2532 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2535 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2537 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2540 ;; Thus our BSL is of the form:
2541 ;; op0 = bsl (mask, op2, op3)
2542 ;; We can use any of:
2545 ;; bsl mask, op1, op2
2546 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2547 ;; bit op0, op2, mask
2548 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2549 ;; bif op0, op1, mask
2551 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2552 ;; Some forms of straight-line code may generate the equivalent form
2553 ;; in *aarch64_simd_bsl<mode>_alt.
;; Each constraint alternative ties a different input to the output
;; register ("0"), selecting between the bsl/bit/bif encodings below.
2555 (define_insn "aarch64_simd_bsl<mode>_internal"
2556 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2560 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2561 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2562 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2563 (match_dup:<V_INT_EQUIV> 3)
2567 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2568 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2569 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2570 [(set_attr "type" "neon_bsl<q>")]
2573 ;; We need this form in addition to the above pattern to match the case
2574 ;; when combine tries merging three insns such that the second operand of
2575 ;; the outer XOR matches the second operand of the inner XOR rather than
2576 ;; the first. The two are equivalent but since recog doesn't try all
2577 ;; permutations of commutative operations, we have to have a separate pattern.
2579 (define_insn "*aarch64_simd_bsl<mode>_alt"
2580 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2584 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2585 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2586 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2587 (match_dup:<V_INT_EQUIV> 2)))]
2590 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2591 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2592 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2593 [(set_attr "type" "neon_bsl<q>")]
2596 ;; DImode is special, we want to avoid computing operations which are
2597 ;; more naturally computed in general purpose registers in the vector
2598 ;; registers. If we do that, we need to move all three operands from general
2599 ;; purpose registers to vector registers, then back again. However, we
2600 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2601 ;; optimizations based on the component operations of a BSL.
2603 ;; That means we need a splitter back to the individual operations, if they
2604 ;; would be better calculated on the integer side.
2606 (define_insn_and_split "aarch64_simd_bsldi_internal"
2607 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2611 (match_operand:DI 3 "register_operand" "w,0,w,r")
2612 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2613 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2618 bsl\\t%0.8b, %2.8b, %3.8b
2619 bit\\t%0.8b, %2.8b, %1.8b
2620 bif\\t%0.8b, %3.8b, %1.8b
2622 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2623 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2625 /* Split back to individual operations.  If we're before reload, and
2626 able to create a temporary register, do so.  If we're after reload,
2627 we've got an early-clobber destination register, so use that.
2628 Otherwise, we can't create pseudos and we can't yet guarantee that
2629 operands[0] is safe to write, so FAIL to split.  */
2632 if (reload_completed)
2633 scratch = operands[0];
2634 else if (can_create_pseudo_p ())
2635 scratch = gen_reg_rtx (DImode);
2639 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2640 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2641 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2644 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2645 (set_attr "length" "4,4,4,12")]
;; Commuted DImode form (inner XOR operand order swapped, final XOR with
;; operand 2); on general registers it splits to EOR/AND/EOR.
2648 (define_insn_and_split "aarch64_simd_bsldi_alt"
2649 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2653 (match_operand:DI 3 "register_operand" "w,w,0,r")
2654 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2655 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2660 bsl\\t%0.8b, %3.8b, %2.8b
2661 bit\\t%0.8b, %3.8b, %1.8b
2662 bif\\t%0.8b, %2.8b, %1.8b
2664 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2665 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2667 /* Split back to individual operations.  If we're before reload, and
2668 able to create a temporary register, do so.  If we're after reload,
2669 we've got an early-clobber destination register, so use that.
2670 Otherwise, we can't create pseudos and we can't yet guarantee that
2671 operands[0] is safe to write, so FAIL to split.  */
2674 if (reload_completed)
2675 scratch = operands[0];
2676 else if (can_create_pseudo_p ())
2677 scratch = gen_reg_rtx (DImode);
2681 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2682 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2683 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2686 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2687 (set_attr "length" "4,4,4,12")]
;; BSL expander: BSL is a bitwise operation, so FP modes are first
;; lowered (gen_lowpart) to their integer-equivalent modes before the
;; _internal insn is emitted; the result is moved back if needed.
2690 (define_expand "aarch64_simd_bsl<mode>"
2691 [(match_operand:VALLDIF 0 "register_operand")
2692 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2693 (match_operand:VALLDIF 2 "register_operand")
2694 (match_operand:VALLDIF 3 "register_operand")]
2697 /* We can't alias operands together if they have different modes.  */
2698 rtx tmp = operands[0];
2699 if (FLOAT_MODE_P (<MODE>mode))
2701 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2702 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2703 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2705 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2706 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2710 if (tmp != operands[0])
2711 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Mask-select expander: special-case all-ones/all-zero select arms to a
;; plain move or a NOT of the mask; otherwise force the arms into
;; registers and fall back to BSL.
2716 (define_expand "vcond_mask_<mode><v_int_equiv>"
2717 [(match_operand:VALLDI 0 "register_operand")
2718 (match_operand:VALLDI 1 "nonmemory_operand")
2719 (match_operand:VALLDI 2 "nonmemory_operand")
2720 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2723 /* If we have (a = (P) ? -1 : 0);
2724 Then we can simply move the generated mask (result must be int).  */
2725 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2726 && operands[2] == CONST0_RTX (<MODE>mode))
2727 emit_move_insn (operands[0], operands[3]);
2728 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2729 else if (operands[1] == CONST0_RTX (<MODE>mode)
2730 && operands[2] == CONSTM1_RTX (<MODE>mode))
2731 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2734 if (!REG_P (operands[1]))
2735 operands[1] = force_reg (<MODE>mode, operands[1]);
2736 if (!REG_P (operands[2]))
2737 operands[2] = force_reg (<MODE>mode, operands[2]);
2738 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2739 operands[1], operands[2]));
2745 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: each rtx code maps onto one CM* instruction;
;; unsigned LT/LEU swap the operands of CMGTU/CMGEU, and NE is emitted
;; as CMEQ followed by a NOT.
2747 (define_expand "vec_cmp<mode><mode>"
2748 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2749 (match_operator 1 "comparison_operator"
2750 [(match_operand:VSDQ_I_DI 2 "register_operand")
2751 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2754 rtx mask = operands[0];
2755 enum rtx_code code = GET_CODE (operands[1]);
2765 if (operands[3] == CONST0_RTX (<MODE>mode))
2770 if (!REG_P (operands[3]))
2771 operands[3] = force_reg (<MODE>mode, operands[3]);
2779 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2783 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2787 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2791 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2795 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2799 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2803 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2807 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2811 /* Handle NE as !EQ.  */
2812 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2813 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2817 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  The signalling UN*
;; codes must not raise FP exceptions, so NaN lanes are detected with
;; CMEQ self-compares and zeroed before the main compare (see the long
;; comment below).
2827 (define_expand "vec_cmp<mode><v_int_equiv>"
2828 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2829 (match_operator 1 "comparison_operator"
2830 [(match_operand:VDQF 2 "register_operand")
2831 (match_operand:VDQF 3 "nonmemory_operand")]))]
2834 int use_zero_form = 0;
2835 enum rtx_code code = GET_CODE (operands[1]);
2836 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2838 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2847 if (operands[3] == CONST0_RTX (<MODE>mode))
2854 if (!REG_P (operands[3]))
2855 operands[3] = force_reg (<MODE>mode, operands[3]);
2865 comparison = gen_aarch64_cmlt<mode>;
2870 std::swap (operands[2], operands[3]);
2874 comparison = gen_aarch64_cmgt<mode>;
2879 comparison = gen_aarch64_cmle<mode>;
2884 std::swap (operands[2], operands[3]);
2888 comparison = gen_aarch64_cmge<mode>;
2892 comparison = gen_aarch64_cmeq<mode>;
2910 /* All of the above must not raise any FP exceptions.  Thus we first
2911 check each operand for NaNs and force any elements containing NaN to
2912 zero before using them in the compare.
2913 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2914 (cm<cc> (isnan (a) ? 0.0 : a,
2915 isnan (b) ? 0.0 : b))
2916 We use the following transformations for doing the comparisons:
2920 a UNLT b -> b GT a.  */
2922 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2923 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2924 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2925 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2926 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2927 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2928 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2929 lowpart_subreg (<V_INT_EQUIV>mode,
2932 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2933 lowpart_subreg (<V_INT_EQUIV>mode,
2936 gcc_assert (comparison != NULL);
2937 emit_insn (comparison (operands[0],
2938 lowpart_subreg (<MODE>mode,
2939 tmp0, <V_INT_EQUIV>mode),
2940 lowpart_subreg (<MODE>mode,
2941 tmp1, <V_INT_EQUIV>mode)));
2942 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]))
;
2952 /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2953 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2959 a NE b -> ~(a EQ b) */
2960 gcc_assert (comparison != NULL);
2961 emit_insn (comparison (operands[0], operands[2], operands[3]));
2963 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2967 /* LTGT is not guaranteed to not generate a FP exception.  So let's
2968 go the faster way : ((a > b) || (b > a)).  */
2969 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2970 operands[2], operands[3]));
2971 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2972 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2978 /* cmeq (a, a) & cmeq (b, b).  */
2979 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2980 operands[2], operands[2]));
2981 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2982 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2984 if (code == UNORDERED)
2985 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2986 else if (code == UNEQ)
2988 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2989 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned compare expander simply forwards to vec_cmp: the actual
;; signed/unsigned distinction is carried by the rtx code in operand 1.
3000 (define_expand "vec_cmpu<mode><mode>"
3001 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3002 (match_operator 1 "comparison_operator"
3003 [(match_operand:VSDQ_I_DI 2 "register_operand")
3004 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3007 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3008 operands[2], operands[3]));
;; vcond expanders: build a comparison mask with vec_cmp, then select
;; with vcond_mask.  NE is rewritten as EQ with the select arms swapped
;; to avoid vec_cmp's extra NOT.
3012 (define_expand "vcond<mode><mode>"
3013 [(set (match_operand:VALLDI 0 "register_operand")
3014 (if_then_else:VALLDI
3015 (match_operator 3 "comparison_operator"
3016 [(match_operand:VALLDI 4 "register_operand")
3017 (match_operand:VALLDI 5 "nonmemory_operand")])
3018 (match_operand:VALLDI 1 "nonmemory_operand")
3019 (match_operand:VALLDI 2 "nonmemory_operand")))]
3022 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3023 enum rtx_code code = GET_CODE (operands[3]);
3025 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3026 it as well as switch operands 1/2 in order to avoid the additional
3030 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3031 operands[4], operands[5]);
3032 std::swap (operands[1], operands[2]);
3034 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3035 operands[4], operands[5]));
3036 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3037 operands[2], mask));
;; Mixed-mode variant: the selected data mode differs from the compared
;; (FP) mode; both share the same integer-equivalent mask mode.
3042 (define_expand "vcond<v_cmp_mixed><mode>"
3043 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3044 (if_then_else:<V_cmp_mixed>
3045 (match_operator 3 "comparison_operator"
3046 [(match_operand:VDQF_COND 4 "register_operand")
3047 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3048 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3049 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3052 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3053 enum rtx_code code = GET_CODE (operands[3]);
3055 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3056 it as well as switch operands 1/2 in order to avoid the additional
3060 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3061 operands[4], operands[5]);
3062 std::swap (operands[1], operands[2]);
3064 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3065 operands[4], operands[5]));
3066 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3067 operands[0], operands[1],
3068 operands[2], mask));
;; Unsigned vcond on integer modes; the mask shares <MODE>mode here.
3073 (define_expand "vcondu<mode><mode>"
3074 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3075 (if_then_else:VSDQ_I_DI
3076 (match_operator 3 "comparison_operator"
3077 [(match_operand:VSDQ_I_DI 4 "register_operand")
3078 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3079 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3080 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3083 rtx mask = gen_reg_rtx (<MODE>mode);
3084 enum rtx_code code = GET_CODE (operands[3]);
3086 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3087 it as well as switch operands 1/2 in order to avoid the additional
3091 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3092 operands[4], operands[5]);
3093 std::swap (operands[1], operands[2]);
3095 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3096 operands[4], operands[5]));
3097 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3098 operands[2], mask));
;; Mixed-mode vcondu: FP data (VDQF) selected by an integer-mode
;; (<V_cmp_mixed>) unsigned comparison.
3102 (define_expand "vcondu<mode><v_cmp_mixed>"
3103 [(set (match_operand:VDQF 0 "register_operand")
3105 (match_operator 3 "comparison_operator"
3106 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3107 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3108 (match_operand:VDQF 1 "nonmemory_operand")
3109 (match_operand:VDQF 2 "nonmemory_operand")))]
3112 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3113 enum rtx_code code = GET_CODE (operands[3]);
3115 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3116 it as well as switch operands 1/2 in order to avoid the additional
3120 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3121 operands[4], operands[5]);
3122 std::swap (operands[1], operands[2]);
3124 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3126 operands[4], operands[5]));
3127 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3128 operands[2], mask));
3132 ;; Patterns for AArch64 SIMD Intrinsics.
3134 ;; Lane extraction with sign extension to general purpose register.
3135 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3136 [(set (match_operand:GPI 0 "register_operand" "=r")
3138 (vec_select:<VDQQH:VEL>
3139 (match_operand:VDQQH 1 "register_operand" "w")
3140 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3143 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3144 INTVAL (operands[2]));
3145 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3147 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; Zero-extending variant: UMOV always targets a W register (%w0),
;; which zeroes the upper bits of the X register.
3150 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3151 [(set (match_operand:GPI 0 "register_operand" "=r")
3153 (vec_select:<VDQQH:VEL>
3154 (match_operand:VDQQH 1 "register_operand" "w")
3155 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3158 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3159 INTVAL (operands[2]));
3160 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3162 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3165 ;; Lane extraction of a value, neither sign nor zero extension
3166 ;; is guaranteed so upper bits should be considered undefined.
3167 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP to a SIMD scalar, or
;; ST1 of one lane straight to memory.
3168 (define_insn "aarch64_get_lane<mode>"
3169 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3171 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3172 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3176 switch (which_alternative)
3179 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3181 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3183 return "st1\\t{%1.<Vetype>}[%2], %0";
3188 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Concatenate two memory operands into one vector register; the insn
;; condition checks operand 2's address is exactly operand 1's address
;; plus the mode size, i.e. the two loads are adjacent.
3191 (define_insn "load_pair_lanes<mode>"
3192 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3194 (match_operand:VDC 1 "memory_operand" "Utq")
3195 (match_operand:VDC 2 "memory_operand" "m")))]
3196 "TARGET_SIMD && !STRICT_ALIGNMENT
3197 && rtx_equal_p (XEXP (operands[2], 0),
3198 plus_constant (Pmode,
3199 XEXP (operands[1], 0),
3200 GET_MODE_SIZE (<MODE>mode)))"
3202 [(set_attr "type" "neon_load1_1reg_q")]
3205 (define_insn "store_pair_lanes<mode>"
3206 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3208 (match_operand:VDC 1 "register_operand" "w, r")
3209 (match_operand:VDC 2 "register_operand" "w, r")))]
3213 stp\\t%x1, %x2, %y0"
3214 [(set_attr "type" "neon_stp, store_16")]
3217 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; combinez: the high half is known zero (operand 2 matches the
;; aarch64_simd_or_scalar_imm_zero predicate), with vec_concat order
;; flipped between the little- and big-endian insns.
3220 (define_insn "@aarch64_combinez<mode>"
3221 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3223 (match_operand:VDC 1 "general_operand" "w,?r,m")
3224 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3225 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3230 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3231 (set_attr "arch" "simd,fp,simd")]
3234 (define_insn "@aarch64_combinez_be<mode>"
3235 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3237 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3238 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3239 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3244 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3245 (set_attr "arch" "simd,fp,simd")]
;; General combine of two 64-bit values: delegated to C code.
3248 (define_expand "aarch64_combine<mode>"
3249 [(match_operand:<VDBL> 0 "register_operand")
3250 (match_operand:VDC 1 "register_operand")
3251 (match_operand:VDC 2 "register_operand")]
3254 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Internal helper used by the combine above: fill the low then the
;; high quad of the destination.
3260 (define_expand "@aarch64_simd_combine<mode>"
3261 [(match_operand:<VDBL> 0 "register_operand")
3262 (match_operand:VDC 1 "register_operand")
3263 (match_operand:VDC 2 "register_operand")]
3266 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3267 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3270 [(set_attr "type" "multiple")]
3273 ;; <su><addsub>l<q>.
3275 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3276 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3277 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3278 (match_operand:VQW 1 "register_operand" "w")
3279 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3280 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3281 (match_operand:VQW 2 "register_operand" "w")
3284 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3285 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3288 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3289 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3290 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3291 (match_operand:VQW 1 "register_operand" "w")
3292 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3293 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3294 (match_operand:VQW 2 "register_operand" "w")
3297 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3298 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3302 (define_expand "aarch64_saddl2<mode>"
3303 [(match_operand:<VWIDE> 0 "register_operand")
3304 (match_operand:VQW 1 "register_operand")
3305 (match_operand:VQW 2 "register_operand")]
3308 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3309 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3314 (define_expand "aarch64_uaddl2<mode>"
3315 [(match_operand:<VWIDE> 0 "register_operand")
3316 (match_operand:VQW 1 "register_operand")
3317 (match_operand:VQW 2 "register_operand")]
3320 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3321 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3326 (define_expand "aarch64_ssubl2<mode>"
3327 [(match_operand:<VWIDE> 0 "register_operand")
3328 (match_operand:VQW 1 "register_operand")
3329 (match_operand:VQW 2 "register_operand")]
3332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3333 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3338 (define_expand "aarch64_usubl2<mode>"
3339 [(match_operand:<VWIDE> 0 "register_operand")
3340 (match_operand:VQW 1 "register_operand")
3341 (match_operand:VQW 2 "register_operand")]
3344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3345 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3350 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3352 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3353 (match_operand:VD_BHSI 1 "register_operand" "w"))
3355 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3357 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3358 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3361 ;; <su><addsub>w<q>.
3363 (define_expand "widen_ssum<mode>3"
3364 [(set (match_operand:<VDBLW> 0 "register_operand")
3365 (plus:<VDBLW> (sign_extend:<VDBLW>
3366 (match_operand:VQW 1 "register_operand"))
3367 (match_operand:<VDBLW> 2 "register_operand")))]
3370 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3371 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3373 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3375 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3380 (define_expand "widen_ssum<mode>3"
3381 [(set (match_operand:<VWIDE> 0 "register_operand")
3382 (plus:<VWIDE> (sign_extend:<VWIDE>
3383 (match_operand:VD_BHSI 1 "register_operand"))
3384 (match_operand:<VWIDE> 2 "register_operand")))]
3387 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3391 (define_expand "widen_usum<mode>3"
3392 [(set (match_operand:<VDBLW> 0 "register_operand")
3393 (plus:<VDBLW> (zero_extend:<VDBLW>
3394 (match_operand:VQW 1 "register_operand"))
3395 (match_operand:<VDBLW> 2 "register_operand")))]
3398 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3399 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3401 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3403 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3408 (define_expand "widen_usum<mode>3"
3409 [(set (match_operand:<VWIDE> 0 "register_operand")
3410 (plus:<VWIDE> (zero_extend:<VWIDE>
3411 (match_operand:VD_BHSI 1 "register_operand"))
3412 (match_operand:<VWIDE> 2 "register_operand")))]
3415 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3419 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3420 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3421 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3423 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3425 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426 [(set_attr "type" "neon_sub_widen")]
3429 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3430 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3434 (match_operand:VQW 2 "register_operand" "w")
3435 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3437 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3438 [(set_attr "type" "neon_sub_widen")]
3441 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3442 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3446 (match_operand:VQW 2 "register_operand" "w")
3447 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3449 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3450 [(set_attr "type" "neon_sub_widen")]
3453 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3456 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3457 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3459 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3460 [(set_attr "type" "neon_add_widen")]
3463 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3464 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3468 (match_operand:VQW 2 "register_operand" "w")
3469 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3470 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3472 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3473 [(set_attr "type" "neon_add_widen")]
3476 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3477 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3481 (match_operand:VQW 2 "register_operand" "w")
3482 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3483 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3485 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3486 [(set_attr "type" "neon_add_widen")]
3489 (define_expand "aarch64_saddw2<mode>"
3490 [(match_operand:<VWIDE> 0 "register_operand")
3491 (match_operand:<VWIDE> 1 "register_operand")
3492 (match_operand:VQW 2 "register_operand")]
3495 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3496 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3501 (define_expand "aarch64_uaddw2<mode>"
3502 [(match_operand:<VWIDE> 0 "register_operand")
3503 (match_operand:<VWIDE> 1 "register_operand")
3504 (match_operand:VQW 2 "register_operand")]
3507 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3508 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3514 (define_expand "aarch64_ssubw2<mode>"
3515 [(match_operand:<VWIDE> 0 "register_operand")
3516 (match_operand:<VWIDE> 1 "register_operand")
3517 (match_operand:VQW 2 "register_operand")]
3520 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3521 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3526 (define_expand "aarch64_usubw2<mode>"
3527 [(match_operand:<VWIDE> 0 "register_operand")
3528 (match_operand:<VWIDE> 1 "register_operand")
3529 (match_operand:VQW 2 "register_operand")]
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3533 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3538 ;; <su><r>h<addsub>.
3540 (define_expand "<u>avg<mode>3_floor"
3541 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3542 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3543 (match_operand:VDQ_BHSI 2 "register_operand")]
3548 (define_expand "<u>avg<mode>3_ceil"
3549 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3550 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3551 (match_operand:VDQ_BHSI 2 "register_operand")]
3556 (define_insn "aarch64_<sur>h<addsub><mode>"
3557 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3558 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3559 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3562 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3563 [(set_attr "type" "neon_<addsub>_halve<q>")]
3566 ;; <r><addsub>hn<q>.
3568 (define_insn "aarch64_<sur><addsub>hn<mode>"
3569 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3570 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3571 (match_operand:VQN 2 "register_operand" "w")]
3574 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3575 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3578 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3579 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3580 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3581 (match_operand:VQN 2 "register_operand" "w")
3582 (match_operand:VQN 3 "register_operand" "w")]
3585 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3586 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3591 (define_insn "aarch64_pmul<mode>"
3592 [(set (match_operand:VB 0 "register_operand" "=w")
3593 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3594 (match_operand:VB 2 "register_operand" "w")]
3597 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3598 [(set_attr "type" "neon_mul_<Vetype><q>")]
3603 (define_insn "aarch64_fmulx<mode>"
3604 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3606 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3607 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3610 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3611 [(set_attr "type" "neon_fp_mul_<stype>")]
3614 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3616 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3617 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3619 [(match_operand:VDQSF 1 "register_operand" "w")
3620 (vec_duplicate:VDQSF
3622 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3623 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3627 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3628 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3630 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3633 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3635 (define_insn "*aarch64_mulx_elt<mode>"
3636 [(set (match_operand:VDQF 0 "register_operand" "=w")
3638 [(match_operand:VDQF 1 "register_operand" "w")
3641 (match_operand:VDQF 2 "register_operand" "w")
3642 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3646 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3647 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3649 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3654 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3655 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3657 [(match_operand:VHSDF 1 "register_operand" "w")
3658 (vec_duplicate:VHSDF
3659 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3662 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3663 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3666 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3667 ;; vmulxd_lane_f64 == vmulx_lane_f64
3668 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3670 (define_insn "*aarch64_vgetfmulx<mode>"
3671 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3673 [(match_operand:<VEL> 1 "register_operand" "w")
3675 (match_operand:VDQF 2 "register_operand" "w")
3676 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3680 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3681 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3683 [(set_attr "type" "fmul<Vetype>")]
3687 (define_insn "aarch64_<su_optab><optab><mode>"
3688 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3689 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3690 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3692 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3693 [(set_attr "type" "neon_<optab><q>")]
3696 ;; suqadd and usqadd
3698 (define_insn "aarch64_<sur>qadd<mode>"
3699 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3700 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3701 (match_operand:VSDQ_I 2 "register_operand" "w")]
3704 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3705 [(set_attr "type" "neon_qadd<q>")]
3710 (define_insn "aarch64_sqmovun<mode>"
3711 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3712 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3715 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3716 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3719 ;; sqmovn and uqmovn
3721 (define_insn "aarch64_<sur>qmovn<mode>"
3722 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3723 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3726 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3727 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3732 (define_insn "aarch64_s<optab><mode>"
3733 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3735 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3737 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3738 [(set_attr "type" "neon_<optab><q>")]
3743 (define_insn "aarch64_sq<r>dmulh<mode>"
3744 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3746 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3747 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3750 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3751 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3756 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3757 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3759 [(match_operand:VDQHS 1 "register_operand" "w")
3761 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3762 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3766 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3767 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3768 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3771 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3772 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3774 [(match_operand:VDQHS 1 "register_operand" "w")
3776 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3777 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3781 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3782 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3783 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3786 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3787 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3789 [(match_operand:SD_HSI 1 "register_operand" "w")
3791 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3792 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3796 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3797 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3798 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3801 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3802 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3804 [(match_operand:SD_HSI 1 "register_operand" "w")
3806 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3807 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3811 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3812 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3813 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3818 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3819 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3821 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3822 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3823 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3826 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3827 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3830 ;; sqrdml[as]h_lane.
3832 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3833 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3835 [(match_operand:VDQHS 1 "register_operand" "0")
3836 (match_operand:VDQHS 2 "register_operand" "w")
3838 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3839 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3843 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3845 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3847 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3850 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3851 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3853 [(match_operand:SD_HSI 1 "register_operand" "0")
3854 (match_operand:SD_HSI 2 "register_operand" "w")
3856 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3857 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3861 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3863 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3865 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3868 ;; sqrdml[as]h_laneq.
3870 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3871 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3873 [(match_operand:VDQHS 1 "register_operand" "0")
3874 (match_operand:VDQHS 2 "register_operand" "w")
3876 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3877 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3881 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3883 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3885 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3888 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3889 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3891 [(match_operand:SD_HSI 1 "register_operand" "0")
3892 (match_operand:SD_HSI 2 "register_operand" "w")
3894 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3895 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3899 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3901 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3903 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3908 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3909 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3911 (match_operand:<VWIDE> 1 "register_operand" "0")
3914 (sign_extend:<VWIDE>
3915 (match_operand:VSD_HSI 2 "register_operand" "w"))
3916 (sign_extend:<VWIDE>
3917 (match_operand:VSD_HSI 3 "register_operand" "w")))
3920 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3921 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3926 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3927 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (match_operand:<VWIDE> 1 "register_operand" "0")
3932 (sign_extend:<VWIDE>
3933 (match_operand:VD_HSI 2 "register_operand" "w"))
3934 (sign_extend:<VWIDE>
3935 (vec_duplicate:VD_HSI
3937 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3938 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3943 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3945 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3947 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3950 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3951 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3953 (match_operand:<VWIDE> 1 "register_operand" "0")
3956 (sign_extend:<VWIDE>
3957 (match_operand:VD_HSI 2 "register_operand" "w"))
3958 (sign_extend:<VWIDE>
3959 (vec_duplicate:VD_HSI
3961 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3962 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3967 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3969 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3971 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3974 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3975 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3977 (match_operand:<VWIDE> 1 "register_operand" "0")
3980 (sign_extend:<VWIDE>
3981 (match_operand:SD_HSI 2 "register_operand" "w"))
3982 (sign_extend:<VWIDE>
3984 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3992 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3997 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3998 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4000 (match_operand:<VWIDE> 1 "register_operand" "0")
4003 (sign_extend:<VWIDE>
4004 (match_operand:SD_HSI 2 "register_operand" "w"))
4005 (sign_extend:<VWIDE>
4007 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4008 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4013 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4015 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4017 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4022 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4023 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4025 (match_operand:<VWIDE> 1 "register_operand" "0")
4028 (sign_extend:<VWIDE>
4029 (match_operand:VD_HSI 2 "register_operand" "w"))
4030 (sign_extend:<VWIDE>
4031 (vec_duplicate:VD_HSI
4032 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4035 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4036 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4041 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4042 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4044 (match_operand:<VWIDE> 1 "register_operand" "0")
4047 (sign_extend:<VWIDE>
4049 (match_operand:VQ_HSI 2 "register_operand" "w")
4050 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4051 (sign_extend:<VWIDE>
4053 (match_operand:VQ_HSI 3 "register_operand" "w")
4057 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4058 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4061 (define_expand "aarch64_sqdmlal2<mode>"
4062 [(match_operand:<VWIDE> 0 "register_operand")
4063 (match_operand:<VWIDE> 1 "register_operand")
4064 (match_operand:VQ_HSI 2 "register_operand")
4065 (match_operand:VQ_HSI 3 "register_operand")]
4068 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4069 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4070 operands[2], operands[3], p));
4074 (define_expand "aarch64_sqdmlsl2<mode>"
4075 [(match_operand:<VWIDE> 0 "register_operand")
4076 (match_operand:<VWIDE> 1 "register_operand")
4077 (match_operand:VQ_HSI 2 "register_operand")
4078 (match_operand:VQ_HSI 3 "register_operand")]
4081 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4082 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4083 operands[2], operands[3], p));
4089 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4090 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4092 (match_operand:<VWIDE> 1 "register_operand" "0")
4095 (sign_extend:<VWIDE>
4097 (match_operand:VQ_HSI 2 "register_operand" "w")
4098 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:<VHALF>
4102 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4103 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4108 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4110 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4112 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4115 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118 (match_operand:<VWIDE> 1 "register_operand" "0")
4121 (sign_extend:<VWIDE>
4123 (match_operand:VQ_HSI 2 "register_operand" "w")
4124 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4125 (sign_extend:<VWIDE>
4126 (vec_duplicate:<VHALF>
4128 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4129 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4134 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4136 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4138 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4141 (define_expand "aarch64_sqdmlal2_lane<mode>"
4142 [(match_operand:<VWIDE> 0 "register_operand")
4143 (match_operand:<VWIDE> 1 "register_operand")
4144 (match_operand:VQ_HSI 2 "register_operand")
4145 (match_operand:<VCOND> 3 "register_operand")
4146 (match_operand:SI 4 "immediate_operand")]
4149 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4150 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4151 operands[2], operands[3],
4156 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4157 [(match_operand:<VWIDE> 0 "register_operand")
4158 (match_operand:<VWIDE> 1 "register_operand")
4159 (match_operand:VQ_HSI 2 "register_operand")
4160 (match_operand:<VCONQ> 3 "register_operand")
4161 (match_operand:SI 4 "immediate_operand")]
4164 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4165 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4166 operands[2], operands[3],
4171 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4172 [(match_operand:<VWIDE> 0 "register_operand")
4173 (match_operand:<VWIDE> 1 "register_operand")
4174 (match_operand:VQ_HSI 2 "register_operand")
4175 (match_operand:<VCOND> 3 "register_operand")
4176 (match_operand:SI 4 "immediate_operand")]
4179 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4180 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4181 operands[2], operands[3],
4186 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4187 [(match_operand:<VWIDE> 0 "register_operand")
4188 (match_operand:<VWIDE> 1 "register_operand")
4189 (match_operand:VQ_HSI 2 "register_operand")
4190 (match_operand:<VCONQ> 3 "register_operand")
4191 (match_operand:SI 4 "immediate_operand")]
4194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4195 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4196 operands[2], operands[3],
4201 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4202 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4204 (match_operand:<VWIDE> 1 "register_operand" "0")
4207 (sign_extend:<VWIDE>
4209 (match_operand:VQ_HSI 2 "register_operand" "w")
4210 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4211 (sign_extend:<VWIDE>
4212 (vec_duplicate:<VHALF>
4213 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4216 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4217 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4220 (define_expand "aarch64_sqdmlal2_n<mode>"
4221 [(match_operand:<VWIDE> 0 "register_operand")
4222 (match_operand:<VWIDE> 1 "register_operand")
4223 (match_operand:VQ_HSI 2 "register_operand")
4224 (match_operand:<VEL> 3 "register_operand")]
4227 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4228 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4229 operands[2], operands[3],
4234 (define_expand "aarch64_sqdmlsl2_n<mode>"
4235 [(match_operand:<VWIDE> 0 "register_operand")
4236 (match_operand:<VWIDE> 1 "register_operand")
4237 (match_operand:VQ_HSI 2 "register_operand")
4238 (match_operand:<VEL> 3 "register_operand")]
4241 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4242 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4243 operands[2], operands[3],
4250 (define_insn "aarch64_sqdmull<mode>"
4251 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4254 (sign_extend:<VWIDE>
4255 (match_operand:VSD_HSI 1 "register_operand" "w"))
4256 (sign_extend:<VWIDE>
4257 (match_operand:VSD_HSI 2 "register_operand" "w")))
4260 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4261 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4266 (define_insn "aarch64_sqdmull_lane<mode>"
4267 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4270 (sign_extend:<VWIDE>
4271 (match_operand:VD_HSI 1 "register_operand" "w"))
4272 (sign_extend:<VWIDE>
4273 (vec_duplicate:VD_HSI
4275 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4276 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4281 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4282 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4284 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4287 (define_insn "aarch64_sqdmull_laneq<mode>"
4288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4291 (sign_extend:<VWIDE>
4292 (match_operand:VD_HSI 1 "register_operand" "w"))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:VD_HSI
4296 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4302 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4303 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4308 (define_insn "aarch64_sqdmull_lane<mode>"
4309 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4312 (sign_extend:<VWIDE>
4313 (match_operand:SD_HSI 1 "register_operand" "w"))
4314 (sign_extend:<VWIDE>
4316 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4317 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4322 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4323 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4325 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4328 (define_insn "aarch64_sqdmull_laneq<mode>"
4329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4332 (sign_extend:<VWIDE>
4333 (match_operand:SD_HSI 1 "register_operand" "w"))
4334 (sign_extend:<VWIDE>
4336 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4337 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4342 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4343 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4345 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4350 (define_insn "aarch64_sqdmull_n<mode>"
4351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4354 (sign_extend:<VWIDE>
4355 (match_operand:VD_HSI 1 "register_operand" "w"))
4356 (sign_extend:<VWIDE>
4357 (vec_duplicate:VD_HSI
4358 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4362 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4363 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: same saturating doubling multiply long, but operating on the
;; high half of 128-bit sources (selected via vect_par_cnst_hi_half).
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

4370 (define_insn "aarch64_sqdmull2<mode>_internal"
4371 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4374 (sign_extend:<VWIDE>
4376 (match_operand:VQ_HSI 1 "register_operand" "w")
4377 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4378 (sign_extend:<VWIDE>
4380 (match_operand:VQ_HSI 2 "register_operand" "w")
4385 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4386 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expander: builds the hi-half lane-selector parallel and defers to the
;; _internal insn above.
4389 (define_expand "aarch64_sqdmull2<mode>"
4390 [(match_operand:<VWIDE> 0 "register_operand")
4391 (match_operand:VQ_HSI 1 "register_operand")
4392 (match_operand:VQ_HSI 2 "register_operand")]
4395 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4396 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; Hi-half lane form; lane index remapped for big-endian before printing.
4403 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4404 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4407 (sign_extend:<VWIDE>
4409 (match_operand:VQ_HSI 1 "register_operand" "w")
4410 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4411 (sign_extend:<VWIDE>
4412 (vec_duplicate:<VHALF>
4414 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4415 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4420 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4421 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4423 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Hi-half laneq form: lane taken from a full 128-bit register (<VCONQ>).
4426 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4427 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4430 (sign_extend:<VWIDE>
4432 (match_operand:VQ_HSI 1 "register_operand" "w")
4433 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4434 (sign_extend:<VWIDE>
4435 (vec_duplicate:<VHALF>
4437 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4438 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4443 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4444 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4446 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Expanders for the lane/laneq forms: construct the hi-half selector, then
;; emit the corresponding _internal insn.
4449 (define_expand "aarch64_sqdmull2_lane<mode>"
4450 [(match_operand:<VWIDE> 0 "register_operand")
4451 (match_operand:VQ_HSI 1 "register_operand")
4452 (match_operand:<VCOND> 2 "register_operand")
4453 (match_operand:SI 3 "immediate_operand")]
4456 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4457 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4458 operands[2], operands[3],

4463 (define_expand "aarch64_sqdmull2_laneq<mode>"
4464 [(match_operand:<VWIDE> 0 "register_operand")
4465 (match_operand:VQ_HSI 1 "register_operand")
4466 (match_operand:<VCONQ> 2 "register_operand")
4467 (match_operand:SI 3 "immediate_operand")]
4470 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4471 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4472 operands[2], operands[3],

;; Hi-half _n form: scalar element duplicated across lanes; prints lane [0].
4479 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4480 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4483 (sign_extend:<VWIDE>
4485 (match_operand:VQ_HSI 1 "register_operand" "w")
4486 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4487 (sign_extend:<VWIDE>
4488 (vec_duplicate:<VHALF>
4489 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4493 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4494 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

4497 (define_expand "aarch64_sqdmull2_n<mode>"
4498 [(match_operand:<VWIDE> 0 "register_operand")
4499 (match_operand:VQ_HSI 1 "register_operand")
4500 (match_operand:<VEL> 2 "register_operand")]
4503 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4504 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shift patterns.  The <sur> iterator selects the signed/unsigned/
;; rounding variants of each mnemonic.
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

;; Register-controlled shift: <sur>shl (e.g. sshl/ushl family).
4511 (define_insn "aarch64_<sur>shl<mode>"
4512 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4514 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4515 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4518 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4519 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) register-controlled shift.
4525 (define_insn "aarch64_<sur>q<r>shl<mode>"
4526 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4528 [(match_operand:VSDQ_I 1 "register_operand" "w")
4529 (match_operand:VSDQ_I 2 "register_operand" "w")]
4532 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4533 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift-left by immediate.  When the shift amount equals the
;; element bit size the plain SHLL mnemonic is used; otherwise the
;; signed/unsigned <sur>shll form.
4538 (define_insn "aarch64_<sur>shll_n<mode>"
4539 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4540 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4542 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4546 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4547 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4549 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4551 [(set_attr "type" "neon_shift_imm_long")]

;; High-half variant of the widening shift-left (shll2 / <sur>shll2).
4556 (define_insn "aarch64_<sur>shll2_n<mode>"
4557 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4558 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4559 (match_operand:SI 2 "immediate_operand" "i")]
4563 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4564 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4566 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4568 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (<sur>shr family).
4573 (define_insn "aarch64_<sur>shr_n<mode>"
4574 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4575 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4577 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4580 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4581 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift right and accumulate: operand 1 is tied to the destination ("0"),
;; operand 2 is shifted by operand 3 and added in.
4586 (define_insn "aarch64_<sur>sra_n<mode>"
4587 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4588 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4589 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4591 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4594 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4595 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (SLI/SRI): destination also read, via the tied "0"
;; constraint on operand 1.
4600 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4601 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4602 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4603 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4605 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4608 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4609 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift left by immediate (sqshl/uqshl/sqshlu via <sur>/<u>).
4614 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4615 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4616 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4618 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4621 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4622 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift right narrow by immediate.
4628 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4629 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4630 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4632 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4635 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4636 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
4640 ;; cm(eq|ge|gt|lt|le)
4641 ;; Note, we have constraints for Dz and Z as different expanders
4642 ;; have different ideas of what should be passed to this pattern.

;; Vector integer compare: alternative 1 compares two registers, alternative 2
;; compares against zero (ZDz constraint) using the #0 form.
4644 (define_insn "aarch64_cm<optab><mode>"
4645 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4647 (COMPARISONS:<V_INT_EQUIV>
4648 (match_operand:VDQ_I 1 "register_operand" "w,w")
4649 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4653 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4654 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4655 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DI-mode compare: if allocated to general registers, split after reload into
;; an integer compare + cstoredi_neg; otherwise re-emit a CC-clobber-free
;; SIMD pattern (*aarch64_cm<optab>di below).
4658 (define_insn_and_split "aarch64_cm<optab>di"
4659 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4662 (match_operand:DI 1 "register_operand" "w,w,r")
4663 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4665 (clobber (reg:CC CC_REGNUM))]
4668 "&& reload_completed"
4669 [(set (match_operand:DI 0 "register_operand")
4672 (match_operand:DI 1 "register_operand")
4673 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4676 /* If we are in the general purpose register file,
4677 we split to a sequence of comparison and store. */
4678 if (GP_REGNUM_P (REGNO (operands[0]))
4679 && GP_REGNUM_P (REGNO (operands[1])))
4681 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4682 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4683 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4684 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4687 /* Otherwise, we expand to a similar pattern which does not
4688 clobber CC_REGNUM. */
4690 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload SIMD-register form of the DI compare (no CC clobber).
4693 (define_insn "*aarch64_cm<optab>di"
4694 [(set (match_operand:DI 0 "register_operand" "=w,w")
4697 (match_operand:DI 1 "register_operand" "w,w")
4698 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4700 "TARGET_SIMD && reload_completed"
4702 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4703 cm<optab>\t%d0, %d1, #0"
4704 [(set_attr "type" "neon_compare, neon_compare_zero")]

;; Unsigned vector compares (UCOMPARISONS iterator); register-register only.
4709 (define_insn "aarch64_cm<optab><mode>"
4710 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4712 (UCOMPARISONS:<V_INT_EQUIV>
4713 (match_operand:VDQ_I 1 "register_operand" "w")
4714 (match_operand:VDQ_I 2 "register_operand" "w")
4717 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4718 [(set_attr "type" "neon_compare<q>")]

;; Unsigned DI-mode compare split; GP-register case uses plain CCmode
;; (unlike the signed variant, which calls SELECT_CC_MODE).
4721 (define_insn_and_split "aarch64_cm<optab>di"
4722 [(set (match_operand:DI 0 "register_operand" "=w,r")
4725 (match_operand:DI 1 "register_operand" "w,r")
4726 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4728 (clobber (reg:CC CC_REGNUM))]
4731 "&& reload_completed"
4732 [(set (match_operand:DI 0 "register_operand")
4735 (match_operand:DI 1 "register_operand")
4736 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4739 /* If we are in the general purpose register file,
4740 we split to a sequence of comparison and store. */
4741 if (GP_REGNUM_P (REGNO (operands[0]))
4742 && GP_REGNUM_P (REGNO (operands[1])))
4744 machine_mode mode = CCmode;
4745 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4746 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4747 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4750 /* Otherwise, we expand to a similar pattern which does not
4751 clobber CC_REGNUM. */
4753 [(set_attr "type" "neon_compare,multiple")]

;; Post-reload SIMD-register form of the unsigned DI compare.
4756 (define_insn "*aarch64_cm<optab>di"
4757 [(set (match_operand:DI 0 "register_operand" "=w")
4760 (match_operand:DI 1 "register_operand" "w")
4761 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4763 "TARGET_SIMD && reload_completed"
4764 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4765 [(set_attr "type" "neon_compare")]
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
4770 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4771 ;; we don't have any insns using ne, and aarch64_vcond outputs
4772 ;; not (neg (eq (and x y) 0))
4773 ;; which is rewritten by simplify_rtx as
4774 ;; plus (eq (and x y) 0) -1.

;; CMTST: matches the plus(eq(and x y, 0), -1) form described above and
;; emits a single cmtst instruction.
4776 (define_insn "aarch64_cmtst<mode>"
4777 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4781 (match_operand:VDQ_I 1 "register_operand" "w")
4782 (match_operand:VDQ_I 2 "register_operand" "w"))
4783 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4784 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4787 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4788 [(set_attr "type" "neon_tst<q>")]

;; DI-mode cmtst: split after reload; GP-register case becomes an AND,
;; NE-against-zero compare, and cstoredi_neg.
4791 (define_insn_and_split "aarch64_cmtstdi"
4792 [(set (match_operand:DI 0 "register_operand" "=w,r")
4796 (match_operand:DI 1 "register_operand" "w,r")
4797 (match_operand:DI 2 "register_operand" "w,r"))
4799 (clobber (reg:CC CC_REGNUM))]
4802 "&& reload_completed"
4803 [(set (match_operand:DI 0 "register_operand")
4807 (match_operand:DI 1 "register_operand")
4808 (match_operand:DI 2 "register_operand"))
4811 /* If we are in the general purpose register file,
4812 we split to a sequence of comparison and store. */
4813 if (GP_REGNUM_P (REGNO (operands[0]))
4814 && GP_REGNUM_P (REGNO (operands[1])))
4816 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4817 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4818 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4819 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4820 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4823 /* Otherwise, we expand to a similar pattern which does not
4824 clobber CC_REGNUM. */
4826 [(set_attr "type" "neon_tst,multiple")]

;; Post-reload SIMD-register form of the DI cmtst.
4829 (define_insn "*aarch64_cmtstdi"
4830 [(set (match_operand:DI 0 "register_operand" "=w")
4834 (match_operand:DI 1 "register_operand" "w")
4835 (match_operand:DI 2 "register_operand" "w"))
4838 "cmtst\t%d0, %d1, %d2"
4839 [(set_attr "type" "neon_tst")]
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
4842 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare: second alternative compares against
;; zero (YDz constraint), printed with a literal 0 operand.
4844 (define_insn "aarch64_cm<optab><mode>"
4845 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4847 (COMPARISONS:<V_INT_EQUIV>
4848 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4849 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4853 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4854 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4855 [(set_attr "type" "neon_fp_compare_<stype><q>")]

4859 ;; Note we can also handle what would be fac(le|lt) by
4860 ;; generating fac(ge|gt).

;; Floating-point absolute compare (facge/facgt family).
4862 (define_insn "aarch64_fac<optab><mode>"
4863 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4865 (FAC_COMPARISONS:<V_INT_EQUIV>
4867 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4869 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4872 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4873 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise addition (ADDP).
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
4878 (define_insn "aarch64_addp<mode>"
4879 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4881 [(match_operand:VD_BHSI 1 "register_operand" "w")
4882 (match_operand:VD_BHSI 2 "register_operand" "w")]
4885 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4886 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar DI pairwise add: reduces a V2DI source into a D register.
4889 (define_insn "aarch64_addpdi"
4890 [(set (match_operand:DI 0 "register_operand" "=w")
4892 [(match_operand:V2DI 1 "register_operand" "w")]
4896 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

;; Expander: first tries the Newton-series approximation helper
;; (aarch64_emit_approx_sqrt); otherwise falls through to the insn below.
4901 (define_expand "sqrt<mode>2"
4902 [(set (match_operand:VHSDF 0 "register_operand")
4903 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
4906 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

;; Direct hardware FSQRT.
4910 (define_insn "*sqrt<mode>2"
4911 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4912 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4914 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4915 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
4918 ;; Patterns for vector struct loads and stores.

;; LD2: load two consecutive Q registers (OImode tuple) from memory.
4920 (define_insn "aarch64_simd_ld2<mode>"
4921 [(set (match_operand:OI 0 "register_operand" "=w")
4922 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4923 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4927 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one 2-element structure and replicate to all lanes.
4930 (define_insn "aarch64_simd_ld2r<mode>"
4931 [(set (match_operand:OI 0 "register_operand" "=w")
4932 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4933 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4936 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4937 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 to a single lane; remaining lanes come from the tied input tuple
;; (operand 2, constraint "0").  Lane index flipped for big-endian.
4940 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4941 [(set (match_operand:OI 0 "register_operand" "=w")
4942 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4943 (match_operand:OI 2 "register_operand" "0")
4944 (match_operand:SI 3 "immediate_operand" "i")
4945 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4949 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4950 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4952 [(set_attr "type" "neon_load2_one_lane")]

;; Standard-name expander: on big-endian, load into a temp and reverse the
;; register list (aarch64_rev_reglistoi) to match GCC's lane numbering.
4955 (define_expand "vec_load_lanesoi<mode>"
4956 [(set (match_operand:OI 0 "register_operand")
4957 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
4958 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4962 if (BYTES_BIG_ENDIAN)
4964 rtx tmp = gen_reg_rtx (OImode);
4965 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4966 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4967 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4970 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store a two-register tuple.
4974 (define_insn "aarch64_simd_st2<mode>"
4975 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4976 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4977 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4981 [(set_attr "type" "neon_store2_2reg<q>")]

4984 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 of a single lane; lane index flipped for big-endian when printing.
4985 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4986 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4987 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4988 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4989 (match_operand:SI 2 "immediate_operand" "i")]
4993 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4994 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4996 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Standard-name store expander; big-endian reverses the reglist first.
4999 (define_expand "vec_store_lanesoi<mode>"
5000 [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
5001 (unspec:OI [(match_operand:OI 1 "register_operand")
5002 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5006 if (BYTES_BIG_ENDIAN)
5008 rtx tmp = gen_reg_rtx (OImode);
5009 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5010 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
5011 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
5014 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3 patterns — same structure as the LD2/ST2 group above, but for
;; three-register (CImode) tuples.
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

5018 (define_insn "aarch64_simd_ld3<mode>"
5019 [(set (match_operand:CI 0 "register_operand" "=w")
5020 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5021 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5024 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5025 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD3R: load one 3-element structure and replicate to all lanes.
5028 (define_insn "aarch64_simd_ld3r<mode>"
5029 [(set (match_operand:CI 0 "register_operand" "=w")
5030 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5031 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5034 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5035 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; LD3 single lane; tied input tuple, big-endian lane remap.
5038 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5039 [(set (match_operand:CI 0 "register_operand" "=w")
5040 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5041 (match_operand:CI 2 "register_operand" "0")
5042 (match_operand:SI 3 "immediate_operand" "i")
5043 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5047 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5048 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5050 [(set_attr "type" "neon_load3_one_lane")]

;; Standard-name expander; big-endian loads through a temp and reverses
;; the register list.
5053 (define_expand "vec_load_lanesci<mode>"
5054 [(set (match_operand:CI 0 "register_operand")
5055 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
5056 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5060 if (BYTES_BIG_ENDIAN)
5062 rtx tmp = gen_reg_rtx (CImode);
5063 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5064 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5065 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5068 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; ST3: store a three-register tuple.
5072 (define_insn "aarch64_simd_st3<mode>"
5073 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5074 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5075 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5078 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5079 [(set_attr "type" "neon_store3_3reg<q>")]

5082 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5083 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5084 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5085 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5086 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5087 (match_operand:SI 2 "immediate_operand" "i")]
5091 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5092 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5094 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-name store expander; big-endian reverses the reglist first.
5097 (define_expand "vec_store_lanesci<mode>"
5098 [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
5099 (unspec:CI [(match_operand:CI 1 "register_operand")
5100 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5104 if (BYTES_BIG_ENDIAN)
5106 rtx tmp = gen_reg_rtx (CImode);
5107 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5108 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5109 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5112 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 patterns — four-register (XImode) tuples; same structure as the
;; LD2/LD3 groups above.
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

5116 (define_insn "aarch64_simd_ld4<mode>"
5117 [(set (match_operand:XI 0 "register_operand" "=w")
5118 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5119 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5122 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5123 [(set_attr "type" "neon_load4_4reg<q>")]

;; LD4R: load one 4-element structure and replicate to all lanes.
5126 (define_insn "aarch64_simd_ld4r<mode>"
5127 [(set (match_operand:XI 0 "register_operand" "=w")
5128 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5129 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5132 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5133 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; LD4 single lane; tied input tuple, big-endian lane remap.
5136 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5137 [(set (match_operand:XI 0 "register_operand" "=w")
5138 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5139 (match_operand:XI 2 "register_operand" "0")
5140 (match_operand:SI 3 "immediate_operand" "i")
5141 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5145 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5146 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5148 [(set_attr "type" "neon_load4_one_lane")]

;; Standard-name expander; big-endian loads through a temp and reverses
;; the register list.
5151 (define_expand "vec_load_lanesxi<mode>"
5152 [(set (match_operand:XI 0 "register_operand")
5153 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
5154 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5158 if (BYTES_BIG_ENDIAN)
5160 rtx tmp = gen_reg_rtx (XImode);
5161 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5162 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5163 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5166 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

;; ST4: store a four-register tuple.
5170 (define_insn "aarch64_simd_st4<mode>"
5171 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5172 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5173 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5176 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5177 [(set_attr "type" "neon_store4_4reg<q>")]

5180 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5181 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5182 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5183 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5184 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5185 (match_operand:SI 2 "immediate_operand" "i")]
5189 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5190 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5192 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Standard-name store expander; big-endian reverses the reglist first.
5195 (define_expand "vec_store_lanesxi<mode>"
5196 [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
5197 (unspec:XI [(match_operand:XI 1 "register_operand")
5198 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5202 if (BYTES_BIG_ENDIAN)
5204 rtx tmp = gen_reg_rtx (XImode);
5205 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5206 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5207 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5210 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the element order of each vector in a register-list tuple.
;; Splits after reload into one TBL (aarch64_tbl1v16qi) per 128-bit
;; register in the tuple.  Destination is earlyclobber ("=&w").
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.
5214 (define_insn_and_split "aarch64_rev_reglist<mode>"
5215 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5217 [(match_operand:VSTRUCT 1 "register_operand" "w")
5218 (match_operand:V16QI 2 "register_operand" "w")]
5219 UNSPEC_REV_REGLIST))]
5222 "&& reload_completed"
5226 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5227 for (i = 0; i < nregs; i++)
5229 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5230 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5231 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5235 [(set_attr "type" "neon_tbl1_q")
5236 (set_attr "length" "<insn_count>")]

5239 ;; Reload patterns for AdvSIMD register list operands.

;; Move expander for register-list (VSTRUCT) modes: before register
;; allocation, force the source into a register when the destination is
;; not a REG.
5241 (define_expand "mov<mode>"
5242 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
5243 (match_operand:VSTRUCT 1 "general_operand"))]
5246 if (can_create_pseudo_p ())
5248 if (GET_CODE (operands[0]) != REG)
5249 operands[1] = force_reg (<MODE>mode, operands[1]);
;; LD1/ST1 multi-register forms (x2/x3 tuples).
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

;; Expander: wrap the address register in a CImode MEM and emit the insn.
5254 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5255 [(match_operand:CI 0 "register_operand")
5256 (match_operand:DI 1 "register_operand")
5257 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5261 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));

;; LD1 of three consecutive registers.
5265 (define_insn "aarch64_ld1_x3_<mode>"
5266 [(set (match_operand:CI 0 "register_operand" "=w")
5268 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5269 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5271 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5272 [(set_attr "type" "neon_load1_3reg<q>")]

;; Expander for two-register ST1: wrap the address in an OImode MEM.
5275 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5276 [(match_operand:DI 0 "register_operand")
5277 (match_operand:OI 1 "register_operand")
5278 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5281 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5282 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));

;; ST1 of two consecutive registers.
5286 (define_insn "aarch64_st1_x2_<mode>"
5287 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5289 [(match_operand:OI 1 "register_operand" "w")
5290 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5292 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5293 [(set_attr "type" "neon_store1_2reg<q>")]

;; Expander for three-register ST1: wrap the address in a CImode MEM.
5296 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5297 [(match_operand:DI 0 "register_operand")
5298 (match_operand:CI 1 "register_operand")
5299 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5302 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5303 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));

;; ST1 of three consecutive registers.
5307 (define_insn "aarch64_st1_x3_<mode>"
5308 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5310 [(match_operand:CI 1 "register_operand" "w")
5311 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5313 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5314 [(set_attr "type" "neon_store1_3reg<q>")]
;; Moves of register-list tuples and their big-endian variants.
;; NOTE(review): this extract elides some interior lines of these patterns;
;; code left byte-identical.

;; Little-endian VSTRUCT move: reg-reg (split elsewhere, type "multiple"),
;; or ST1/LD1 of the whole register list to/from memory.
5317 (define_insn "*aarch64_mov<mode>"
5318 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5319 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5320 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5321 && (register_operand (operands[0], <MODE>mode)
5322 || register_operand (operands[1], <MODE>mode))"
5325 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5326 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5327 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5328 neon_load<nregs>_<nregs>reg_q")
5329 (set_attr "length" "<insn_count>,4,4")]

;; Big-endian single-register LD1 (element-ordered load).
5332 (define_insn "aarch64_be_ld1<mode>"
5333 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5334 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5335 "aarch64_simd_struct_operand" "Utv")]
5338 "ld1\\t{%0<Vmtype>}, %1"
5339 [(set_attr "type" "neon_load1_1reg<q>")]

;; Big-endian single-register ST1.
5342 (define_insn "aarch64_be_st1<mode>"
5343 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5344 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5347 "st1\\t{%1<Vmtype>}, %0"
5348 [(set_attr "type" "neon_store1_1reg<q>")]

;; Big-endian OI (2-register tuple) move.
5351 (define_insn "*aarch64_be_movoi"
5352 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5353 (match_operand:OI 1 "general_operand" " w,w,m"))]
5354 "TARGET_SIMD && BYTES_BIG_ENDIAN
5355 && (register_operand (operands[0], OImode)
5356 || register_operand (operands[1], OImode))"
5361 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5362 (set_attr "length" "8,4,4")]

;; Big-endian CI (3-register tuple) move.
5365 (define_insn "*aarch64_be_movci"
5366 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5367 (match_operand:CI 1 "general_operand" " w,w,o"))]
5368 "TARGET_SIMD && BYTES_BIG_ENDIAN
5369 && (register_operand (operands[0], CImode)
5370 || register_operand (operands[1], CImode))"
5372 [(set_attr "type" "multiple")
5373 (set_attr "length" "12,4,4")]

;; Big-endian XI (4-register tuple) move.
5376 (define_insn "*aarch64_be_movxi"
5377 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5378 (match_operand:XI 1 "general_operand" " w,w,o"))]
5379 "TARGET_SIMD && BYTES_BIG_ENDIAN
5380 && (register_operand (operands[0], XImode)
5381 || register_operand (operands[1], XImode))"
5383 [(set_attr "type" "multiple")
5384 (set_attr "length" "16,4,4")]
;; Post-reload splits that decompose OI/CI/XI tuple moves into per-TImode
;; register moves (aarch64_simd_emit_reg_reg_move) and, on big-endian, OImode
;; sub-part memory moves.
;; NOTE(review): the "(define_split" header lines of these forms are elided
;; from this extract (original line numbers are non-contiguous); the visible
;; code is left byte-identical.

;; OI reg-reg split: two TImode moves.
5388 [(set (match_operand:OI 0 "register_operand")
5389 (match_operand:OI 1 "register_operand"))]
5390 "TARGET_SIMD && reload_completed"
5393 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; CI split: reg-reg becomes three TImode moves; otherwise, on big-endian,
;; move the low OImode part then the remaining TImode part via V16QI lowparts.
5398 [(set (match_operand:CI 0 "nonimmediate_operand")
5399 (match_operand:CI 1 "general_operand"))]
5400 "TARGET_SIMD && reload_completed"
5403 if (register_operand (operands[0], CImode)
5404 && register_operand (operands[1], CImode))
5406 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5409 else if (BYTES_BIG_ENDIAN)
5411 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5412 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5413 emit_move_insn (gen_lowpart (V16QImode,
5414 simplify_gen_subreg (TImode, operands[0],
5416 gen_lowpart (V16QImode,
5417 simplify_gen_subreg (TImode, operands[1],

;; XI split: reg-reg becomes four TImode moves; otherwise, on big-endian,
;; two OImode sub-part moves at offsets 0 and 32.
5426 [(set (match_operand:XI 0 "nonimmediate_operand")
5427 (match_operand:XI 1 "general_operand"))]
5428 "TARGET_SIMD && reload_completed"
5431 if (register_operand (operands[0], XImode)
5432 && register_operand (operands[1], XImode))
5434 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5437 else if (BYTES_BIG_ENDIAN)
5439 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5440 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5441 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5442 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Structure-load patterns.  NOTE(review): several lines are missing from
;; this extract (embedded line numbers skip); code left byte-identical.

;; Expander for LDnR (load single element, replicate to all lanes of n
;; registers): wraps the address in a BLKmode MEM sized to one element
;; and defers to the aarch64_simd_ld<nregs>r insn.
5449 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5450 [(match_operand:VSTRUCT 0 "register_operand")
5451 (match_operand:DI 1 "register_operand")
5452 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5455 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5456 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5459 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],

;; LD2 into a D-register pair (64-bit vector element modes).
5464 (define_insn "aarch64_ld2<mode>_dreg"
5465 [(set (match_operand:OI 0 "register_operand" "=w")
5466 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5467 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5470 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5471 [(set_attr "type" "neon_load2_2reg<q>")]

;; DX (DI/DF) variant: no de-interleave needed, so plain LD1 of 2 regs.
5474 (define_insn "aarch64_ld2<mode>_dreg"
5475 [(set (match_operand:OI 0 "register_operand" "=w")
5476 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5477 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 "ld1\\t{%S0.1d - %T0.1d}, %1"
5481 [(set_attr "type" "neon_load1_2reg<q>")]

;; LD3 into three D registers.
5484 (define_insn "aarch64_ld3<mode>_dreg"
5485 [(set (match_operand:CI 0 "register_operand" "=w")
5486 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5487 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5490 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5491 [(set_attr "type" "neon_load3_3reg<q>")]

;; DX variant of LD3: plain LD1 of 3 regs.
5494 (define_insn "aarch64_ld3<mode>_dreg"
5495 [(set (match_operand:CI 0 "register_operand" "=w")
5496 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5497 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5500 "ld1\\t{%S0.1d - %U0.1d}, %1"
5501 [(set_attr "type" "neon_load1_3reg<q>")]

;; LD4 into four D registers.
5504 (define_insn "aarch64_ld4<mode>_dreg"
5505 [(set (match_operand:XI 0 "register_operand" "=w")
5506 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5507 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5510 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5511 [(set_attr "type" "neon_load4_4reg<q>")]

;; DX variant of LD4: plain LD1 of 4 regs.
5514 (define_insn "aarch64_ld4<mode>_dreg"
5515 [(set (match_operand:XI 0 "register_operand" "=w")
5516 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5517 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5520 "ld1\\t{%S0.1d - %V0.1d}, %1"
5521 [(set_attr "type" "neon_load1_4reg<q>")]

;; Expander for LDn of 64-bit vectors: BLKmode MEM sized nregs * 8 bytes,
;; dispatched to the _dreg insns above.
5524 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5525 [(match_operand:VSTRUCT 0 "register_operand")
5526 (match_operand:DI 1 "register_operand")
5527 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5530 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5531 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5533 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));

;; LD1 intrinsic expander: big-endian goes through a dedicated insn,
;; little-endian is just an ordinary vector move from memory.
5537 (define_expand "aarch64_ld1<VALL_F16:mode>"
5538 [(match_operand:VALL_F16 0 "register_operand")
5539 (match_operand:DI 1 "register_operand")]
5542 machine_mode mode = <VALL_F16:MODE>mode;
5543 rtx mem = gen_rtx_MEM (mode, operands[1]);
5545 if (BYTES_BIG_ENDIAN)
5546 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5548 emit_move_insn (operands[0], mem);

;; Expander for LDn of 128-bit vectors.
5552 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5553 [(match_operand:VSTRUCT 0 "register_operand")
5554 (match_operand:DI 1 "register_operand")
5555 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5558 machine_mode mode = <VSTRUCT:MODE>mode;
5559 rtx mem = gen_rtx_MEM (mode, operands[1]);
5561 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));

;; LD1x2 expander, 128-bit vector variant.
5565 (define_expand "aarch64_ld1x2<VQ:mode>"
5566 [(match_operand:OI 0 "register_operand")
5567 (match_operand:DI 1 "register_operand")
5568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5571 machine_mode mode = OImode;
5572 rtx mem = gen_rtx_MEM (mode, operands[1]);
5574 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));

;; LD1x2 expander, 64-bit vector variant.
5578 (define_expand "aarch64_ld1x2<VDC:mode>"
5579 [(match_operand:OI 0 "register_operand")
5580 (match_operand:DI 1 "register_operand")
5581 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5584 machine_mode mode = OImode;
5585 rtx mem = gen_rtx_MEM (mode, operands[1]);
5587 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));

;; LDn single-lane expander: bounds-checks the lane index against the
;; element count, then emits the load-lanes lane insn.
5592 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5593 [(match_operand:VSTRUCT 0 "register_operand")
5594 (match_operand:DI 1 "register_operand")
5595 (match_operand:VSTRUCT 2 "register_operand")
5596 (match_operand:SI 3 "immediate_operand")
5597 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5600 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5601 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5604 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5605 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5606 operands[0], mem, operands[2], operands[3]));
5610 ;; Expanders for builtins to extract vector registers from large
5611 ;; opaque integer modes.

;; Extract the PART'th 64-bit vector from a structure mode: copy the
;; containing 128-bit subreg (at byte offset part * 16) into a temp, then
;; take its low half.
5615 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5616 [(match_operand:VDC 0 "register_operand")
5617 (match_operand:VSTRUCT 1 "register_operand")
5618 (match_operand:SI 2 "immediate_operand")]
5621 int part = INTVAL (operands[2]);
5622 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5623 int offset = part * 16;
5625 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5626 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; Extract the PART'th 128-bit vector: a direct subreg move at byte
;; offset part * 16.
5632 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5633 [(match_operand:VQ 0 "register_operand")
5634 (match_operand:VSTRUCT 1 "register_operand")
5635 (match_operand:SI 2 "immediate_operand")]
5638 int part = INTVAL (operands[2]);
5639 int offset = part * 16;
5641 emit_move_insn (operands[0],
5642 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5646 ;; Permuted-store expanders for neon intrinsics.

5648 ;; Permute instructions

;; Standard vec_perm pattern for byte vectors, lowered by
;; aarch64_expand_vec_perm (variable-selector permute via TBL).
5652 (define_expand "vec_perm<mode>"
5653 [(match_operand:VB 0 "register_operand")
5654 (match_operand:VB 1 "register_operand")
5655 (match_operand:VB 2 "register_operand")
5656 (match_operand:VB 3 "register_operand")]
5659 aarch64_expand_vec_perm (operands[0], operands[1],
5660 operands[2], operands[3], <nunits>);

;; One-source-register table lookup.
5664 (define_insn "aarch64_tbl1<mode>"
5665 [(set (match_operand:VB 0 "register_operand" "=w")
5666 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5667 (match_operand:VB 2 "register_operand" "w")]
5670 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5671 [(set_attr "type" "neon_tbl1<q>")]

5674 ;; Two source registers.

;; TBL with a two-register (OImode) table, 128-bit result.
5676 (define_insn "aarch64_tbl2v16qi"
5677 [(set (match_operand:V16QI 0 "register_operand" "=w")
5678 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5679 (match_operand:V16QI 2 "register_operand" "w")]
5682 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5683 [(set_attr "type" "neon_tbl2_q")]

;; TBL with a two-register table, any byte-vector width.
5686 (define_insn "aarch64_tbl3<mode>"
5687 [(set (match_operand:VB 0 "register_operand" "=w")
5688 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5689 (match_operand:VB 2 "register_operand" "w")]
5692 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5693 [(set_attr "type" "neon_tbl3")]

;; TBX (extension form: out-of-range indices keep operand 1, tied to the
;; destination) with a two-register table.
5696 (define_insn "aarch64_tbx4<mode>"
5697 [(set (match_operand:VB 0 "register_operand" "=w")
5698 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5699 (match_operand:OI 2 "register_operand" "w")
5700 (match_operand:VB 3 "register_operand" "w")]
5703 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5704 [(set_attr "type" "neon_tbl4")]

5707 ;; Three source registers.

;; TBL with a three-register (CImode) table.
5709 (define_insn "aarch64_qtbl3<mode>"
5710 [(set (match_operand:VB 0 "register_operand" "=w")
5711 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5712 (match_operand:VB 2 "register_operand" "w")]
5715 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5716 [(set_attr "type" "neon_tbl3")]

;; TBX with a three-register table.
5719 (define_insn "aarch64_qtbx3<mode>"
5720 [(set (match_operand:VB 0 "register_operand" "=w")
5721 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5722 (match_operand:CI 2 "register_operand" "w")
5723 (match_operand:VB 3 "register_operand" "w")]
5726 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5727 [(set_attr "type" "neon_tbl3")]

5730 ;; Four source registers.

;; TBL with a four-register (XImode) table.
5732 (define_insn "aarch64_qtbl4<mode>"
5733 [(set (match_operand:VB 0 "register_operand" "=w")
5734 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5735 (match_operand:VB 2 "register_operand" "w")]
5738 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5739 [(set_attr "type" "neon_tbl4")]

;; TBX with a four-register table.
5742 (define_insn "aarch64_qtbx4<mode>"
5743 [(set (match_operand:VB 0 "register_operand" "=w")
5744 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5745 (match_operand:XI 2 "register_operand" "w")
5746 (match_operand:VB 3 "register_operand" "w")]
5749 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5750 [(set_attr "type" "neon_tbl4")]

;; Combine two V16QI registers into one OImode value; kept as a single
;; insn until after reload, then split by aarch64_split_combinev16qi.
5753 (define_insn_and_split "aarch64_combinev16qi"
5754 [(set (match_operand:OI 0 "register_operand" "=w")
5755 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5756 (match_operand:V16QI 2 "register_operand" "w")]
5760 "&& reload_completed"
5763 aarch64_split_combinev16qi (operands);
5766 [(set_attr "type" "multiple")]
5769 ;; This instruction's pattern is generated directly by
5770 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5771 ;; need corresponding changes there.
5772 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5773 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5774 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5775 (match_operand:VALL_F16 2 "register_operand" "w")]
5778 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5779 [(set_attr "type" "neon_permute<q>")]

5782 ;; This instruction's pattern is generated directly by
5783 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5784 ;; need corresponding changes there. Note that the immediate (third)
5785 ;; operand is a lane index not a byte index.
;; EXT: the C block rescales the lane index to a byte index before
;; printing the immediate.
5786 (define_insn "aarch64_ext<mode>"
5787 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5788 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5789 (match_operand:VALL_F16 2 "register_operand" "w")
5790 (match_operand:SI 3 "immediate_operand" "i")]
5794 operands[3] = GEN_INT (INTVAL (operands[3])
5795 * GET_MODE_UNIT_SIZE (<MODE>mode));
5796 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5798 [(set_attr "type" "neon_ext<q>")]

5801 ;; This instruction's pattern is generated directly by
5802 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5803 ;; need corresponding changes there.
;; REV16/REV32/REV64 element-reversal within containers.
5804 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5805 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5806 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5809 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5810 [(set_attr "type" "neon_rev<q>")]
;; Structure-store patterns, mirroring the _dreg loads above: STn for VD
;; element modes, plain ST1 for DX (DI/DF) modes which need no
;; interleave.  NOTE(review): some lines are missing from this extract.

5813 (define_insn "aarch64_st2<mode>_dreg"
5814 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5815 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5816 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5819 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5820 [(set_attr "type" "neon_store2_2reg")]

5823 (define_insn "aarch64_st2<mode>_dreg"
5824 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5825 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5826 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 "st1\\t{%S1.1d - %T1.1d}, %0"
5830 [(set_attr "type" "neon_store1_2reg")]

5833 (define_insn "aarch64_st3<mode>_dreg"
5834 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5835 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5836 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5839 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5840 [(set_attr "type" "neon_store3_3reg")]

5843 (define_insn "aarch64_st3<mode>_dreg"
5844 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5845 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5846 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5849 "st1\\t{%S1.1d - %U1.1d}, %0"
5850 [(set_attr "type" "neon_store1_3reg")]

5853 (define_insn "aarch64_st4<mode>_dreg"
5854 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5855 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5856 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5859 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5860 [(set_attr "type" "neon_store4_4reg")]

5863 (define_insn "aarch64_st4<mode>_dreg"
5864 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5865 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5866 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5869 "st1\\t{%S1.1d - %V1.1d}, %0"
5870 [(set_attr "type" "neon_store1_4reg")]

;; Expander for STn of 64-bit vectors: BLKmode MEM of nregs * 8 bytes.
5873 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5874 [(match_operand:DI 0 "register_operand")
5875 (match_operand:VSTRUCT 1 "register_operand")
5876 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5879 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5880 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5882 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));

;; Expander for STn of 128-bit vectors.
5886 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5887 [(match_operand:DI 0 "register_operand")
5888 (match_operand:VSTRUCT 1 "register_operand")
5889 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5892 machine_mode mode = <VSTRUCT:MODE>mode;
5893 rtx mem = gen_rtx_MEM (mode, operands[0]);
5895 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; STn single-lane expander (lane bound check presumably on a missing
;; line, matching the ld_lane expander — TODO confirm against full file).
5899 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5900 [(match_operand:DI 0 "register_operand")
5901 (match_operand:VSTRUCT 1 "register_operand")
5902 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5903 (match_operand:SI 2 "immediate_operand")]
5906 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5907 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5910 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5911 mem, operands[1], operands[2]));

;; ST1 intrinsic expander: dedicated big-endian insn, plain move
;; otherwise.
5915 (define_expand "aarch64_st1<VALL_F16:mode>"
5916 [(match_operand:DI 0 "register_operand")
5917 (match_operand:VALL_F16 1 "register_operand")]
5920 machine_mode mode = <VALL_F16:MODE>mode;
5921 rtx mem = gen_rtx_MEM (mode, operands[0]);
5923 if (BYTES_BIG_ENDIAN)
5924 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5926 emit_move_insn (mem, operands[1]);
5930 ;; Expander for builtins to insert vector registers into large
5931 ;; opaque integer modes.

5933 ;; Q-register list. We don't need a D-reg inserter as we zero
5934 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole structure, then overwrite the PART'th 128-bit slot
;; (byte offset part * 16) with operand 2.
5936 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5937 [(match_operand:VSTRUCT 0 "register_operand")
5938 (match_operand:VSTRUCT 1 "register_operand")
5939 (match_operand:VQ 2 "register_operand")
5940 (match_operand:SI 3 "immediate_operand")]
5943 int part = INTVAL (operands[3]);
5944 int offset = part * 16;
5946 emit_move_insn (operands[0], operands[1]);
5947 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),

5952 ;; Standard pattern name vec_init<mode><Vel>.

;; Initialize a vector from element values; all the work is done in
;; aarch64_expand_vector_init.
5954 (define_expand "vec_init<mode><Vel>"
5955 [(match_operand:VALL_F16 0 "register_operand")
5956 (match_operand 1 "" "")]
5959 aarch64_expand_vector_init (operands[0], operands[1]);

;; Variant initializing a 128-bit vector from two half-width vectors.
5963 (define_expand "vec_init<mode><Vhalf>"
5964 [(match_operand:VQ_NO2E 0 "register_operand")
5965 (match_operand 1 "" "")]
5968 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load one element from memory and duplicate it to every lane.
5972 (define_insn "*aarch64_simd_ld1r<mode>"
5973 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5974 (vec_duplicate:VALL_F16
5975 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5977 "ld1r\\t{%0.<Vtype>}, %1"
5978 [(set_attr "type" "neon_load1_all_lanes")]

;; LD1 of two consecutive 128-bit vectors into a register pair.
5981 (define_insn "aarch64_simd_ld1<mode>_x2"
5982 [(set (match_operand:OI 0 "register_operand" "=w")
5983 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5984 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5987 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5988 [(set_attr "type" "neon_load1_2reg<q>")]

;; As above for 64-bit vector modes.
5991 (define_insn "aarch64_simd_ld1<mode>_x2"
5992 [(set (match_operand:OI 0 "register_operand" "=w")
5993 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5994 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5997 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5998 [(set_attr "type" "neon_load1_2reg<q>")]
;; FRECPE: floating-point reciprocal estimate (vector and scalar modes).
6002 (define_insn "@aarch64_frecpe<mode>"
6003 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6005 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
6008 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
6009 [(set_attr "type" "neon_fp_recpe_<stype><q>")]

;; FRECPX: scalar reciprocal exponent.
6012 (define_insn "aarch64_frecpx<mode>"
6013 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
6014 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
6017 "frecpx\t%<s>0, %<s>1"
6018 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]

;; FRECPS: reciprocal refinement step (Newton-Raphson iteration).
6021 (define_insn "@aarch64_frecps<mode>"
6022 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
6024 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
6025 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
6028 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6029 [(set_attr "type" "neon_fp_recps_<stype><q>")]

;; URECPE: unsigned integer reciprocal estimate.
6032 (define_insn "aarch64_urecpe<mode>"
6033 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
6034 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
6037 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
6038 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
6040 ;; Standard pattern name vec_extract<mode><Vel>.

;; Extract one element from a vector; defers to the get_lane insn.
6042 (define_expand "vec_extract<mode><Vel>"
6043 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
6044 (match_operand:VALL_F16 1 "register_operand")
6045 (match_operand:SI 2 "immediate_operand")]
6049 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AESE/AESD round instruction.  Operand 1 uses "%0": tied to the
;; destination and commutative with operand 2 (AES rounds start by
;; XOR-ing state with the round key, which is commutative).
6055 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6056 [(set (match_operand:V16QI 0 "register_operand" "=w")
6059 (match_operand:V16QI 1 "register_operand" "%0")
6060 (match_operand:V16QI 2 "register_operand" "w"))]
6062 "TARGET_SIMD && TARGET_AES"
6063 "aes<aes_op>\\t%0.16b, %2.16b"
6064 [(set_attr "type" "crypto_aese")]

;; AESMC/AESIMC mix-columns instruction.
6067 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6068 [(set (match_operand:V16QI 0 "register_operand" "=w")
6069 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
6071 "TARGET_SIMD && TARGET_AES"
6072 "aes<aesmc_op>\\t%0.16b, %1.16b"
6073 [(set_attr "type" "crypto_aesmc")]

6076 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6077 ;; and enforce the register dependency without scheduling or register
6078 ;; allocation messing up the order or introducing moves inbetween.
6079 ;; Mash the two together during combine.

;; Fused AESE+AESMC pair, emitted back-to-back (length 8 = two insns) so
;; fusion-capable cores can macro-fuse them.
6081 (define_insn "*aarch64_crypto_aese_fused"
6082 [(set (match_operand:V16QI 0 "register_operand" "=w")
6086 (match_operand:V16QI 1 "register_operand" "%0")
6087 (match_operand:V16QI 2 "register_operand" "w"))]
6090 "TARGET_SIMD && TARGET_AES
6091 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6092 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6093 [(set_attr "type" "crypto_aese")
6094 (set_attr "length" "8")]

6097 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6098 ;; and enforce the register dependency without scheduling or register
6099 ;; allocation messing up the order or introducing moves inbetween.
6100 ;; Mash the two together during combine.

;; Fused AESD+AESIMC pair, decrypt analogue of the above.
6102 (define_insn "*aarch64_crypto_aesd_fused"
6103 [(set (match_operand:V16QI 0 "register_operand" "=w")
6107 (match_operand:V16QI 1 "register_operand" "%0")
6108 (match_operand:V16QI 2 "register_operand" "w"))]
6111 "TARGET_SIMD && TARGET_AES
6112 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6113 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6114 [(set_attr "type" "crypto_aese")
6115 (set_attr "length" "8")]
;; SHA-1 instructions.  NOTE(review): output template lines for the
;; sha1h patterns are missing from this extract.

;; SHA1H on an SImode value.
6120 (define_insn "aarch64_crypto_sha1hsi"
6121 [(set (match_operand:SI 0 "register_operand" "=w")
6122 (unspec:SI [(match_operand:SI 1
6123 "register_operand" "w")]
6125 "TARGET_SIMD && TARGET_SHA2"
6127 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1H on lane 0 of a V4SI value (little-endian lane numbering).
6130 (define_insn "aarch64_crypto_sha1hv4si"
6131 [(set (match_operand:SI 0 "register_operand" "=w")
6132 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6133 (parallel [(const_int 0)]))]
6135 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6137 [(set_attr "type" "crypto_sha1_fast")]

;; Big-endian variant: the same architectural lane is index 3.
6140 (define_insn "aarch64_be_crypto_sha1hv4si"
6141 [(set (match_operand:SI 0 "register_operand" "=w")
6142 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6143 (parallel [(const_int 3)]))]
6145 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6147 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1SU1 schedule-update; operand 1 tied to the destination.
6150 (define_insn "aarch64_crypto_sha1su1v4si"
6151 [(set (match_operand:V4SI 0 "register_operand" "=w")
6152 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6153 (match_operand:V4SI 2 "register_operand" "w")]
6155 "TARGET_SIMD && TARGET_SHA2"
6156 "sha1su1\\t%0.4s, %2.4s"
6157 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1C/SHA1P/SHA1M hash-update (slow) instructions.
6160 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6161 [(set (match_operand:V4SI 0 "register_operand" "=w")
6162 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6163 (match_operand:SI 2 "register_operand" "w")
6164 (match_operand:V4SI 3 "register_operand" "w")]
6166 "TARGET_SIMD && TARGET_SHA2"
6167 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6168 [(set_attr "type" "crypto_sha1_slow")]

;; SHA1SU0 schedule-update.
6171 (define_insn "aarch64_crypto_sha1su0v4si"
6172 [(set (match_operand:V4SI 0 "register_operand" "=w")
6173 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6174 (match_operand:V4SI 2 "register_operand" "w")
6175 (match_operand:V4SI 3 "register_operand" "w")]
6177 "TARGET_SIMD && TARGET_SHA2"
6178 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6179 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash-update instructions.
6184 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6185 [(set (match_operand:V4SI 0 "register_operand" "=w")
6186 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6187 (match_operand:V4SI 2 "register_operand" "w")
6188 (match_operand:V4SI 3 "register_operand" "w")]
6190 "TARGET_SIMD && TARGET_SHA2"
6191 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6192 [(set_attr "type" "crypto_sha256_slow")]

;; SHA256SU0 schedule-update.
6195 (define_insn "aarch64_crypto_sha256su0v4si"
6196 [(set (match_operand:V4SI 0 "register_operand" "=w")
6197 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6198 (match_operand:V4SI 2 "register_operand" "w")]
6200 "TARGET_SIMD && TARGET_SHA2"
6201 "sha256su0\\t%0.4s, %2.4s"
6202 [(set_attr "type" "crypto_sha256_fast")]

;; SHA256SU1 schedule-update.
6205 (define_insn "aarch64_crypto_sha256su1v4si"
6206 [(set (match_operand:V4SI 0 "register_operand" "=w")
6207 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6208 (match_operand:V4SI 2 "register_operand" "w")
6209 (match_operand:V4SI 3 "register_operand" "w")]
6211 "TARGET_SIMD && TARGET_SHA2"
6212 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6213 [(set_attr "type" "crypto_sha256_slow")]
;; SHA512H/SHA512H2 hash-update instructions (ARMv8.2 SHA-512 extension,
;; gated here with TARGET_SHA3).
6218 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6219 [(set (match_operand:V2DI 0 "register_operand" "=w")
6220 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6221 (match_operand:V2DI 2 "register_operand" "w")
6222 (match_operand:V2DI 3 "register_operand" "w")]
6224 "TARGET_SIMD && TARGET_SHA3"
6225 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6226 [(set_attr "type" "crypto_sha512")]

;; SHA512SU0 schedule-update.
6229 (define_insn "aarch64_crypto_sha512su0qv2di"
6230 [(set (match_operand:V2DI 0 "register_operand" "=w")
6231 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6232 (match_operand:V2DI 2 "register_operand" "w")]
6234 "TARGET_SIMD && TARGET_SHA3"
6235 "sha512su0\\t%0.2d, %2.2d"
6236 [(set_attr "type" "crypto_sha512")]

;; SHA512SU1 schedule-update.
6239 (define_insn "aarch64_crypto_sha512su1qv2di"
6240 [(set (match_operand:V2DI 0 "register_operand" "=w")
6241 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6242 (match_operand:V2DI 2 "register_operand" "w")
6243 (match_operand:V2DI 3 "register_operand" "w")]
6245 "TARGET_SIMD && TARGET_SHA3"
6246 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6247 [(set_attr "type" "crypto_sha512")]
;; SHA-3 extension instructions.

;; EOR3: three-way exclusive OR, result = op1 ^ op2 ^ op3.
6252 (define_insn "eor3q<mode>4"
6253 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6256 (match_operand:VQ_I 2 "register_operand" "w")
6257 (match_operand:VQ_I 3 "register_operand" "w"))
6258 (match_operand:VQ_I 1 "register_operand" "w")))]
6259 "TARGET_SIMD && TARGET_SHA3"
6260 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6261 [(set_attr "type" "crypto_sha3")]

;; RAX1: rotate op2 left by one and XOR with op1.
6264 (define_insn "aarch64_rax1qv2di"
6265 [(set (match_operand:V2DI 0 "register_operand" "=w")
6268 (match_operand:V2DI 2 "register_operand" "w")
6270 (match_operand:V2DI 1 "register_operand" "w")))]
6271 "TARGET_SIMD && TARGET_SHA3"
6272 "rax1\\t%0.2d, %1.2d, %2.2d"
6273 [(set_attr "type" "crypto_sha3")]

;; XAR: XOR op1 with op2 (commutative, note "%w"), then rotate right by
;; the immediate.
6276 (define_insn "aarch64_xarqv2di"
6277 [(set (match_operand:V2DI 0 "register_operand" "=w")
6280 (match_operand:V2DI 1 "register_operand" "%w")
6281 (match_operand:V2DI 2 "register_operand" "w"))
6282 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6283 "TARGET_SIMD && TARGET_SHA3"
6284 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6285 [(set_attr "type" "crypto_sha3")]

;; BCAX: bit-clear and XOR, result = op1 ^ (op2 & ~op3).
6288 (define_insn "bcaxq<mode>4"
6289 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6292 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6293 (match_operand:VQ_I 2 "register_operand" "w"))
6294 (match_operand:VQ_I 1 "register_operand" "w")))]
6295 "TARGET_SIMD && TARGET_SHA3"
6296 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6297 [(set_attr "type" "crypto_sha3")]
;; SM3 (Chinese hash standard) instructions.

;; SM3SS1.
6302 (define_insn "aarch64_sm3ss1qv4si"
6303 [(set (match_operand:V4SI 0 "register_operand" "=w")
6304 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6305 (match_operand:V4SI 2 "register_operand" "w")
6306 (match_operand:V4SI 3 "register_operand" "w")]
6308 "TARGET_SIMD && TARGET_SM4"
6309 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6310 [(set_attr "type" "crypto_sm3")]

;; SM3TT1A/1B/2A/2B with a 2-bit immediate lane selector.
6314 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6315 [(set (match_operand:V4SI 0 "register_operand" "=w")
6316 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6317 (match_operand:V4SI 2 "register_operand" "w")
6318 (match_operand:V4SI 3 "register_operand" "w")
6319 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6321 "TARGET_SIMD && TARGET_SM4"
6322 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6323 [(set_attr "type" "crypto_sm3")]

;; SM3PARTW1/PARTW2 schedule-update.
6326 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6327 [(set (match_operand:V4SI 0 "register_operand" "=w")
6328 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6329 (match_operand:V4SI 2 "register_operand" "w")
6330 (match_operand:V4SI 3 "register_operand" "w")]
6332 "TARGET_SIMD && TARGET_SM4"
6333 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6334 [(set_attr "type" "crypto_sm3")]

;; SM4 (Chinese block-cipher standard) instructions.

;; SM4E round; operand 1 tied to the destination.
6339 (define_insn "aarch64_sm4eqv4si"
6340 [(set (match_operand:V4SI 0 "register_operand" "=w")
6341 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6342 (match_operand:V4SI 2 "register_operand" "w")]
6344 "TARGET_SIMD && TARGET_SM4"
6345 "sm4e\\t%0.4s, %2.4s"
6346 [(set_attr "type" "crypto_sm4")]

;; SM4EKEY key-schedule instruction.
6349 (define_insn "aarch64_sm4ekeyqv4si"
6350 [(set (match_operand:V4SI 0 "register_operand" "=w")
6351 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6352 (match_operand:V4SI 2 "register_operand" "w")]
6354 "TARGET_SIMD && TARGET_SM4"
6355 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6356 [(set_attr "type" "crypto_sm4")]
;; FP16 fused multiply-add long (FMLAL/FMLSL) patterns: widen FP16 lanes
;; and accumulate into FP32.  NOTE(review): trailing argument lines of
;; the emit_insn calls are missing from this extract.

;; Expander for the _low forms: build two lo-half lane selectors and
;; defer to the matching insn.
6361 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6362 [(set (match_operand:VDQSF 0 "register_operand")
6364 [(match_operand:VDQSF 1 "register_operand")
6365 (match_operand:<VFMLA_W> 2 "register_operand")
6366 (match_operand:<VFMLA_W> 3 "register_operand")]
6370 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6371 <nunits> * 2, false);
6372 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6373 <nunits> * 2, false);
6375 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],

;; Expander for the _high forms (hi-half selectors, third arg true).
6384 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6385 [(set (match_operand:VDQSF 0 "register_operand")
6387 [(match_operand:VDQSF 1 "register_operand")
6388 (match_operand:<VFMLA_W> 2 "register_operand")
6389 (match_operand:<VFMLA_W> 3 "register_operand")]
6393 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6394 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6396 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],

;; FMLAL (add form), low halves.
6404 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6405 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6408 (vec_select:<VFMLA_SEL_W>
6409 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6410 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6412 (vec_select:<VFMLA_SEL_W>
6413 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6414 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6415 (match_operand:VDQSF 1 "register_operand" "0")))]
6417 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6418 [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL (subtract form, note the negation of operand 2), low halves.
6421 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6422 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6426 (vec_select:<VFMLA_SEL_W>
6427 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6428 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6430 (vec_select:<VFMLA_SEL_W>
6431 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6432 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6433 (match_operand:VDQSF 1 "register_operand" "0")))]
6435 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6436 [(set_attr "type" "neon_fp_mul_s")]

;; FMLAL2: high halves.
6439 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6440 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6443 (vec_select:<VFMLA_SEL_W>
6444 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6445 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6447 (vec_select:<VFMLA_SEL_W>
6448 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6449 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6450 (match_operand:VDQSF 1 "register_operand" "0")))]
6452 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6453 [(set_attr "type" "neon_fp_mul_s")]

;; FMLSL2: high halves, subtract form.
6456 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6457 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6461 (vec_select:<VFMLA_SEL_W>
6462 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6463 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6465 (vec_select:<VFMLA_SEL_W>
6466 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6467 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6468 (match_operand:VDQSF 1 "register_operand" "0")))]
6470 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6471 [(set_attr "type" "neon_fp_mul_s")]
6474 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6475 [(set (match_operand:V2SF 0 "register_operand")
6476 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6477 (match_operand:V4HF 2 "register_operand")
6478 (match_operand:V4HF 3 "register_operand")
6479 (match_operand:SI 4 "aarch64_imm2")]
6483 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6484 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6486 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6495 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6496 [(set (match_operand:V2SF 0 "register_operand")
6497 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6498 (match_operand:V4HF 2 "register_operand")
6499 (match_operand:V4HF 3 "register_operand")
6500 (match_operand:SI 4 "aarch64_imm2")]
6504 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6505 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6507 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; FMLAL (by element, low half): multiply the low V2HF half of operand 2
;; by a duplicated half-precision lane of operand 3 (lane index operand
;; 5, constraint "Ui2"), widen, and accumulate into the V2SF
;; destination (operand 1 tied to 0).  Operand 3 uses "x" (restricted
;; to V0-V15, as required for half-precision by-element forms).
;; NOTE(review): listing gaps (6516 -> 6520, 6521 -> 6525, 6527 -> 6529)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6515 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6516 [(set (match_operand:V2SF 0 "register_operand" "=w")
6520 (match_operand:V4HF 2 "register_operand" "w")
6521 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6525 (match_operand:V4HF 3 "register_operand" "x")
6526 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6527 (match_operand:V2SF 1 "register_operand" "0")))]
6529 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6530 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element, low half): as the FMLAL pattern above but the
;; product is subtracted from the accumulator (note the extra paren
;; depth after operand 4, suggesting a negated operand in the hidden
;; lines).  Emits "fmlsl %0.2s, %2.2h, %3.h[%5]".
;; NOTE(review): listing gaps (6534 -> 6539, 6540 -> 6544, 6546 -> 6548)
;; hide the neg/fma wrappers and the insn condition.
6533 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6534 [(set (match_operand:V2SF 0 "register_operand" "=w")
6539 (match_operand:V4HF 2 "register_operand" "w")
6540 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6544 (match_operand:V4HF 3 "register_operand" "x")
6545 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6546 (match_operand:V2SF 1 "register_operand" "0")))]
6548 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6549 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element, high half): same shape as the low-half FMLAL
;; lane pattern, but operand 4 selects the HIGH half
;; (vect_par_cnst_hi_half) and the "2" form of the mnemonic is emitted.
;; NOTE(review): listing gaps (6553 -> 6557, 6558 -> 6562, 6564 -> 6566)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6552 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6553 [(set (match_operand:V2SF 0 "register_operand" "=w")
6557 (match_operand:V4HF 2 "register_operand" "w")
6558 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6562 (match_operand:V4HF 3 "register_operand" "x")
6563 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6564 (match_operand:V2SF 1 "register_operand" "0")))]
6566 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6567 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element, high half): subtracting counterpart of the
;; FMLAL2 lane pattern above; operand 4 selects the high half and
;; "fmlsl2" is emitted.
;; NOTE(review): listing gaps (6571 -> 6576, 6577 -> 6581, 6583 -> 6585)
;; hide the neg/fma wrappers and the insn condition.
6570 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6571 [(set (match_operand:V2SF 0 "register_operand" "=w")
6576 (match_operand:V4HF 2 "register_operand" "w")
6577 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6581 (match_operand:V4HF 3 "register_operand" "x")
6582 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6583 (match_operand:V2SF 1 "register_operand" "0")))]
6585 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6586 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the Q-register by-element (laneq) low-half V4SF
;; variants: both data (operand 2) and lane source (operand 3) are
;; 128-bit V8HF, so the lane index is 0..7 (aarch64_lane_imm3).  Builds
;; a V8HF lo-half selector and an endian-corrected lane rtx, then emits
;; aarch64_simd_fml*lq_laneq_lowv4sf.
;; NOTE(review): listing gaps (6594 -> 6598, 6601 -> 6609) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6589 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6590 [(set (match_operand:V4SF 0 "register_operand")
6591 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6592 (match_operand:V8HF 2 "register_operand")
6593 (match_operand:V8HF 3 "register_operand")
6594 (match_operand:SI 4 "aarch64_lane_imm3")]
6598 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6599 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6601 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; High-half counterpart of the laneq V4SF expander above: the V8HF
;; selector is built with 'true' (upper half) and the *_highv4sf insn
;; is emitted.
;; NOTE(review): listing gaps (6614 -> 6618, 6621 -> 6629) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6609 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6610 [(set (match_operand:V4SF 0 "register_operand")
6611 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6612 (match_operand:V8HF 2 "register_operand")
6613 (match_operand:V8HF 3 "register_operand")
6614 (match_operand:SI 4 "aarch64_lane_imm3")]
6618 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6619 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6621 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (vector-by-element, Q form, laneq, low half): low V4HF half of
;; V8HF operand 2 times half-precision lane %5 of V8HF operand 3
;; (lane constraint "Ui7", i.e. 0..7), widened and accumulated into the
;; V4SF destination (operand 1 tied to 0, operand 3 restricted to "x").
;; NOTE(review): listing gaps (6630 -> 6634, 6635 -> 6639, 6641 -> 6643)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6629 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6630 [(set (match_operand:V4SF 0 "register_operand" "=w")
6634 (match_operand:V8HF 2 "register_operand" "w")
6635 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6639 (match_operand:V8HF 3 "register_operand" "x")
6640 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6641 (match_operand:V4SF 1 "register_operand" "0")))]
6643 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6644 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (Q form, laneq, low half): subtracting counterpart of the
;; pattern above (extra paren depth after operand 4 suggests a negated
;; operand in the hidden lines).  Emits "fmlsl %0.4s, %2.4h, %3.h[%5]".
;; NOTE(review): listing gaps (6648 -> 6653, 6654 -> 6658, 6660 -> 6662)
;; hide the neg/fma wrappers and the insn condition.
6647 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6648 [(set (match_operand:V4SF 0 "register_operand" "=w")
6653 (match_operand:V8HF 2 "register_operand" "w")
6654 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6658 (match_operand:V8HF 3 "register_operand" "x")
6659 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6660 (match_operand:V4SF 1 "register_operand" "0")))]
6662 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6663 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (Q form, laneq, high half): as the low-half laneq FMLAL but
;; operand 4 selects the HIGH V4HF half and "fmlal2" is emitted.
;; NOTE(review): listing gaps (6667 -> 6671, 6672 -> 6676, 6678 -> 6680)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6666 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6667 [(set (match_operand:V4SF 0 "register_operand" "=w")
6671 (match_operand:V8HF 2 "register_operand" "w")
6672 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6676 (match_operand:V8HF 3 "register_operand" "x")
6677 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678 (match_operand:V4SF 1 "register_operand" "0")))]
6680 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6681 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (Q form, laneq, high half): subtracting counterpart of the
;; FMLAL2 laneq pattern above; emits "fmlsl2 %0.4s, %2.4h, %3.h[%5]".
;; NOTE(review): listing gaps (6685 -> 6690, 6691 -> 6695, 6697 -> 6699)
;; hide the neg/fma wrappers and the insn condition.
6684 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6685 [(set (match_operand:V4SF 0 "register_operand" "=w")
6690 (match_operand:V8HF 2 "register_operand" "w")
6691 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6695 (match_operand:V8HF 3 "register_operand" "x")
6696 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6697 (match_operand:V4SF 1 "register_operand" "0")))]
6699 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6700 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the D-register data / Q-register lane-source (laneq)
;; low-half V2SF variants: operand 2 is 64-bit V4HF but the lane source
;; operand 3 is 128-bit V8HF, hence the mixed modes below — the half
;; selector uses V4HFmode while the lane rtx uses V8HFmode (lane index
;; 0..7 via aarch64_lane_imm3).  This asymmetry is intentional.
;; NOTE(review): listing gaps (6708 -> 6712, 6715 -> 6724) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6703 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6704 [(set (match_operand:V2SF 0 "register_operand")
6705 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6706 (match_operand:V4HF 2 "register_operand")
6707 (match_operand:V8HF 3 "register_operand")
6708 (match_operand:SI 4 "aarch64_lane_imm3")]
6712 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6713 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6715 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; High-half counterpart of the laneq V2SF expander above: identical
;; mixed V4HF-data / V8HF-lane modes, but the selector is built with
;; 'true' (upper half) and the *_highv2sf insn is emitted.
;; NOTE(review): listing gaps (6729 -> 6733, 6736 -> 6745) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6724 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6725 [(set (match_operand:V2SF 0 "register_operand")
6726 (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
6727 (match_operand:V4HF 2 "register_operand")
6728 (match_operand:V8HF 3 "register_operand")
6729 (match_operand:SI 4 "aarch64_lane_imm3")]
6733 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6734 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6736 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (laneq, low half, V2SF): low V2HF half of V4HF operand 2 times
;; lane %5 of 128-bit V8HF operand 3 (constraint "Ui7", lane 0..7),
;; widened and accumulated into V2SF (operand 1 tied to 0, operand 3
;; restricted to "x" registers).
;; NOTE(review): listing gaps (6746 -> 6750, 6751 -> 6755, 6757 -> 6759)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6745 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6746 [(set (match_operand:V2SF 0 "register_operand" "=w")
6750 (match_operand:V4HF 2 "register_operand" "w")
6751 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6755 (match_operand:V8HF 3 "register_operand" "x")
6756 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6757 (match_operand:V2SF 1 "register_operand" "0")))]
6759 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6760 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (laneq, low half, V2SF): subtracting counterpart of the
;; pattern above; emits "fmlsl %0.2s, %2.2h, %3.h[%5]".
;; NOTE(review): listing gaps (6764 -> 6769, 6770 -> 6774, 6776 -> 6778)
;; hide the neg/fma wrappers and the insn condition.
6763 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6764 [(set (match_operand:V2SF 0 "register_operand" "=w")
6769 (match_operand:V4HF 2 "register_operand" "w")
6770 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6774 (match_operand:V8HF 3 "register_operand" "x")
6775 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6776 (match_operand:V2SF 1 "register_operand" "0")))]
6778 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6779 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (laneq, high half, V2SF): as the low-half laneq FMLAL above
;; but operand 4 selects the HIGH half and "fmlal2" is emitted.
;; NOTE(review): listing gaps (6783 -> 6787, 6788 -> 6792, 6794 -> 6796)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6782 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6783 [(set (match_operand:V2SF 0 "register_operand" "=w")
6787 (match_operand:V4HF 2 "register_operand" "w")
6788 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6792 (match_operand:V8HF 3 "register_operand" "x")
6793 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6794 (match_operand:V2SF 1 "register_operand" "0")))]
6796 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6797 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (laneq, high half, V2SF): subtracting counterpart of the
;; FMLAL2 laneq pattern above; emits "fmlsl2 %0.2s, %2.2h, %3.h[%5]".
;; NOTE(review): listing gaps (6801 -> 6806, 6807 -> 6811, 6813 -> 6815)
;; hide the neg/fma wrappers and the insn condition.
6800 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6801 [(set (match_operand:V2SF 0 "register_operand" "=w")
6806 (match_operand:V4HF 2 "register_operand" "w")
6807 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6811 (match_operand:V8HF 3 "register_operand" "x")
6812 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6813 (match_operand:V2SF 1 "register_operand" "0")))]
6815 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6816 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the Q-register data / D-register lane-source (_lane)
;; low-half V4SF variants: the mirror image of the laneq-v2sf case —
;; operand 2 is 128-bit V8HF, the lane source operand 3 is 64-bit V4HF,
;; so the half selector uses V8HFmode while the lane rtx uses V4HFmode
;; (lane index 0..3 via aarch64_imm2).  This asymmetry is intentional.
;; NOTE(review): listing gaps (6824 -> 6828, 6831 -> 6839) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6819 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6820 [(set (match_operand:V4SF 0 "register_operand")
6821 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6822 (match_operand:V8HF 2 "register_operand")
6823 (match_operand:V4HF 3 "register_operand")
6824 (match_operand:SI 4 "aarch64_imm2")]
6828 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6829 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6831 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; High-half counterpart of the _lane V4SF expander above: same mixed
;; V8HF-data / V4HF-lane modes, selector built with 'true' (upper
;; half), emitting the *_highv4sf insn.
;; NOTE(review): listing gaps (6844 -> 6848, 6851 -> 6859) hide the
;; unspec tail, expand condition, and remaining emit_insn args.
6839 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6840 [(set (match_operand:V4SF 0 "register_operand")
6841 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
6842 (match_operand:V8HF 2 "register_operand")
6843 (match_operand:V4HF 3 "register_operand")
6844 (match_operand:SI 4 "aarch64_imm2")]
6848 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6849 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6851 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (Q form, _lane, low half): low V4HF half of V8HF operand 2
;; times lane %5 of 64-bit V4HF operand 3 (constraint "Ui2", lane
;; 0..3), widened and accumulated into V4SF (operand 1 tied to 0,
;; operand 3 restricted to "x" registers).
;; NOTE(review): listing gaps (6860 -> 6864, 6865 -> 6869, 6871 -> 6873)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6859 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6860 [(set (match_operand:V4SF 0 "register_operand" "=w")
6864 (match_operand:V8HF 2 "register_operand" "w")
6865 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6869 (match_operand:V4HF 3 "register_operand" "x")
6870 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6871 (match_operand:V4SF 1 "register_operand" "0")))]
6873 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6874 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (Q form, _lane, low half): subtracting counterpart of the
;; pattern above; emits "fmlsl %0.4s, %2.4h, %3.h[%5]".
;; NOTE(review): listing gaps (6878 -> 6883, 6884 -> 6888, 6890 -> 6892)
;; hide the neg/fma wrappers and the insn condition.
6877 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6878 [(set (match_operand:V4SF 0 "register_operand" "=w")
6883 (match_operand:V8HF 2 "register_operand" "w")
6884 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6888 (match_operand:V4HF 3 "register_operand" "x")
6889 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6890 (match_operand:V4SF 1 "register_operand" "0")))]
6892 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6893 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (Q form, _lane, high half): as the low-half _lane FMLAL above
;; but operand 4 selects the HIGH V4HF half and "fmlal2" is emitted.
;; NOTE(review): listing gaps (6897 -> 6901, 6902 -> 6906, 6908 -> 6910)
;; hide the fma/vec_duplicate wrappers and the insn condition.
6896 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6897 [(set (match_operand:V4SF 0 "register_operand" "=w")
6901 (match_operand:V8HF 2 "register_operand" "w")
6902 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6906 (match_operand:V4HF 3 "register_operand" "x")
6907 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6908 (match_operand:V4SF 1 "register_operand" "0")))]
6910 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6911 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (Q form, _lane, high half): subtracting counterpart of the
;; FMLAL2 _lane pattern above; emits "fmlsl2 %0.4s, %2.4h, %3.h[%5]".
;; NOTE(review): listing gaps (6915 -> 6920, 6921 -> 6925, 6927 -> 6929)
;; hide the neg/fma wrappers and the insn condition.
6914 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6915 [(set (match_operand:V4SF 0 "register_operand" "=w")
6920 (match_operand:V8HF 2 "register_operand" "w")
6921 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6925 (match_operand:V4HF 3 "register_operand" "x")
6926 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6927 (match_operand:V4SF 1 "register_operand" "0")))]
6929 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6930 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL (polynomial multiply long, 1Q): 64x64 -> 128-bit carry-less
;; multiply of two DI registers into a TI result, gated on
;; TARGET_SIMD && TARGET_AES.  Modelled as an unspec (the unspec tag
;; line, orig. 6939, is missing from this listing).
6935 (define_insn "aarch64_crypto_pmulldi"
6936 [(set (match_operand:TI 0 "register_operand" "=w")
6937 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6938 (match_operand:DI 2 "register_operand" "w")]
6940 "TARGET_SIMD && TARGET_AES"
6941 "pmull\\t%0.1q, %1.1d, %2.1d"
6942 [(set_attr "type" "crypto_pmull")]
;; PMULL2: as aarch64_crypto_pmulldi above but operating on the upper
;; DI elements of two V2DI registers ("pmull2 %0.1q, %1.2d, %2.2d"),
;; same TARGET_SIMD && TARGET_AES gate.  The unspec tag line
;; (orig. 6949) and the pattern's closing lines fall outside this
;; listing.
6945 (define_insn "aarch64_crypto_pmullv2di"
6946 [(set (match_operand:TI 0 "register_operand" "=w")
6947 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6948 (match_operand:V2DI 2 "register_operand" "w")]
6950 "TARGET_SIMD && TARGET_AES"
6951 "pmull2\\t%0.1q, %1.2d, %2.2d"
6952 [(set_attr "type" "crypto_pmull")]