1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Vector move expander (VALL_F16 modes).  Stores of an all-zero immediate
;; can write xzr directly; any other memory destination forces the source
;; into a register first.
;; NOTE(review): the expander's condition string and closing delimiters are
;; missing from this extract -- confirm against the full machine description.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander (VALL modes).  Must not fail during
;; expansion, so a mem := mem / mem := const form is legalised by forcing
;; operand 1 into a register.
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast (DUP) patterns: replicate a scalar or a selected vector lane
;; across all lanes of the destination.
;; NOTE(review): several interior lines (vec_duplicate wrappers, condition
;; strings) are missing from this extract.

;; Integer dup from a SIMD register lane 0, or from a GP register (?r).
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]

;; Floating-point dup from a SIMD register lane 0.
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]

;; Dup of an arbitrary lane; lane number is remapped for big-endian.
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
86 [(set_attr "type" "neon_dup<q>")]

;; Dup of a lane from a vector of the opposite (swapped) width.
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 [(set_attr "type" "neon_dup<q>")]
;; General 64-bit (VD) and 128-bit (VQ) vector moves covering load, store,
;; store-of-zero via xzr, SIMD-to-SIMD, SIMD<->GP transfers and immediates.
;; NOTE(review): some alternatives' case labels are missing from this
;; extract; alternative order is inferred from the constraint strings.
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
113 switch (which_alternative)
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]

;; 128-bit variant: a zero store becomes stp xzr, xzr; GP<->SIMD transfers
;; take two instructions (length 8).
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umn, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
141 switch (which_alternative)
144 return "ldr\t%q0, %1";
146 return "stp\txzr, xzr, %0";
148 return "str\t%q1, %0";
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ;; When storing lane zero we can use the normal STR and its more permissive
;; (addressing modes -- continuation line missing from this extract.)
;; Store of vector lane 0 as a plain scalar STR; only valid when the
;; selected lane is lane 0 after endian remapping.
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP/STP patterns: two vector loads/stores fused into a register pair
;; access.  The second address must equal the first plus the first mode's
;; size (checked with rtx_equal_p/plus_constant).

;; 64-bit vector load pair.
180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
181 [(set (match_operand:DREG 0 "register_operand" "=w")
182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:DREG2 2 "register_operand" "=w")
184 (match_operand:DREG2 3 "memory_operand" "m"))]
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<DREG:MODE>mode)))"
191 [(set_attr "type" "neon_ldp")]

;; 64-bit vector store pair.
194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:DREG 1 "register_operand" "w"))
197 (set (match_operand:DREG2 2 "memory_operand" "=m")
198 (match_operand:DREG2 3 "register_operand" "w"))]
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<DREG:MODE>mode)))"
205 [(set_attr "type" "neon_stp")]

;; 128-bit vector load pair.
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
219 [(set_attr "type" "neon_ldp_q")]

;; 128-bit vector store pair.
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
232 [(set_attr "type" "neon_stp_q")]
;; Post-reload splitters for 128-bit moves that ended up in GP registers.
;; NOTE(review): the "(define_split" header lines appear to have been
;; dropped from this extract -- the bodies below are split patterns.

;; GP -> GP: split into two DImode register moves.
237 [(set (match_operand:VQ 0 "register_operand" "")
238 (match_operand:VQ 1 "register_operand" ""))]
239 "TARGET_SIMD && reload_completed
240 && GP_REGNUM_P (REGNO (operands[0]))
241 && GP_REGNUM_P (REGNO (operands[1]))"
244 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);

;; FP <-> GP cross-file move: let aarch64_split_simd_move pick the sequence.
249 [(set (match_operand:VQ 0 "register_operand" "")
250 (match_operand:VQ 1 "register_operand" ""))]
251 "TARGET_SIMD && reload_completed
252 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
253 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
256 aarch64_split_simd_move (operands[0], operands[1]);

;; Expander used by the splitter above: moves a 128-bit value in halves.
;; GP source: write low then high quad; GP destination: extract both halves.
260 (define_expand "@aarch64_split_simd_mov<mode>"
261 [(set (match_operand:VQ 0)
262 (match_operand:VQ 1))]
265 rtx dst = operands[0];
266 rtx src = operands[1];
268 if (GP_REGNUM_P (REGNO (src)))
270 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
271 rtx src_high_part = gen_highpart (<VHALF>mode, src);
274 (gen_move_lo_quad_<mode> (dst, src_low_part));
276 (gen_move_hi_quad_<mode> (dst, src_high_part));
281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low/high 64-bit half of a 128-bit vector into a GP register;
;; only used after reload (see the split expander above in the full file).

295 (define_insn "aarch64_simd_mov_from_<mode>low"
296 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
298 (match_operand:VQ 1 "register_operand" "w")
299 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
300 "TARGET_SIMD && reload_completed"
302 [(set_attr "type" "neon_to_gp<q>")
303 (set_attr "length" "4")

306 (define_insn "aarch64_simd_mov_from_<mode>high"
307 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
309 (match_operand:VQ 1 "register_operand" "w")
310 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
311 "TARGET_SIMD && reload_completed"
313 [(set_attr "type" "neon_to_gp<q>")
314 (set_attr "length" "4")
;; Basic three-operand integer vector ops.  Note ORN/BIC negate operand 1,
;; so the assembler operand order is %2, %1.

;; (ior (not x) y) -> ORN
317 (define_insn "orn<mode>3"
318 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
319 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
320 (match_operand:VDQ_I 2 "register_operand" "w")))]
322 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
323 [(set_attr "type" "neon_logic<q>")]

;; (and (not x) y) -> BIC
326 (define_insn "bic<mode>3"
327 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
328 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
329 (match_operand:VDQ_I 2 "register_operand" "w")))]
331 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
332 [(set_attr "type" "neon_logic<q>")]

;; Vector integer addition.
335 (define_insn "add<mode>3"
336 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
337 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
338 (match_operand:VDQ_I 2 "register_operand" "w")))]
340 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
341 [(set_attr "type" "neon_add<q>")]

;; Vector integer subtraction.
344 (define_insn "sub<mode>3"
345 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
346 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
347 (match_operand:VDQ_I 2 "register_operand" "w")))]
349 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
350 [(set_attr "type" "neon_sub<q>")]

;; Vector integer multiply (no 64-bit element form, hence VDQ_BHSI).
353 (define_insn "mul<mode>3"
354 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
355 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
356 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
358 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
359 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Bit/byte reversal and count-trailing-zeros.

;; Byte swap within each element via REV16/REV32/REV64.
362 (define_insn "bswap<mode>2"
363 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
364 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
366 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
367 [(set_attr "type" "neon_rev<q>")]

;; Bit reversal within each byte (RBIT operates on byte vectors only).
370 (define_insn "aarch64_rbit<mode>"
371 [(set (match_operand:VB 0 "register_operand" "=w")
372 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
375 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
376 [(set_attr "type" "neon_rbit")]

;; ctz = clz (rbit-per-byte (bswap x)): byte-swap, bit-reverse each byte
;; (via a QI-element view of the vector), then count leading zeros.
379 (define_expand "ctz<mode>2"
380 [(set (match_operand:VS 0 "register_operand")
381 (ctz:VS (match_operand:VS 1 "register_operand")))]
384 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
385 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
387 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
388 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(a, b) = a with its sign bit XORed by b's sign bit.
;; Implemented on the integer view of the vector: mask b down to its sign
;; bits (AND with -1 << (unit_bits - 1)), then XOR into a.
393 (define_expand "xorsign<mode>3"
394 [(match_operand:VHSDF 0 "register_operand")
395 (match_operand:VHSDF 1 "register_operand")
396 (match_operand:VHSDF 2 "register_operand")]
400 machine_mode imode = <V_INT_EQUIV>mode;
401 rtx v_bitmask = gen_reg_rtx (imode);
402 rtx op1x = gen_reg_rtx (imode);
403 rtx op2x = gen_reg_rtx (imode);
405 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
406 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
408 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
410 emit_move_insn (v_bitmask,
411 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
412 HOST_WIDE_INT_M1U << bits));
414 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
415 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
416 emit_move_insn (operands[0],
417 lowpart_subreg (<MODE>mode, op1x, imode));
422 ;; The fcadd and fcmla patterns are made UNSPEC explicitly due to the
423 ;; fact that their usage needs to guarantee that the source vectors are
424 ;; contiguous. It would be wrong to describe the operation without being able
425 ;; to describe the permute that is also required, but even if that is done
426 ;; the permute would have been created as a LOAD_LANES which means the values
427 ;; in the registers are in the wrong order.
;; Complex arithmetic (FCADD / FCMLA) patterns; <rot> selects the rotation.
;; Lane variants remap the lane index for big-endian targets.

;; Complex add with rotation.
428 (define_insn "aarch64_fcadd<rot><mode>"
429 [(set (match_operand:VHSDF 0 "register_operand" "=w")
430 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
431 (match_operand:VHSDF 2 "register_operand" "w")]
434 "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
435 [(set_attr "type" "neon_fcadd")]

;; Complex multiply-accumulate; operand 1 is the accumulator ("0" ties it
;; to the destination).
438 (define_insn "aarch64_fcmla<rot><mode>"
439 [(set (match_operand:VHSDF 0 "register_operand" "=w")
440 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
441 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
442 (match_operand:VHSDF 3 "register_operand" "w")]
445 "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
446 [(set_attr "type" "neon_fcmla")]

;; Lane form: operand 4 is the (complex-pair) lane index.
450 (define_insn "aarch64_fcmla_lane<rot><mode>"
451 [(set (match_operand:VHSDF 0 "register_operand" "=w")
452 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
453 (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
454 (match_operand:VHSDF 3 "register_operand" "w")
455 (match_operand:SI 4 "const_int_operand" "n")]
459 operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
460 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
462 [(set_attr "type" "neon_fcmla")]

;; V4HF accumulator with a V8HF (quad) lane source.
465 (define_insn "aarch64_fcmla_laneq<rot>v4hf"
466 [(set (match_operand:V4HF 0 "register_operand" "=w")
467 (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
468 (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
469 (match_operand:V8HF 3 "register_operand" "w")
470 (match_operand:SI 4 "const_int_operand" "n")]
474 operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
475 return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
477 [(set_attr "type" "neon_fcmla")]

;; Quad accumulator with a half-width lane source; lane count is halved
;; because lanes index complex (two-element) pairs.
480 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
481 [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
482 (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
483 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
484 (match_operand:<VHALF> 3 "register_operand" "w")
485 (match_operand:SI 4 "const_int_operand" "n")]
489 int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
491 = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
492 return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
494 [(set_attr "type" "neon_fcmla")]
497 ;; These instructions map to the __builtins for the Dot Product operations.
;; [SU]DOT: 4-way byte dot product accumulated into 32-bit lanes.
498 (define_insn "aarch64_<sur>dot<vsi2qi>"
499 [(set (match_operand:VS 0 "register_operand" "=w")
500 (plus:VS (match_operand:VS 1 "register_operand" "0")
501 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
502 (match_operand:<VSI2QI> 3 "register_operand" "w")]
505 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
506 [(set_attr "type" "neon_dot<q>")]

509 ;; These expands map to the Dot Product optab the vectorizer checks for.
510 ;; The auto-vectorizer expects a dot product builtin that also does an
511 ;; accumulation into the provided register.
512 ;; Given the following pattern
514 ;; for (i=0; i<len; i++) {
520 ;; This can be auto-vectorized to
521 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
523 ;; given enough iterations. However the vectorizer can keep unrolling the loop
524 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
525 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
528 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; Optab expander: accumulate the dot product into operand 3, then copy to
;; operand 0.
529 (define_expand "<sur>dot_prod<vsi2qi>"
530 [(set (match_operand:VS 0 "register_operand")
531 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
532 (match_operand:<VSI2QI> 2 "register_operand")]
534 (match_operand:VS 3 "register_operand")))]
538 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
540 emit_insn (gen_rtx_SET (operands[0], operands[3]));

544 ;; These instructions map to the __builtins for the Dot Product
545 ;; indexed operations.
;; Indexed (lane) dot product, 64-bit index vector (V8QI).
546 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
547 [(set (match_operand:VS 0 "register_operand" "=w")
548 (plus:VS (match_operand:VS 1 "register_operand" "0")
549 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
550 (match_operand:V8QI 3 "register_operand" "<h_con>")
551 (match_operand:SI 4 "immediate_operand" "i")]
555 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
556 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
558 [(set_attr "type" "neon_dot<q>")]

;; Indexed dot product, 128-bit index vector (V16QI).
561 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
562 [(set (match_operand:VS 0 "register_operand" "=w")
563 (plus:VS (match_operand:VS 1 "register_operand" "0")
564 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
565 (match_operand:V16QI 3 "register_operand" "<h_con>")
566 (match_operand:SI 4 "immediate_operand" "i")]
570 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
571 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
573 [(set_attr "type" "neon_dot<q>")]
;; copysign(x, y): bit-select between operands using a sign-bit mask
;; (-1 << (unit_bits - 1)); mask-selected bits come from y (the sign),
;; the rest from x, via BSL.
576 (define_expand "copysign<mode>3"
577 [(match_operand:VHSDF 0 "register_operand")
578 (match_operand:VHSDF 1 "register_operand")
579 (match_operand:VHSDF 2 "register_operand")]
580 "TARGET_FLOAT && TARGET_SIMD"
582 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
583 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
585 emit_move_insn (v_bitmask,
586 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
587 HOST_WIDE_INT_M1U << bits));
588 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
589 operands[2], operands[1]));
;; Multiply-by-element patterns: one operand is a single lane (or scalar)
;; broadcast across the vector.  Lane indices are endian-remapped.

;; Same-width lane source.
594 (define_insn "*aarch64_mul3_elt<mode>"
595 [(set (match_operand:VMUL 0 "register_operand" "=w")
599 (match_operand:VMUL 1 "register_operand" "<h_con>")
600 (parallel [(match_operand:SI 2 "immediate_operand")])))
601 (match_operand:VMUL 3 "register_operand" "w")))]
604 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
605 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
607 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; Swapped-width lane source.
610 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
611 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
612 (mult:VMUL_CHANGE_NLANES
613 (vec_duplicate:VMUL_CHANGE_NLANES
615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
616 (parallel [(match_operand:SI 2 "immediate_operand")])))
617 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
620 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
621 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
623 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]

;; Scalar broadcast source: always uses lane 0 of the scalar register.
626 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
627 [(set (match_operand:VMUL 0 "register_operand" "=w")
630 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
631 (match_operand:VMUL 2 "register_operand" "w")))]
633 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
634 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate/step and the rsqrt optab expander.

;; FRSQRTE: reciprocal square-root estimate.
637 (define_insn "@aarch64_rsqrte<mode>"
638 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
639 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
642 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
643 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; FRSQRTS: Newton-Raphson step for reciprocal square root.
645 (define_insn "@aarch64_rsqrts<mode>"
646 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
647 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
648 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
651 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
652 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; rsqrt optab: expand via the approximate-sqrt helper (recip = true).
654 (define_expand "rsqrt<mode>2"
655 [(set (match_operand:VALLF 0 "register_operand" "=w")
656 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
660 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a selected lane of a V2DF register.
664 (define_insn "*aarch64_mul3_elt_to_64v2df"
665 [(set (match_operand:DF 0 "register_operand" "=w")
668 (match_operand:V2DF 1 "register_operand" "w")
669 (parallel [(match_operand:SI 2 "immediate_operand")]))
670 (match_operand:DF 3 "register_operand" "w")))]
673 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
674 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
676 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate and absolute value.

679 (define_insn "neg<mode>2"
680 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
681 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
683 "neg\t%0.<Vtype>, %1.<Vtype>"
684 [(set_attr "type" "neon_neg<q>")]

687 (define_insn "abs<mode>2"
688 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
689 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
691 "abs\t%0.<Vtype>, %1.<Vtype>"
692 [(set_attr "type" "neon_abs<q>")]
695 ;; The intrinsic version of integer ABS must not be allowed to
696 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept as an unspec so combine cannot fold it into
;; ABS-combining instructions (see the comment above in the file).
698 (define_insn "aarch64_abs<mode>"
699 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
701 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
704 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
705 [(set_attr "type" "neon_abs<q>")]
708 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
709 ;; This isn't accurate as ABS always treats its input as a signed value.
710 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
711 ;; Whereas SABD would return 192 (-64 signed) on the above example.
712 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
;; Absolute-difference family: SABD/UABD and the widening/accumulating
;; variants used to build sum-of-absolute-differences sequences.

;; [SU]ABD expressed as max - min (see rationale in the comment above).
713 (define_insn "*aarch64_<su>abd<mode>_3"
714 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
717 (match_operand:VDQ_BHSI 1 "register_operand" "w")
718 (match_operand:VDQ_BHSI 2 "register_operand" "w"))
723 "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
724 [(set_attr "type" "neon_abd<q>")]

;; [SU]ABDL2: widening absolute difference of the high halves.
727 (define_insn "aarch64_<sur>abdl2<mode>_3"
728 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
729 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
730 (match_operand:VDQV_S 2 "register_operand" "w")]
733 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
734 [(set_attr "type" "neon_abd<q>")]

;; [SU]ABAL: widening absolute difference, accumulated into operand 3.
737 (define_insn "aarch64_<sur>abal<mode>_4"
738 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
739 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
740 (match_operand:VDQV_S 2 "register_operand" "w")
741 (match_operand:<VDBLW> 3 "register_operand" "0")]
744 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
745 [(set_attr "type" "neon_arith_acc<q>")]

;; [SU]ADALP: pairwise widening add, accumulated into operand 2.
748 (define_insn "aarch64_<sur>adalp<mode>_3"
749 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
750 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
751 (match_operand:<VDBLW> 2 "register_operand" "0")]
754 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
755 [(set_attr "type" "neon_reduc_add<q>")]
758 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
759 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
760 ;; reduction of the difference into a V4SI vector and accumulate that into
761 ;; operand 3 before copying that into the result operand 0.
762 ;; Perform that with a sequence of:
763 ;; UABDL2 tmp.8h, op1.16b, op2.16b
764 ;; UABAL tmp.8h, op1.16b, op2.16b
765 ;; UADALP op3.4s, tmp.8h
766 ;; MOV op0, op3 // should be eliminated in later passes.
767 ;; The signed version just uses the signed variants of the above instructions.
;; Sum-of-absolute-differences expander: ABDL2 + ABAL + ADALP + move, as
;; described in the comment block above this pattern.
769 (define_expand "<sur>sadv16qi"
770 [(use (match_operand:V4SI 0 "register_operand"))
771 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
772 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
773 (use (match_operand:V4SI 3 "register_operand"))]
776 rtx reduc = gen_reg_rtx (V8HImode);
777 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
779 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
780 operands[2], reduc));
781 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
783 emit_move_insn (operands[0], operands[3]);

;; SABA: signed absolute difference and accumulate (operand 3 accumulator).
788 (define_insn "aba<mode>_3"
789 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
790 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
791 (match_operand:VDQ_BHSI 1 "register_operand" "w")
792 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
793 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
795 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
796 [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference, abs(op1 - op2).
799 (define_insn "fabd<mode>3"
800 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
803 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
804 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
806 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
807 [(set_attr "type" "neon_fp_abd_<stype><q>")]
810 ;; For AND (vector, register) and BIC (vector, immediate)
;; Second alternative matches an immediate that BIC can encode (Db).
811 (define_insn "and<mode>3"
812 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
813 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
814 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
817 switch (which_alternative)
820 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
822 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
828 [(set_attr "type" "neon_logic<q>")]

831 ;; For ORR (vector, register) and ORR (vector, immediate)
;; Second alternative matches an immediate that ORR can encode (Do).
832 (define_insn "ior<mode>3"
833 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
834 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
835 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
838 switch (which_alternative)
841 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
843 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
849 [(set_attr "type" "neon_logic<q>")]

;; Vector exclusive-or (EOR).
852 (define_insn "xor<mode>3"
853 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
854 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
855 (match_operand:VDQ_I 2 "register_operand" "w")))]
857 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
858 [(set_attr "type" "neon_logic<q>")]

;; Vector bitwise NOT.
861 (define_insn "one_cmpl<mode>2"
862 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
863 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
865 "not\t%0.<Vbtype>, %1.<Vbtype>"
866 [(set_attr "type" "neon_logic<q>")]
;; Lane insertion (INS / LD1-to-lane) patterns.  Operand 2 is a one-hot
;; lane mask; exact_log2 recovers the lane, which is endian-remapped and
;; re-encoded as a mask for the %p2 output modifier.

;; Insert a scalar into a lane: from SIMD lane 0, from a GP register,
;; or loaded directly from memory with LD1.
869 (define_insn "aarch64_simd_vec_set<mode>"
870 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
872 (vec_duplicate:VALL_F16
873 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
874 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
875 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
878 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
879 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
880 switch (which_alternative)
883 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
885 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
887 return "ld1\\t{%0.<Vetype>}[%p2], %1";
892 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]

;; Copy one lane of a same-width vector into a lane of the destination.
895 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
896 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
898 (vec_duplicate:VALL_F16
900 (match_operand:VALL_F16 3 "register_operand" "w")
902 [(match_operand:SI 4 "immediate_operand" "i")])))
903 (match_operand:VALL_F16 1 "register_operand" "0")
904 (match_operand:SI 2 "immediate_operand" "i")))]
907 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
908 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
909 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
911 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
913 [(set_attr "type" "neon_ins<q>")]

;; Same, but the source lane comes from a swapped-width vector.
916 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
917 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
918 (vec_merge:VALL_F16_NO_V2Q
919 (vec_duplicate:VALL_F16_NO_V2Q
921 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
923 [(match_operand:SI 4 "immediate_operand" "i")])))
924 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
925 (match_operand:SI 2 "immediate_operand" "i")))]
928 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
929 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
930 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
931 INTVAL (operands[4]));
933 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
935 [(set_attr "type" "neon_ins<q>")]
;; Vector shift instructions: immediate forms (USHR/SSHR/SHL) and
;; register forms (SSHL/USHL; the unsigned/signed register shifts are
;; unspecs because the shift amount may be negative).

;; Logical right shift by immediate.
938 (define_insn "aarch64_simd_lshr<mode>"
939 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
940 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
941 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
943 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
944 [(set_attr "type" "neon_shift_imm<q>")]

;; Arithmetic right shift by immediate.
947 (define_insn "aarch64_simd_ashr<mode>"
948 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
949 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
950 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
952 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
953 [(set_attr "type" "neon_shift_imm<q>")]

;; Left shift by immediate.
956 (define_insn "aarch64_simd_imm_shl<mode>"
957 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
958 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
959 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
961 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
962 [(set_attr "type" "neon_shift_imm<q>")]

;; Left shift by (per-lane) register amount.
965 (define_insn "aarch64_simd_reg_sshl<mode>"
966 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
967 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
968 (match_operand:VDQ_I 2 "register_operand" "w")))]
970 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
971 [(set_attr "type" "neon_shift_reg<q>")]

;; Unsigned shift by register amount (negative amount = right shift).
974 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
975 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
976 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
977 (match_operand:VDQ_I 2 "register_operand" "w")]
978 UNSPEC_ASHIFT_UNSIGNED))]
980 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
981 [(set_attr "type" "neon_shift_reg<q>")]

;; Signed shift by register amount (negative amount = right shift).
984 (define_insn "aarch64_simd_reg_shl<mode>_signed"
985 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
986 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
987 (match_operand:VDQ_I 2 "register_operand" "w")]
988 UNSPEC_ASHIFT_SIGNED))]
990 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
991 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift optab expanders.  A constant in-range amount uses the immediate
;; patterns; otherwise the (possibly negated) amount is broadcast and the
;; register-shift patterns are used.  Right shifts negate the amount
;; because [SU]SHL shifts right for negative amounts.

;; Left shift.
994 (define_expand "ashl<mode>3"
995 [(match_operand:VDQ_I 0 "register_operand" "")
996 (match_operand:VDQ_I 1 "register_operand" "")
997 (match_operand:SI 2 "general_operand" "")]
1000 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1003 if (CONST_INT_P (operands[2]))
1005 shift_amount = INTVAL (operands[2]);
1006 if (shift_amount >= 0 && shift_amount < bit_width)
1008 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1010 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1017 operands[2] = force_reg (SImode, operands[2]);
1020 else if (MEM_P (operands[2]))
1022 operands[2] = force_reg (SImode, operands[2]);
1025 if (REG_P (operands[2]))
1027 rtx tmp = gen_reg_rtx (<MODE>mode);
1028 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
1029 convert_to_mode (<VEL>mode,
1032 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],

;; Logical right shift: broadcast the negated amount, then USHL.
1041 (define_expand "lshr<mode>3"
1042 [(match_operand:VDQ_I 0 "register_operand" "")
1043 (match_operand:VDQ_I 1 "register_operand" "")
1044 (match_operand:SI 2 "general_operand" "")]
1047 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1050 if (CONST_INT_P (operands[2]))
1052 shift_amount = INTVAL (operands[2]);
1053 if (shift_amount > 0 && shift_amount <= bit_width)
1055 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1057 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1063 operands[2] = force_reg (SImode, operands[2]);
1065 else if (MEM_P (operands[2]))
1067 operands[2] = force_reg (SImode, operands[2]);
1070 if (REG_P (operands[2]))
1072 rtx tmp = gen_reg_rtx (SImode);
1073 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1074 emit_insn (gen_negsi2 (tmp, operands[2]));
1075 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1076 convert_to_mode (<VEL>mode,
1078 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],

;; Arithmetic right shift: broadcast the negated amount, then SSHL.
1088 (define_expand "ashr<mode>3"
1089 [(match_operand:VDQ_I 0 "register_operand" "")
1090 (match_operand:VDQ_I 1 "register_operand" "")
1091 (match_operand:SI 2 "general_operand" "")]
1094 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1097 if (CONST_INT_P (operands[2]))
1099 shift_amount = INTVAL (operands[2]);
1100 if (shift_amount > 0 && shift_amount <= bit_width)
1102 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1104 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1110 operands[2] = force_reg (SImode, operands[2]);
1112 else if (MEM_P (operands[2]))
1114 operands[2] = force_reg (SImode, operands[2]);
1117 if (REG_P (operands[2]))
1119 rtx tmp = gen_reg_rtx (SImode);
1120 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1121 emit_insn (gen_negsi2 (tmp, operands[2]));
1122 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1123 convert_to_mode (<VEL>mode,
1125 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: counts are already a vector, so SSHL can be
;; used directly with no duplication step.
1135 (define_expand "vashl<mode>3"
1136 [(match_operand:VDQ_I 0 "register_operand" "")
1137 (match_operand:VDQ_I 1 "register_operand" "")
1138 (match_operand:VDQ_I 2 "register_operand" "")]
1141 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1146 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1147 ;; Negating individual lanes most certainly offsets the
1148 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the whole count vector,
;; then use the signed variable-shift (SSHL) pattern.
1149 (define_expand "vashr<mode>3"
1150 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1151 (match_operand:VDQ_BHSI 1 "register_operand" "")
1152 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1155 rtx neg = gen_reg_rtx (<MODE>mode);
1156 emit (gen_neg<mode>2 (neg, operands[2]));
1157 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode arithmetic shift right for the SIMD intrinsics.  A shift by 64 is
;; remapped to 63, which produces the same all-sign-bits result that the
;; standard ashrdi3 pattern cannot express.
1163 (define_expand "aarch64_ashr_simddi"
1164 [(match_operand:DI 0 "register_operand" "=w")
1165 (match_operand:DI 1 "register_operand" "w")
1166 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1169 /* An arithmetic shift right by 64 fills the result with copies of the sign
1170 bit, just like asr by 63 - however the standard pattern does not handle
1172 if (INTVAL (operands[2]) == 64)
1173 operands[2] = GEN_INT (63);
1174 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical right shift: negate the count vector and use
;; the unsigned variable-shift (USHL) pattern.
1179 (define_expand "vlshr<mode>3"
1180 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1181 (match_operand:VDQ_BHSI 1 "register_operand" "")
1182 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1185 rtx neg = gen_reg_rtx (<MODE>mode);
1186 emit (gen_neg<mode>2 (neg, operands[2]));
1187 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode logical shift right for the SIMD intrinsics.  A shift by 64 yields
;; zero, which is emitted directly; other counts go through lshrdi3.
1192 (define_expand "aarch64_lshr_simddi"
1193 [(match_operand:DI 0 "register_operand" "=w")
1194 (match_operand:DI 1 "register_operand" "w")
1195 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1198 if (INTVAL (operands[2]) == 64)
1199 emit_move_insn (operands[0], const0_rtx);
1201 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1206 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper) for 64-bit vectors.
;; On big-endian the architectural direction is reversed, so SHL is used
;; instead of USHR.
1207 (define_insn "vec_shr_<mode>"
1208 [(set (match_operand:VD 0 "register_operand" "=w")
1209 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1210 (match_operand:SI 2 "immediate_operand" "i")]
1214 if (BYTES_BIG_ENDIAN)
1215 return "shl %d0, %d1, %2";
1217 return "ushr %d0, %d1, %2";
1219 [(set_attr "type" "neon_shift_imm")]
;; Standard vec_set expander: converts the element index in operands[2]
;; into a one-hot mask for the underlying vec_set insn.
1222 (define_expand "vec_set<mode>"
1223 [(match_operand:VALL_F16 0 "register_operand" "+w")
1224 (match_operand:<VEL> 1 "register_operand" "w")
1225 (match_operand:SI 2 "immediate_operand" "")]
1228 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1229 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1230 GEN_INT (elem), operands[0]));
;; Integer multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3,
;; with operand 1 tied to the destination (constraint "0") as MLA requires.
1236 (define_insn "aarch64_mla<mode>"
1237 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1238 (plus:VDQ_BHSI (mult:VDQ_BHSI
1239 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1240 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1241 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1243 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1244 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand broadcast from a lane of a same-width vector.
;; The lane number is remapped for endianness before printing.
1247 (define_insn "*aarch64_mla_elt<mode>"
1248 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1251 (vec_duplicate:VDQHS
1253 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1254 (parallel [(match_operand:SI 2 "immediate_operand")])))
1255 (match_operand:VDQHS 3 "register_operand" "w"))
1256 (match_operand:VDQHS 4 "register_operand" "0")))]
1259 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1260 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1262 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane source vector has the opposite (swapped) width,
;; so the lane index is remapped in <VSWAP_WIDTH>mode.
1265 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1266 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1269 (vec_duplicate:VDQHS
1271 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1272 (parallel [(match_operand:SI 2 "immediate_operand")])))
1273 (match_operand:VDQHS 3 "register_operand" "w"))
1274 (match_operand:VDQHS 4 "register_operand" "0")))]
1277 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1278 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1280 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where the broadcast value is a scalar register: always lane 0 of the
;; scalar viewed as a vector, so no endian lane remapping is needed.
1283 (define_insn "*aarch64_mla_elt_merge<mode>"
1284 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1286 (mult:VDQHS (vec_duplicate:VDQHS
1287 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1288 (match_operand:VDQHS 2 "register_operand" "w"))
1289 (match_operand:VDQHS 3 "register_operand" "0")))]
1291 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1292 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Integer multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
1295 (define_insn "aarch64_mls<mode>"
1296 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1297 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1298 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1299 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1301 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1302 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Lane-broadcast variants of MLS, mirroring the MLA lane patterns above.
1305 (define_insn "*aarch64_mls_elt<mode>"
1306 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1308 (match_operand:VDQHS 4 "register_operand" "0")
1310 (vec_duplicate:VDQHS
1312 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1313 (parallel [(match_operand:SI 2 "immediate_operand")])))
1314 (match_operand:VDQHS 3 "register_operand" "w"))))]
1317 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1318 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1320 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with the lane taken from a swapped-width vector.
1323 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1324 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1326 (match_operand:VDQHS 4 "register_operand" "0")
1328 (vec_duplicate:VDQHS
1330 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1331 (parallel [(match_operand:SI 2 "immediate_operand")])))
1332 (match_operand:VDQHS 3 "register_operand" "w"))))]
1335 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1336 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1338 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS with a scalar-register broadcast (lane 0), like the MLA merge pattern.
1341 (define_insn "*aarch64_mls_elt_merge<mode>"
1342 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1344 (match_operand:VDQHS 1 "register_operand" "0")
1345 (mult:VDQHS (vec_duplicate:VDQHS
1346 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1347 (match_operand:VDQHS 3 "register_operand" "w"))))]
1349 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1350 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1353 ;; Max/Min operations.
;; Signed/unsigned vector max/min for 8/16/32-bit element vectors,
;; mapping directly to SMAX/SMIN/UMAX/UMIN.
1354 (define_insn "<su><maxmin><mode>3"
1355 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1356 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1357 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1359 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1360 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min has no single instruction; synthesise it by emitting a
;; compare and a vcond (vector select) on the comparison result.
1363 (define_expand "<su><maxmin>v2di3"
1364 [(set (match_operand:V2DI 0 "register_operand" "")
1365 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1366 (match_operand:V2DI 2 "register_operand" "")))]
1369 enum rtx_code cmp_operator;
1390 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1391 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1392 operands[2], cmp_fmt, operands[1], operands[2]))?;
1396 ;; Pairwise Integer Max/Min operations.
1397 (define_insn "aarch64_<maxmin_uns>p<mode>"
1398 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1399 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1400 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1403 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1404 [(set_attr "type" "neon_minmax<q>")]
1407 ;; Pairwise FP Max/Min operations.
1408 (define_insn "aarch64_<maxmin_uns>p<mode>"
1409 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1410 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1411 (match_operand:VHSDF 2 "register_operand" "w")]
1414 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1415 [(set_attr "type" "neon_minmax<q>")]
1418 ;; vec_concat gives a new vector with the low elements from operand 1, and
1419 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1420 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1421 ;; What that means, is that the RTL descriptions of the below patterns
1422 ;; need to change depending on endianness.
1424 ;; Move to the low architectural bits of the register.
1425 ;; On little-endian this is { operand, zeroes }
1426 ;; On big-endian this is { zeroes, operand }
;; Little-endian form for quad vectors that are not 2-element: the source
;; half occupies the low bits, zeros the high bits.
1428 (define_insn "move_lo_quad_internal_<mode>"
1429 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1431 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1432 (vec_duplicate:<VHALF> (const_int 0))))]
1433 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1438 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1439 (set_attr "length" "4")
1440 (set_attr "arch" "simd,fp,simd")]
;; Little-endian form specialised for 2-element quad vectors (V2DI/V2DF).
1443 (define_insn "move_lo_quad_internal_<mode>"
1444 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1446 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1448 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1453 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1454 (set_attr "length" "4")
1455 (set_attr "arch" "simd,fp,simd")]
;; Big-endian forms: the vec_concat operand order is reversed so the source
;; still ends up in the low architectural bits.
1458 (define_insn "move_lo_quad_internal_be_<mode>"
1459 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1461 (vec_duplicate:<VHALF> (const_int 0))
1462 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1463 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1468 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1469 (set_attr "length" "4")
1470 (set_attr "arch" "simd,fp,simd")]
1473 (define_insn "move_lo_quad_internal_be_<mode>"
1474 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1477 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1478 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1483 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484 (set_attr "length" "4")
1485 (set_attr "arch" "simd,fp,simd")]
;; Expander that dispatches to the little-endian or big-endian internal
;; pattern based on the target's byte order.
1488 (define_expand "move_lo_quad_<mode>"
1489 [(match_operand:VQ 0 "register_operand")
1490 (match_operand:VQ 1 "register_operand")]
1493 if (BYTES_BIG_ENDIAN)
1494 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1496 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1501 ;; Move operand1 to the high architectural bits of the register, keeping
1502 ;; the low architectural bits of operand2.
1503 ;; For little-endian this is { operand2, operand1 }
1504 ;; For big-endian this is { operand1, operand2 }
;; Little-endian insn: keep the low half of operand 0 (selected via a
;; vect_par_cnst_lo_half vec_select) and insert operand 1 into the high half
;; (INS %0.d[1]).
1506 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1507 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1511 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1512 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1513 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1515 ins\\t%0.d[1], %1.d[0]
1517 [(set_attr "type" "neon_ins")]
;; Big-endian counterpart: vec_concat operand order reversed, same INS.
1520 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1521 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1523 (match_operand:<VHALF> 1 "register_operand" "w,r")
1526 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1527 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1529 ins\\t%0.d[1], %1.d[0]
1531 [(set_attr "type" "neon_ins")]
;; Expander: build the low-half lane-selection parallel and dispatch on
;; endianness to one of the two insns above.
1534 (define_expand "move_hi_quad_<mode>"
1535 [(match_operand:VQ 0 "register_operand" "")
1536 (match_operand:<VHALF> 1 "register_operand" "")]
1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1540 if (BYTES_BIG_ENDIAN)
1541 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1544 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1549 ;; Narrowing operations.
;; Truncate each element of a quad vector to half width (XTN).
1552 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1553 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1554 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1556 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1557 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-truncate for 64-bit source vectors: concatenate the two inputs into
;; a temporary quad register (low/high order depends on endianness), then
;; narrow the whole register with one XTN.
1560 (define_expand "vec_pack_trunc_<mode>"
1561 [(match_operand:<VNARROWD> 0 "register_operand" "")
1562 (match_operand:VDN 1 "register_operand" "")
1563 (match_operand:VDN 2 "register_operand" "")]
1566 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1567 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1568 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1570 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1571 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1572 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-truncate of two quad vectors into one: XTN into the low half,
;; XTN2 into the high half, with the input order swapped on big-endian.
;; Earlyclobber "=&w" because the destination is written before both
;; inputs are fully read.
1578 (define_insn "vec_pack_trunc_<mode>"
1579 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1580 (vec_concat:<VNARROWQ2>
1581 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1582 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1585 if (BYTES_BIG_ENDIAN)
1586 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1588 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1590 [(set_attr "type" "multiple")
1591 (set_attr "length" "8")]
1594 ;; Widening operations.
;; Sign/zero-extend the low half of a quad vector (SXTL/UXTL).
1596 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599 (match_operand:VQW 1 "register_operand" "w")
1600 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1603 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1604 [(set_attr "type" "neon_shift_imm_long")]
;; Sign/zero-extend the high half of a quad vector (SXTL2/UXTL2).
1607 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1610 (match_operand:VQW 1 "register_operand" "w")
1611 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1614 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1615 [(set_attr "type" "neon_shift_imm_long")]
;; Standard vec_unpack expanders: build the hi/lo lane-selection parallel
;; and forward to the matching insn above.
1618 (define_expand "vec_unpack<su>_hi_<mode>"
1619 [(match_operand:<VWIDE> 0 "register_operand" "")
1620 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1623 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1624 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1630 (define_expand "vec_unpack<su>_lo_<mode>"
1631 [(match_operand:<VWIDE> 0 "register_operand" "")
1632 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1635 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1636 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1642 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves (SMLAL/UMLAL).
1644 (define_insn "*aarch64_<su>mlal_lo<mode>"
1645 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1648 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1649 (match_operand:VQW 2 "register_operand" "w")
1650 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1651 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1652 (match_operand:VQW 4 "register_operand" "w")
1654 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1656 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1657 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate on the high halves (SMLAL2/UMLAL2).
1660 (define_insn "*aarch64_<su>mlal_hi<mode>"
1661 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1664 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665 (match_operand:VQW 2 "register_operand" "w")
1666 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668 (match_operand:VQW 4 "register_operand" "w")
1670 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1672 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1673 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the low halves (SMLSL/UMLSL).
1676 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1677 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1679 (match_operand:<VWIDE> 1 "register_operand" "0")
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1688 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1689 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the high halves (SMLSL2/UMLSL2).
1692 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1695 (match_operand:<VWIDE> 1 "register_operand" "0")
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1704 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1705 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening MLAL for 64-bit (D-register) source vectors: whole-vector form.
1708 (define_insn "*aarch64_<su>mlal<mode>"
1709 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1713 (match_operand:VD_BHSI 1 "register_operand" "w"))
1715 (match_operand:VD_BHSI 2 "register_operand" "w")))
1716 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1718 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1719 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening MLSL for 64-bit source vectors: whole-vector form.
1722 (define_insn "*aarch64_<su>mlsl<mode>"
1723 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1725 (match_operand:<VWIDE> 1 "register_operand" "0")
1728 (match_operand:VD_BHSI 2 "register_operand" "w"))
1730 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1732 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1733 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
1736 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1737 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1738 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1739 (match_operand:VQW 1 "register_operand" "w")
1740 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1741 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742 (match_operand:VQW 2 "register_operand" "w")
1745 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1746 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard widening-multiply expander for the low halves.
1749 (define_expand "vec_widen_<su>mult_lo_<mode>"
1750 [(match_operand:<VWIDE> 0 "register_operand" "")
1751 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1752 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1755 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1756 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves (SMULL2/UMULL2).
1763 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1764 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1765 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1766 (match_operand:VQW 1 "register_operand" "w")
1767 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1768 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769 (match_operand:VQW 2 "register_operand" "w")
1772 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1773 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard widening-multiply expander for the high halves.
1776 (define_expand "vec_widen_<su>mult_hi_<mode>"
1777 [(match_operand:<VWIDE> 0 "register_operand" "")
1778 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1779 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1782 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1783 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1791 ;; FP vector operations.
1792 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1793 ;; double-precision (64-bit) floating-point data types and arithmetic as
1794 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1795 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1797 ;; Floating-point operations can raise an exception. Vectorizing such
1798 ;; operations are safe because of reasons explained below.
1800 ;; ARMv8 permits an extension to enable trapped floating-point
1801 ;; exception handling, however this is an optional feature. In the
1802 ;; event of a floating-point exception being raised by vectorised
1804 ;; 1. If trapped floating-point exceptions are available, then a trap
1805 ;; will be taken when any lane raises an enabled exception. A trap
1806 ;; handler may determine which lane raised the exception.
1807 ;; 2. Alternatively a sticky exception flag is set in the
1808 ;; floating-point status register (FPSR). Software may explicitly
1809 ;; test the exception flags, in which case the tests will either
1810 ;; prevent vectorisation, allowing precise identification of the
1811 ;; failing operation, or if tested outside of vectorisable regions
1812 ;; then the specific operation and lane are not of interest.
1814 ;; FP arithmetic operations.
;; Vector FP add (FADD), covering half/single/double element modes.
1816 (define_insn "add<mode>3"
1817 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1818 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1819 (match_operand:VHSDF 2 "register_operand" "w")))]
1821 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1822 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtract (FSUB).
1825 (define_insn "sub<mode>3"
1826 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1827 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1828 (match_operand:VHSDF 2 "register_operand" "w")))]
1830 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1831 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiply (FMUL).
1834 (define_insn "mul<mode>3"
1835 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1836 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1837 (match_operand:VHSDF 2 "register_operand" "w")))]
1839 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1840 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: first try the approximate-reciprocal sequence
;; (aarch64_emit_approx_div); if it declines, fall through to the real
;; FDIV insn below.
1843 (define_expand "div<mode>3"
1844 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1845 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1846 (match_operand:VHSDF 2 "register_operand" "w")))]
1849 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1852 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Vector FP divide (FDIV).
1855 (define_insn "*div<mode>3"
1856 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1857 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1858 (match_operand:VHSDF 2 "register_operand" "w")))]
1860 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1861 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negate (FNEG).
1864 (define_insn "neg<mode>2"
1865 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1866 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1868 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1869 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value (FABS).
1872 (define_insn "abs<mode>2"
1873 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1874 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1876 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1877 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add (FMLA); the accumulator (operand 3) is tied to the
;; destination via constraint "0".
1880 (define_insn "fma<mode>4"
1881 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1882 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1883 (match_operand:VHSDF 2 "register_operand" "w")
1884 (match_operand:VHSDF 3 "register_operand" "0")))]
1886 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1887 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand broadcast from a lane of a same-width vector;
;; the lane index is remapped for endianness before printing.
1890 (define_insn "*aarch64_fma4_elt<mode>"
1891 [(set (match_operand:VDQF 0 "register_operand" "=w")
1895 (match_operand:VDQF 1 "register_operand" "<h_con>")
1896 (parallel [(match_operand:SI 2 "immediate_operand")])))
1897 (match_operand:VDQF 3 "register_operand" "w")
1898 (match_operand:VDQF 4 "register_operand" "0")))]
1901 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1902 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1904 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA lane variant where the lane source has the swapped width.
1907 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1908 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1910 (vec_duplicate:VDQSF
1912 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1913 (parallel [(match_operand:SI 2 "immediate_operand")])))
1914 (match_operand:VDQSF 3 "register_operand" "w")
1915 (match_operand:VDQSF 4 "register_operand" "0")))]
1918 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1919 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1921 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with the multiplicand broadcast from a scalar register (lane 0).
1924 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1925 [(set (match_operand:VMUL 0 "register_operand" "=w")
1928 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1929 (match_operand:VMUL 2 "register_operand" "w")
1930 (match_operand:VMUL 3 "register_operand" "0")))]
1932 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1933 [(set_attr "type" "neon_fp_mla_<stype>_scalar<q>")]
;; Scalar-DF FMLA taking one operand from a lane of a V2DF register.
1936 (define_insn "*aarch64_fma4_elt_to_64v2df"
1937 [(set (match_operand:DF 0 "register_operand" "=w")
1940 (match_operand:V2DF 1 "register_operand" "w")
1941 (parallel [(match_operand:SI 2 "immediate_operand")]))
1942 (match_operand:DF 3 "register_operand" "w")
1943 (match_operand:DF 4 "register_operand" "0")))]
1946 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1947 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1949 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (FMLS): fma with the first multiplicand negated.
1952 (define_insn "fnma<mode>4"
1953 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1955 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1956 (match_operand:VHSDF 2 "register_operand" "w")
1957 (match_operand:VHSDF 3 "register_operand" "0")))]
1959 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1960 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS lane variants, mirroring the FMLA lane patterns above.
1963 (define_insn "*aarch64_fnma4_elt<mode>"
1964 [(set (match_operand:VDQF 0 "register_operand" "=w")
1967 (match_operand:VDQF 3 "register_operand" "w"))
1970 (match_operand:VDQF 1 "register_operand" "<h_con>")
1971 (parallel [(match_operand:SI 2 "immediate_operand")])))
1972 (match_operand:VDQF 4 "register_operand" "0")))]
1975 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1976 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1978 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1981 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1982 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1985 (match_operand:VDQSF 3 "register_operand" "w"))
1986 (vec_duplicate:VDQSF
1988 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1989 (parallel [(match_operand:SI 2 "immediate_operand")])))
1990 (match_operand:VDQSF 4 "register_operand" "0")))]
1993 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1994 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1996 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar-register broadcast (lane 0).
1999 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2000 [(set (match_operand:VMUL 0 "register_operand" "=w")
2003 (match_operand:VMUL 2 "register_operand" "w"))
2005 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2006 (match_operand:VMUL 3 "register_operand" "0")))]
2008 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2009 [(set_attr "type" "neon_fp_mla_<stype>_scalar<q>")]
;; Scalar-DF FMLS taking one operand from a lane of a V2DF register.
2012 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2013 [(set (match_operand:DF 0 "register_operand" "=w")
2016 (match_operand:V2DF 1 "register_operand" "w")
2017 (parallel [(match_operand:SI 2 "immediate_operand")]))
2019 (match_operand:DF 3 "register_operand" "w"))
2020 (match_operand:DF 4 "register_operand" "0")))]
2023 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2024 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2026 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2029 ;; Vector versions of the floating-point frint patterns.
2030 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2031 (define_insn "<frint_pattern><mode>2"
2032 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2033 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2036 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2037 [(set_attr "type" "neon_fp_round_<stype><q>")]
2040 ;; Vector versions of the fcvt standard patterns.
2041 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer (FCVT<rm><S|U>).
2042 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2043 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2044 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2045 [(match_operand:VHSDF 1 "register_operand" "w")]
2048 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2049 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2052 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF -> HI round-and-convert; requires the FP16 instructions
;; (TARGET_SIMD_F16INST).
2053 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2054 [(set (match_operand:HI 0 "register_operand" "=w")
2055 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2057 "TARGET_SIMD_F16INST"
2058 "fcvt<frint_suffix><su>\t%h0, %h1"
2059 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF -> HI truncating conversion (FCVTZS/FCVTZU).
2062 (define_insn "<optab>_trunchfhi2"
2063 [(set (match_operand:HI 0 "register_operand" "=w")
2064 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2065 "TARGET_SIMD_F16INST"
2066 "fcvtz<su>\t%h0, %h1"
2067 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI -> HF integer-to-float conversion (SCVTF/UCVTF).
2070 (define_insn "<optab>hihf2"
2071 [(set (match_operand:HF 0 "register_operand" "=w")
2072 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2073 "TARGET_SIMD_F16INST"
2074 "<su_optab>cvtf\t%h0, %h1"
2075 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of two with the float-to-int conversion,
;; emitting FCVTZ with a fractional-bits immediate.  The IN_RANGE check
;; limits #fbits to the element width.
2078 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2079 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2080 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2082 (match_operand:VDQF 1 "register_operand" "w")
2083 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2086 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2087 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2089 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2091 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2092 output_asm_insn (buf, operands);
2095 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-pattern expanders that simply expose the fcvt insns above.
2098 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2099 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2100 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2101 [(match_operand:VHSDF 1 "register_operand")]
2106 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2107 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2108 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2109 [(match_operand:VHSDF 1 "register_operand")]
2114 (define_expand "ftrunc<VHSDF:mode>2"
2115 [(set (match_operand:VHSDF 0 "register_operand")
2116 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Vector integer-to-float conversion (SCVTF/UCVTF).
2121 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2122 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2124 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2126 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2127 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2130 ;; Conversions between vectors of floats and doubles.
2131 ;; Contains a mix of patterns to match standard pattern names
2132 ;; and those for intrinsics.
2134 ;; Float widening operations.

;; Widen the LOW half (lower architectural lanes) of a 128-bit HF/SF
;; vector to the wider element mode: FCVTL.
2136 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2137 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2138 (float_extend:<VWIDE> (vec_select:<VHALF>
2139 (match_operand:VQ_HSF 1 "register_operand" "w")
2140 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2143 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2144 [(set_attr "type" "neon_fp_cvt_widen_s")]

2147 ;; Convert between fixed-point and floating-point (vector modes)

;; FP -> fixed-point with #fbits immediate (operand 2): FCVTZS/FCVTZU #n.
2149 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2150 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2151 (unspec:<VHSDF:FCVT_TARGET>
2152 [(match_operand:VHSDF 1 "register_operand" "w")
2153 (match_operand:SI 2 "immediate_operand" "i")]
2156 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2157 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]

;; Fixed-point -> FP with #fbits immediate: SCVTF/UCVTF #n.
2160 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2161 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2162 (unspec:<VDQ_HSDI:FCVT_TARGET>
2163 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2164 (match_operand:SI 2 "immediate_operand" "i")]
2167 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2168 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]

2171 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2172 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2173 ;; the meaning of HI and LO changes depending on the target endianness.
2174 ;; While elsewhere we map the higher numbered elements of a vector to
2175 ;; the lower architectural lanes of the vector, for these patterns we want
2176 ;; to always treat "hi" as referring to the higher architectural lanes.
2177 ;; Consequently, while the patterns below look inconsistent with our
2178 ;; other big-endian patterns their behavior is as required.

;; Expander: build a lo-half PARALLEL ('false' selector) and emit the
;; _lo_ insn above.
2180 (define_expand "vec_unpacks_lo_<mode>"
2181 [(match_operand:<VWIDE> 0 "register_operand" "")
2182 (match_operand:VQ_HSF 1 "register_operand" "")]
2185 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2186 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],

;; Widen the HIGH half (higher architectural lanes): FCVTL2.
2192 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194 (float_extend:<VWIDE> (vec_select:<VHALF>
2195 (match_operand:VQ_HSF 1 "register_operand" "w")
2196 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2199 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2200 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander: build a HI-half PARALLEL ('true' selector) and emit the
;; _hi_ insn above (FCVTL2).  Fix: this previously called the _lo_
;; generator, whose vect_par_cnst_lo_half predicate cannot match the
;; hi-half parallel built here, making the emitted insn unrecognizable.
2203 (define_expand "vec_unpacks_hi_<mode>"
2204 [(match_operand:<VWIDE> 0 "register_operand" "")
2205 (match_operand:VQ_HSF 1 "register_operand" "")]
2208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2209 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a 64-bit FP vector (VDF) to its double-width mode: FCVTL.
2214 (define_insn "aarch64_float_extend_lo_<Vwide>"
2215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2216 (float_extend:<VWIDE>
2217 (match_operand:VDF 1 "register_operand" "w")))]
2219 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2220 [(set_attr "type" "neon_fp_cvt_widen_s")]

2223 ;; Float narrowing operations.

;; Narrow a wide FP vector into a 64-bit result: FCVTN.
2225 (define_insn "aarch64_float_truncate_lo_<mode>"
2226 [(set (match_operand:VDF 0 "register_operand" "=w")
2228 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2230 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2231 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

;; FCVTN2, little-endian form: operand 1 (tied to dest, constraint "0")
;; supplies the existing low half; operand 2 is narrowed into the high half.
2234 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2235 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2237 (match_operand:VDF 1 "register_operand" "0")
2239 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2240 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2241 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2242 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

;; FCVTN2, big-endian form: same instruction, concat operands swapped in
;; the RTL to model BYTES_BIG_ENDIAN lane numbering.
2245 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2246 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2249 (match_operand:<VWIDE> 2 "register_operand" "w"))
2250 (match_operand:VDF 1 "register_operand" "0")))]
2251 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2252 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2253 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]

;; Dispatch to the _le/_be variant at expand time.  (Constraints in a
;; define_expand, e.g. "=w" below, are ignored by genexpand.)
2256 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2257 [(match_operand:<VDBL> 0 "register_operand" "=w")
2258 (match_operand:VDF 1 "register_operand" "0")
2259 (match_operand:<VWIDE> 2 "register_operand" "w")]
2262 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2263 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2264 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2265 emit_insn (gen (operands[0], operands[1], operands[2]));

;; Pack two V2DF into one V4SF: FCVTN into a temp for one input, then
;; FCVTN2 for the other.  lo/hi operand indices swap for big-endian so
;; that GCC's element order maps onto architectural lanes correctly.
2270 (define_expand "vec_pack_trunc_v2df"
2271 [(set (match_operand:V4SF 0 "register_operand")
2273 (float_truncate:V2SF
2274 (match_operand:V2DF 1 "register_operand"))
2275 (float_truncate:V2SF
2276 (match_operand:V2DF 2 "register_operand"))
2280 rtx tmp = gen_reg_rtx (V2SFmode);
2281 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2282 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2284 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2285 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2286 tmp, operands[hi]));

;; Pack two scalar DFs into a V2SF: assemble a V2DF from the two scalars
;; (move_lo/hi_quad), then a single FCVTN.
2291 (define_expand "vec_pack_trunc_df"
2292 [(set (match_operand:V2SF 0 "register_operand")
2295 (match_operand:DF 1 "register_operand"))
2297 (match_operand:DF 2 "register_operand"))
2301 rtx tmp = gen_reg_rtx (V2SFmode);
2302 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2303 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2305 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2306 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2307 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2313 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2315 ;; a = (b < c) ? b : c;
2316 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2317 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2320 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2321 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2322 ;; operand will be returned when both operands are zero (i.e. they may not
2323 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2324 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2327 (define_insn "<su><maxmin><mode>3"
2328 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2329 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2330 (match_operand:VHSDF 2 "register_operand" "w")))]
2332 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2333 [(set_attr "type" "neon_fp_minmax_<stype><q>")]

2336 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2337 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2338 ;; which implement the IEEE fmax ()/fmin () functions.
;; Emitted via an UNSPEC (name elided in this excerpt) because the exact
;; NaN/zero semantics of these instructions are not expressible as
;; plain smax/smin RTL.
2339 (define_insn "<maxmin_uns><mode>3"
2340 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2341 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2342 (match_operand:VHSDF 2 "register_operand" "w")]
2345 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2346 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2349 ;; 'across lanes' add.

;; Sum all lanes of an integer vector to a scalar: ADDV/ADDP into a
;; scratch, then extract architectural lane 0 (endian-corrected).
2351 (define_expand "reduc_plus_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand" "=w")
2353 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2357 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2360 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

;; Pairwise FP add: FADDP.
2365 (define_insn "aarch64_faddp<mode>"
2366 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2367 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2368 (match_operand:VHSDF 2 "register_operand" "w")]
2371 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2372 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]

;; Across-lanes integer add (ADDV / ADDP depending on <vp>).
2375 (define_insn "aarch64_reduc_plus_internal<mode>"
2376 [(set (match_operand:VDQV 0 "register_operand" "=w")
2377 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2380 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2381 [(set_attr "type" "neon_reduc_add<q>")]

;; V2SI has no ADDV; a single pairwise ADDP reduces both lanes.
2384 (define_insn "aarch64_reduc_plus_internalv2si"
2385 [(set (match_operand:V2SI 0 "register_operand" "=w")
2386 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2389 "addp\\t%0.2s, %1.2s, %1.2s"
2390 [(set_attr "type" "neon_reduc_add")]

;; Two-lane FP vectors reduce directly with scalar-output FADDP.
2393 (define_insn "reduc_plus_scal_<mode>"
2394 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2395 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2398 "faddp\\t%<Vetype>0, %1.<Vtype>"
2399 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]

;; V4SF reduction: two pairwise FADDPs leave the sum in every lane of
;; scratch; extract endian-corrected lane 0.
2402 (define_expand "reduc_plus_scal_v4sf"
2403 [(set (match_operand:SF 0 "register_operand")
2404 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2408 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2409 rtx scratch = gen_reg_rtx (V4SFmode);
2410 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2411 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2412 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));

;; Count leading redundant sign bits per element: CLS.
2416 (define_insn "clrsb<mode>2"
2417 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2418 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2420 "cls\\t%0.<Vtype>, %1.<Vtype>"
2421 [(set_attr "type" "neon_cls<q>")]

;; Count leading zeros per element: CLZ.
2424 (define_insn "clz<mode>2"
2425 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2426 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2428 "clz\\t%0.<Vtype>, %1.<Vtype>"
2429 [(set_attr "type" "neon_cls<q>")]

;; Population count per byte element: CNT (byte vectors only).
2432 (define_insn "popcount<mode>2"
2433 [(set (match_operand:VB 0 "register_operand" "=w")
2434 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2436 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2437 [(set_attr "type" "neon_cnt<q>")]
2440 ;; 'across lanes' max and min ops.

2442 ;; Template for outputting a scalar, so we can create __builtins which can be
2443 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
;; FP reduction: run the internal across-lanes insn into a vector
;; scratch, then extract endian-corrected lane 0 as the scalar result.
2444 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2445 [(match_operand:<VEL> 0 "register_operand")
2446 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2450 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2451 rtx scratch = gen_reg_rtx (<MODE>mode);
2452 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2454 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

2459 ;; Likewise for integer cases, signed and unsigned.
2460 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2461 [(match_operand:<VEL> 0 "register_operand")
2462 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2466 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2467 rtx scratch = gen_reg_rtx (<MODE>mode);
2468 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2470 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));

;; Integer across-lanes max/min: [SU]MAXV / [SU]MINV.
2475 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2476 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2477 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2480 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2481 [(set_attr "type" "neon_reduc_minmax<q>")]

;; V2SI has no MAXV/MINV; one pairwise op reduces both lanes.
2484 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2485 [(set (match_operand:V2SI 0 "register_operand" "=w")
2486 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2489 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2490 [(set_attr "type" "neon_reduc_minmax")]

;; FP across-lanes max/min: FMAX(NM)V / FMIN(NM)V / pairwise (<vp>).
2493 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2494 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2495 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2498 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2499 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2502 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2504 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2507 ;; Thus our BSL is of the form:
2508 ;; op0 = bsl (mask, op2, op3)
2509 ;; We can use any of:
2512 ;; bsl mask, op1, op2
2513 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2514 ;; bit op0, op2, mask
2515 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2516 ;; bif op0, op1, mask

2518 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2519 ;; Some forms of straight-line code may generate the equivalent form
2520 ;; in *aarch64_simd_bsl<mode>_alt.

;; BSL expressed as ((op2 ^ op3) & mask) ^ op3 (xor/and RTL elided in
;; this excerpt); the three alternatives tie a different input to the
;; destination so any of BSL/BIT/BIF can be used.
2522 (define_insn "aarch64_simd_bsl<mode>_internal"
2523 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2527 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2528 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2529 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2530 (match_dup:<V_INT_EQUIV> 3)
2534 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2535 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2536 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2537 [(set_attr "type" "neon_bsl<q>")]

2540 ;; We need this form in addition to the above pattern to match the case
2541 ;; when combine tries merging three insns such that the second operand of
2542 ;; the outer XOR matches the second operand of the inner XOR rather than
2543 ;; the first. The two are equivalent but since recog doesn't try all
2544 ;; permutations of commutative operations, we have to have a separate pattern.

;; Same BSL semantics with the outer XOR's operands commuted (note the
;; operand-2/3 roles swap in the assembly below relative to _internal).
2546 (define_insn "*aarch64_simd_bsl<mode>_alt"
2547 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2551 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2552 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2553 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2554 (match_dup:<V_INT_EQUIV> 2)))]
2557 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2558 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2559 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2560 [(set_attr "type" "neon_bsl<q>")]

2563 ;; DImode is special, we want to avoid computing operations which are
2564 ;; more naturally computed in general purpose registers in the vector
2565 ;; registers. If we do that, we need to move all three operands from general
2566 ;; purpose registers to vector registers, then back again. However, we
2567 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2568 ;; optimizations based on the component operations of a BSL.
2570 ;; That means we need a splitter back to the individual operations, if they
2571 ;; would be better calculated on the integer side.

;; DI BSL: first three alternatives stay in vector regs (BSL/BIT/BIF);
;; the fourth ("&r", early-clobber GP dest) splits back to EOR/AND/EOR
;; on the integer side.  The match_dup list after the split condition is
;; a placeholder; the C body emits the real insns.
2573 (define_insn_and_split "aarch64_simd_bsldi_internal"
2574 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2578 (match_operand:DI 3 "register_operand" "w,0,w,r")
2579 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2580 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2585 bsl\\t%0.8b, %2.8b, %3.8b
2586 bit\\t%0.8b, %2.8b, %1.8b
2587 bif\\t%0.8b, %3.8b, %1.8b
2589 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2590 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2592 /* Split back to individual operations. If we're before reload, and
2593 able to create a temporary register, do so. If we're after reload,
2594 we've got an early-clobber destination register, so use that.
2595 Otherwise, we can't create pseudos and we can't yet guarantee that
2596 operands[0] is safe to write, so FAIL to split. */
2599 if (reload_completed)
2600 scratch = operands[0];
2601 else if (can_create_pseudo_p ())
2602 scratch = gen_reg_rtx (DImode);
2606 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2607 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2608 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2611 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2612 (set_attr "length" "4,4,4,12")]

;; Commuted-XOR twin of the DI splitter above (mirrors _alt): note the
;; final EOR folds with operands[2] here rather than operands[3].
2615 (define_insn_and_split "aarch64_simd_bsldi_alt"
2616 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2620 (match_operand:DI 3 "register_operand" "w,w,0,r")
2621 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2622 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2627 bsl\\t%0.8b, %3.8b, %2.8b
2628 bit\\t%0.8b, %3.8b, %1.8b
2629 bif\\t%0.8b, %2.8b, %1.8b
2631 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2632 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2634 /* Split back to individual operations. If we're before reload, and
2635 able to create a temporary register, do so. If we're after reload,
2636 we've got an early-clobber destination register, so use that.
2637 Otherwise, we can't create pseudos and we can't yet guarantee that
2638 operands[0] is safe to write, so FAIL to split. */
2641 if (reload_completed)
2642 scratch = operands[0];
2643 else if (can_create_pseudo_p ())
2644 scratch = gen_reg_rtx (DImode);
2648 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2649 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2650 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2653 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2654 (set_attr "length" "4,4,4,12")]
;; Public BSL expander: op0 = (op1 & op2) | (~op1 & op3), bitwise.
;; FP modes are bit-cast to the equivalent integer mode (BSL is a pure
;; bit operation) through a fresh integer-mode temp, then cast back.
2657 (define_expand "aarch64_simd_bsl<mode>"
2658 [(match_operand:VALLDIF 0 "register_operand")
2659 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2660 (match_operand:VALLDIF 2 "register_operand")
2661 (match_operand:VALLDIF 3 "register_operand")]
2664 /* We can't alias operands together if they have different modes. */
2665 rtx tmp = operands[0];
2666 if (FLOAT_MODE_P (<MODE>mode))
2668 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2669 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2670 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2672 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2673 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2677 if (tmp != operands[0])
2678 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

;; vcond_mask: select op1 where mask op3 is set, else op2.
;; Fast paths: mask itself when (op1,op2) == (-1,0); inverted mask for
;; (0,-1); otherwise force constants to registers and emit a BSL.
2683 (define_expand "vcond_mask_<mode><v_int_equiv>"
2684 [(match_operand:VALLDI 0 "register_operand")
2685 (match_operand:VALLDI 1 "nonmemory_operand")
2686 (match_operand:VALLDI 2 "nonmemory_operand")
2687 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2690 /* If we have (a = (P) ? -1 : 0);
2691 Then we can simply move the generated mask (result must be int). */
2692 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2693 && operands[2] == CONST0_RTX (<MODE>mode))
2694 emit_move_insn (operands[0], operands[3]);
2695 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2696 else if (operands[1] == CONST0_RTX (<MODE>mode)
2697 && operands[2] == CONSTM1_RTX (<MODE>mode))
2698 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2701 if (!REG_P (operands[1]))
2702 operands[1] = force_reg (<MODE>mode, operands[1]);
2703 if (!REG_P (operands[2]))
2704 operands[2] = force_reg (<MODE>mode, operands[2]);
2705 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2706 operands[1], operands[2]));
2712 ;; Patterns comparing two vectors to produce a mask.

;; Integer vector compare -> all-ones/all-zeros mask.  Each rtx_code maps
;; onto a CM* instruction, commuting operands where AArch64 only has one
;; direction (e.g. LTU via CMGTU with swapped operands); NE is CMEQ + NOT.
;; (The switch/case framing lines are elided in this excerpt.)
2714 (define_expand "vec_cmp<mode><mode>"
2715 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2716 (match_operator 1 "comparison_operator"
2717 [(match_operand:VSDQ_I_DI 2 "register_operand")
2718 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2721 rtx mask = operands[0];
2722 enum rtx_code code = GET_CODE (operands[1]);
2732 if (operands[3] == CONST0_RTX (<MODE>mode))
2737 if (!REG_P (operands[3]))
2738 operands[3] = force_reg (<MODE>mode, operands[3]);
2746 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2750 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2754 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2758 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2762 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2766 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2770 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2774 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2778 /* Handle NE as !EQ. */
2779 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2780 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2784 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));

;; FP vector compare -> integer mask, including the unordered (UN*)
;; codes.  Quiet UN* comparisons must not raise FP exceptions, so NaN
;; lanes are zeroed (via CMEQ self-compare masks) before the ordered
;; compare; see the transformation comment below.
2794 (define_expand "vec_cmp<mode><v_int_equiv>"
2795 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2796 (match_operator 1 "comparison_operator"
2797 [(match_operand:VDQF 2 "register_operand")
2798 (match_operand:VDQF 3 "nonmemory_operand")]))]
2801 int use_zero_form = 0;
2802 enum rtx_code code = GET_CODE (operands[1]);
2803 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2805 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2814 if (operands[3] == CONST0_RTX (<MODE>mode))
2821 if (!REG_P (operands[3]))
2822 operands[3] = force_reg (<MODE>mode, operands[3]);
2832 comparison = gen_aarch64_cmlt<mode>;
2837 std::swap (operands[2], operands[3]);
2841 comparison = gen_aarch64_cmgt<mode>;
2846 comparison = gen_aarch64_cmle<mode>;
2851 std::swap (operands[2], operands[3]);
2855 comparison = gen_aarch64_cmge<mode>;
2859 comparison = gen_aarch64_cmeq<mode>;
2877 /* All of the above must not raise any FP exceptions. Thus we first
2878 check each operand for NaNs and force any elements containing NaN to
2879 zero before using them in the compare.
2880 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2881 (cm<cc> (isnan (a) ? 0.0 : a,
2882 isnan (b) ? 0.0 : b))
2883 We use the following transformations for doing the comparisions:
2887 a UNLT b -> b GT a. */
2889 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2890 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2891 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2892 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2893 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2894 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2895 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2896 lowpart_subreg (<V_INT_EQUIV>mode,
2899 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2900 lowpart_subreg (<V_INT_EQUIV>mode,
2903 gcc_assert (comparison != NULL);
2904 emit_insn (comparison (operands[0],
2905 lowpart_subreg (<MODE>mode,
2906 tmp0, <V_INT_EQUIV>mode),
2907 lowpart_subreg (<MODE>mode,
2908 tmp1, <V_INT_EQUIV>mode)));
2909 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2919 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2920 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2926 a NE b -> ~(a EQ b) */
2927 gcc_assert (comparison != NULL);
2928 emit_insn (comparison (operands[0], operands[2], operands[3]));
2930 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2934 /* LTGT is not guranteed to not generate a FP exception. So let's
2935 go the faster way : ((a > b) || (b > a)). */
2936 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2937 operands[2], operands[3]));
2938 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2939 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2945 /* cmeq (a, a) & cmeq (b, b). */
2946 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2947 operands[2], operands[2]));
2948 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2949 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2951 if (code == UNORDERED)
2952 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2953 else if (code == UNEQ)
2955 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2956 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));

;; Unsigned integer compare is identical to the signed expander, which
;; already dispatches on the (possibly unsigned) rtx code.
2967 (define_expand "vec_cmpu<mode><mode>"
2968 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2969 (match_operator 1 "comparison_operator"
2970 [(match_operand:VSDQ_I_DI 2 "register_operand")
2971 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2974 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2975 operands[2], operands[3]));
;; vcond: op0 = (op4 <op3> op5) ? op1 : op2.  All four variants below
;; share the same shape: compute the mask with vec_cmp, then select with
;; vcond_mask.  NE is rewritten as EQ with operands 1/2 swapped to avoid
;; the extra NOT that vec_cmp would emit for NE (the guarding 'if' line
;; is elided in this excerpt).
2979 (define_expand "vcond<mode><mode>"
2980 [(set (match_operand:VALLDI 0 "register_operand")
2981 (if_then_else:VALLDI
2982 (match_operator 3 "comparison_operator"
2983 [(match_operand:VALLDI 4 "register_operand")
2984 (match_operand:VALLDI 5 "nonmemory_operand")])
2985 (match_operand:VALLDI 1 "nonmemory_operand")
2986 (match_operand:VALLDI 2 "nonmemory_operand")))]
2989 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2990 enum rtx_code code = GET_CODE (operands[3]);
2992 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2993 it as well as switch operands 1/2 in order to avoid the additional
2997 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2998 operands[4], operands[5]);
2999 std::swap (operands[1], operands[2]);
3001 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3002 operands[4], operands[5]));
3003 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3004 operands[2], mask));

;; Mixed-mode vcond: FP comparison selecting between integer vectors of
;; the same width (<V_cmp_mixed>).
3009 (define_expand "vcond<v_cmp_mixed><mode>"
3010 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3011 (if_then_else:<V_cmp_mixed>
3012 (match_operator 3 "comparison_operator"
3013 [(match_operand:VDQF_COND 4 "register_operand")
3014 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3015 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3016 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3019 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3020 enum rtx_code code = GET_CODE (operands[3]);
3022 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3023 it as well as switch operands 1/2 in order to avoid the additional
3027 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3028 operands[4], operands[5]);
3029 std::swap (operands[1], operands[2]);
3031 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3032 operands[4], operands[5]));
3033 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3034 operands[0], operands[1],
3035 operands[2], mask));

;; Unsigned-comparison vcond on integer vectors.
3040 (define_expand "vcondu<mode><mode>"
3041 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3042 (if_then_else:VSDQ_I_DI
3043 (match_operator 3 "comparison_operator"
3044 [(match_operand:VSDQ_I_DI 4 "register_operand")
3045 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3046 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3047 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3050 rtx mask = gen_reg_rtx (<MODE>mode);
3051 enum rtx_code code = GET_CODE (operands[3]);
3053 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3054 it as well as switch operands 1/2 in order to avoid the additional
3058 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3059 operands[4], operands[5]);
3060 std::swap (operands[1], operands[2]);
3062 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3063 operands[4], operands[5]));
3064 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3065 operands[2], mask));

;; Unsigned integer comparison selecting between FP vectors.
3069 (define_expand "vcondu<mode><v_cmp_mixed>"
3070 [(set (match_operand:VDQF 0 "register_operand")
3072 (match_operator 3 "comparison_operator"
3073 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3074 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3075 (match_operand:VDQF 1 "nonmemory_operand")
3076 (match_operand:VDQF 2 "nonmemory_operand")))]
3079 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3080 enum rtx_code code = GET_CODE (operands[3]);
3082 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
3083 it as well as switch operands 1/2 in order to avoid the additional
3087 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3088 operands[4], operands[5]);
3089 std::swap (operands[1], operands[2]);
3091 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3093 operands[4], operands[5]));
3094 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3095 operands[2], mask));
3099 ;; Patterns for AArch64 SIMD Intrinsics.

3101 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: lane index is endian-corrected at output time.
3102 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3103 [(set (match_operand:GPI 0 "register_operand" "=r")
3106 (match_operand:VDQQH 1 "register_operand" "w")
3107 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3110 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3111 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3113 [(set_attr "type" "neon_to_gp<q>")]

;; Zero-extending lane extract: UMOV always targets the W register —
;; writing Wn implicitly zeroes the upper 32 bits of Xn, so this covers
;; the DI destination too.
3116 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3117 [(set (match_operand:GPI 0 "register_operand" "=r")
3120 (match_operand:VDQQH 1 "register_operand" "w")
3121 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3124 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3125 INTVAL (operands[2]));
3126 return "umov\\t%w0, %1.<Vetype>[%2]";
3128 [(set_attr "type" "neon_to_gp<q>")]

3131 ;; Lane extraction of a value, neither sign nor zero extension
3132 ;; is guaranteed so upper bits should be considered undefined.
3133 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to GP reg, DUP to SIMD scalar, ST1 to memory.
3134 (define_insn "aarch64_get_lane<mode>"
3135 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3137 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3138 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3141 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3142 switch (which_alternative)
3145 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3147 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3149 return "st1\\t{%1.<Vetype>}[%2], %0";
3154 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]

;; Load two adjacent 64-bit values as one 128-bit vector (LDR Q).  The
;; condition requires operand 2's address to be exactly operand 1's
;; address plus the size of one half, and no strict alignment.
3157 (define_insn "load_pair_lanes<mode>"
3158 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3160 (match_operand:VDC 1 "memory_operand" "Utq")
3161 (match_operand:VDC 2 "memory_operand" "m")))]
3162 "TARGET_SIMD && !STRICT_ALIGNMENT
3163 && rtx_equal_p (XEXP (operands[2], 0),
3164 plus_constant (Pmode,
3165 XEXP (operands[1], 0),
3166 GET_MODE_SIZE (<MODE>mode)))"
3168 [(set_attr "type" "neon_load1_1reg_q")]

;; Store a concatenation of two 64-bit values: STP from either SIMD
;; or general registers.
3171 (define_insn "store_pair_lanes<mode>"
3172 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3174 (match_operand:VDC 1 "register_operand" "w, r")
3175 (match_operand:VDC 2 "register_operand" "w, r")))]
3179 stp\\t%x1, %x2, %y0"
3180 [(set_attr "type" "neon_stp, store_16")]
3183 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with zero into a 128-bit register.
;; NOTE(review): interior lines are missing from this extract (embedded
;; numbering gaps, e.g. 3188, 3192-3195) — the vec_concat wrapper and
;; the three output templates are not visible here.

;; Little-endian form: operand 1 is the low half, zero the high half.
3186 (define_insn "*aarch64_combinez<mode>"
3187 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3189 (match_operand:VDC 1 "general_operand" "w,?r,m")
3190 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3191 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3196 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3197 (set_attr "arch" "simd,fp,simd")]

;; Big-endian form: the vec_concat operand order is swapped (zero
;; first), matching BYTES_BIG_ENDIAN lane numbering.
3200 (define_insn "*aarch64_combinez_be<mode>"
3201 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3203 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3204 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3205 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3210 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3211 (set_attr "arch" "simd,fp,simd")]

;; Expander: delegate endian-dependent splitting to the helper in
;; aarch64.c.
3214 (define_expand "aarch64_combine<mode>"
3215 [(match_operand:<VDBL> 0 "register_operand")
3216 (match_operand:VDC 1 "register_operand")
3217 (match_operand:VDC 2 "register_operand")]
3220 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);

;; Expander used by the above: build the double-width result by moving
;; operand 1 into the low quad and operand 2 into the high quad.
3226 (define_expand "@aarch64_simd_combine<mode>"
3227 [(match_operand:<VDBL> 0 "register_operand")
3228 (match_operand:VDC 1 "register_operand")
3229 (match_operand:VDC 2 "register_operand")]
3232 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3233 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3236 [(set_attr "type" "multiple")]
3239 ;; <su><addsub>l<q>.
;; Widening add/sub long: <su><addsub>l and <su><addsub>l2.
;; ADDSUB iterates over plus/minus, ANY_EXTEND over sign/zero extend.
;; NOTE(review): several interior lines are missing from this extract
;; (embedded numbering gaps), including some vec_select closers and
;; insn conditions.

;; High-half form: selects the high halves of both 128-bit inputs and
;; emits the "l2" instruction.
3241 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3242 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3243 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3244 (match_operand:VQW 1 "register_operand" "w")
3245 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3246 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3247 (match_operand:VQW 2 "register_operand" "w")
3250 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3251 [(set_attr "type" "neon_<ADDSUB:optab>_long")]

;; Low-half form: same, but selecting the low halves ("l" variant).
3254 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3255 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3256 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3257 (match_operand:VQW 1 "register_operand" "w")
3258 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3259 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3260 (match_operand:VQW 2 "register_operand" "w")
3263 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3264 [(set_attr "type" "neon_<ADDSUB:optab>_long")]

;; The four "2" expanders below build the hi-half PARALLEL selector and
;; forward to the corresponding *_hi_internal pattern.
3268 (define_expand "aarch64_saddl2<mode>"
3269 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3270 (match_operand:VQW 1 "register_operand" "w")
3271 (match_operand:VQW 2 "register_operand" "w")]
3274 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3275 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],

3280 (define_expand "aarch64_uaddl2<mode>"
3281 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3282 (match_operand:VQW 1 "register_operand" "w")
3283 (match_operand:VQW 2 "register_operand" "w")]
3286 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3287 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],

3292 (define_expand "aarch64_ssubl2<mode>"
3293 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3294 (match_operand:VQW 1 "register_operand" "w")
3295 (match_operand:VQW 2 "register_operand" "w")]
3298 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3299 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],

3304 (define_expand "aarch64_usubl2<mode>"
3305 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3306 (match_operand:VQW 1 "register_operand" "w")
3307 (match_operand:VQW 2 "register_operand" "w")]
3310 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3311 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],

;; 64-bit-vector form: widen both whole D-register inputs, no half
;; selection needed.
3316 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3318 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3319 (match_operand:VD_BHSI 1 "register_operand" "w"))
3321 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3323 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3324 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3327 ;; <su><addsub>w<q>.
;; Vectorizer widening-sum expanders (widen_ssum / widen_usum).
;; The VQW (128-bit) forms split the work: add the low half via
;; <su>addw_internal into a temp, then the high half via <su>addw2.
;; The VD_BHSI (64-bit) forms map directly onto a single <su>addw.
;; NOTE(review): closing parens / trailing operand lines are missing
;; from this extract (embedded numbering gaps).

3329 (define_expand "widen_ssum<mode>3"
3330 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3331 (plus:<VDBLW> (sign_extend:<VDBLW>
3332 (match_operand:VQW 1 "register_operand" ""))
3333 (match_operand:<VDBLW> 2 "register_operand" "")))]
3336 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3337 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3339 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3341 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));

3346 (define_expand "widen_ssum<mode>3"
3347 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3348 (plus:<VWIDE> (sign_extend:<VWIDE>
3349 (match_operand:VD_BHSI 1 "register_operand" ""))
3350 (match_operand:<VWIDE> 2 "register_operand" "")))]
3353 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));

3357 (define_expand "widen_usum<mode>3"
3358 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3359 (plus:<VDBLW> (zero_extend:<VDBLW>
3360 (match_operand:VQW 1 "register_operand" ""))
3361 (match_operand:<VDBLW> 2 "register_operand" "")))]
3364 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3365 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3367 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3369 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));

3374 (define_expand "widen_usum<mode>3"
3375 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3376 (plus:<VWIDE> (zero_extend:<VWIDE>
3377 (match_operand:VD_BHSI 1 "register_operand" ""))
3378 (match_operand:<VWIDE> 2 "register_operand" "")))]
3381 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Widening add/sub wide: <su>addw / <su>subw and the "2" (high-half)
;; variants, plus their expanders.
;; NOTE(review): interior lines (extend wrappers, conditions, closing
;; parens) are missing from this extract — embedded numbering gaps.

;; subw: wide - extended narrow (whole 64-bit second operand).
3385 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3386 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3387 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3389 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3391 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3392 [(set_attr "type" "neon_sub_widen")]

;; subw_internal: subtract the extended LOW half of a 128-bit operand.
3395 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3397 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3400 (match_operand:VQW 2 "register_operand" "w")
3401 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3403 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3404 [(set_attr "type" "neon_sub_widen")]

;; subw2_internal: subtract the extended HIGH half ("subw2").
3407 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3408 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3409 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3412 (match_operand:VQW 2 "register_operand" "w")
3413 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3415 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3416 [(set_attr "type" "neon_sub_widen")]

;; addw: wide + extended narrow (plus is commutative, so the extended
;; operand appears first in the RTL).
3419 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3420 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3422 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3423 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3425 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3426 [(set_attr "type" "neon_add_widen")]

;; addw_internal: add the extended LOW half of a 128-bit operand.
3429 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3430 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3434 (match_operand:VQW 2 "register_operand" "w")
3435 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3436 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3438 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3439 [(set_attr "type" "neon_add_widen")]

;; addw2_internal: add the extended HIGH half ("addw2").
3442 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3443 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3447 (match_operand:VQW 2 "register_operand" "w")
3448 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3449 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3451 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3452 [(set_attr "type" "neon_add_widen")]

;; The four expanders below construct the hi-half selector and forward
;; to the matching *w2_internal pattern.
3455 (define_expand "aarch64_saddw2<mode>"
3456 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3457 (match_operand:<VWIDE> 1 "register_operand" "w")
3458 (match_operand:VQW 2 "register_operand" "w")]
3461 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3462 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],

3467 (define_expand "aarch64_uaddw2<mode>"
3468 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3469 (match_operand:<VWIDE> 1 "register_operand" "w")
3470 (match_operand:VQW 2 "register_operand" "w")]
3473 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3474 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],

3480 (define_expand "aarch64_ssubw2<mode>"
3481 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3482 (match_operand:<VWIDE> 1 "register_operand" "w")
3483 (match_operand:VQW 2 "register_operand" "w")]
3486 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3487 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],

3492 (define_expand "aarch64_usubw2<mode>"
3493 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3494 (match_operand:<VWIDE> 1 "register_operand" "w")
3495 (match_operand:VQW 2 "register_operand" "w")]
3498 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3499 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3504 ;; <su><r>h<addsub>.
;; Halving add (average) expanders and the halving/narrowing add-sub
;; instructions.
;; NOTE(review): the UNSPEC names and expander bodies are on lines
;; missing from this extract (embedded numbering gaps).

;; <u>avg<mode>3_floor: average rounding toward zero — presumably maps
;; to [u]hadd via an UNSPEC on the missing line; TODO confirm.
3506 (define_expand "<u>avg<mode>3_floor"
3507 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3508 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3509 (match_operand:VDQ_BHSI 2 "register_operand")]

;; <u>avg<mode>3_ceil: rounding average — presumably [u]rhadd.
3514 (define_expand "<u>avg<mode>3_ceil"
3515 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3516 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3517 (match_operand:VDQ_BHSI 2 "register_operand")]

;; Halving add/sub: [su][r]h{add,sub}, element-wise (a op b) >> 1.
3522 (define_insn "aarch64_<sur>h<addsub><mode>"
3523 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3524 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3525 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3528 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3529 [(set_attr "type" "neon_<addsub>_halve<q>")]

;; Add/sub, take high half of each element: [r]{add,sub}hn.
3534 (define_insn "aarch64_<sur><addsub>hn<mode>"
3535 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3536 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3537 (match_operand:VQN 2 "register_operand" "w")]
3540 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3541 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]

;; "hn2" form: narrows into the high half of the destination; operand 1
;; (constraint "0") holds the existing low half.
3544 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3545 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3546 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3547 (match_operand:VQN 2 "register_operand" "w")
3548 (match_operand:VQN 3 "register_operand" "w")]
3551 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3552 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (byte vectors only) and FMULX.
;; NOTE(review): the UNSPEC tags and insn conditions are on lines
;; missing from this extract.

3557 (define_insn "aarch64_pmul<mode>"
3558 [(set (match_operand:VB 0 "register_operand" "=w")
3559 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3560 (match_operand:VB 2 "register_operand" "w")]
3563 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3564 [(set_attr "type" "neon_mul_<Vetype><q>")]

;; FMULX: extended FP multiply (vector and scalar HF/SF/DF modes).
3569 (define_insn "aarch64_fmulx<mode>"
3570 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3572 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3576 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3577 [(set_attr "type" "neon_fp_mul_<stype>")]
3580 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by-element variants.  Each selects one lane of operand 2,
;; duplicates it, and multiplies with operand 1; the C body converts
;; the lane index to the endian-correct value before printing.
;; NOTE(review): unspec/vec_select wrapper lines are missing from this
;; extract (embedded numbering gaps).

;; Lane from a vector of the *other* width (lane <-> laneq pairings).
3582 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3583 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3585 [(match_operand:VDQSF 1 "register_operand" "w")
3586 (vec_duplicate:VDQSF
3588 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3589 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3593 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3594 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3596 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]

;; Lane from a vector of the same mode.
3601 (define_insn "*aarch64_mulx_elt<mode>"
3602 [(set (match_operand:VDQF 0 "register_operand" "=w")
3604 [(match_operand:VDQF 1 "register_operand" "w")
3607 (match_operand:VDQF 2 "register_operand" "w")
3608 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3612 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3613 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3615 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]

;; Scalar element broadcast (vec_duplicate of a scalar register);
;; always uses lane [0].
3620 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3621 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3623 [(match_operand:VHSDF 1 "register_operand" "w")
3624 (vec_duplicate:VHSDF
3625 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3628 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3629 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]

;; Scalar result: multiply a scalar by one extracted vector lane.
3636 (define_insn "*aarch64_vgetfmulx<mode>"
3637 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3639 [(match_operand:<VEL> 1 "register_operand" "w")
3641 (match_operand:VDQF 2 "register_operand" "w")
3642 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3646 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3647 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3649 [(set_attr "type" "fmul<Vetype>")]
;; Saturating integer operations and saturating narrows.
;; NOTE(review): UNSPEC tags / conditions are on lines missing from
;; this extract.

;; [su]q{add,sub}: BINQOPS iterates the saturating plus/minus codes.
3653 (define_insn "aarch64_<su_optab><optab><mode>"
3654 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3655 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3656 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3658 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3659 [(set_attr "type" "neon_<optab><q>")]

;; suqadd/usqadd: accumulate into operand 0 (constraint "0" ties the
;; destination to operand 1), so only two registers are printed.
3664 (define_insn "aarch64_<sur>qadd<mode>"
3665 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3666 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3667 (match_operand:VSDQ_I 2 "register_operand" "w")]
3670 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3671 [(set_attr "type" "neon_qadd<q>")]

;; sqxtun: signed-to-unsigned saturating narrow.
3676 (define_insn "aarch64_sqmovun<mode>"
3677 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3678 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3681 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3682 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]

;; sqxtn/uqxtn: saturating narrow, same-signedness.
3687 (define_insn "aarch64_<sur>qmovn<mode>"
3688 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3689 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3692 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3693 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]

;; sqabs/sqneg: saturating unary ops (UNQOPS iterator, presumably, on
;; the missing line 3700 — TODO confirm).
3698 (define_insn "aarch64_s<optab><mode>"
3699 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3701 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3703 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3704 [(set_attr "type" "neon_<optab><q>")]
;; sq[r]dmulh: saturating (rounding) doubling multiply high, plus
;; by-lane variants for vector (VDQHS) and scalar (SD_HSI) modes.
;; lane variants index a <VCOND> (64-bit) vector, laneq a <VCONQ>
;; (128-bit) vector; the C bodies fix up lane numbering for endianness.
;; NOTE(review): unspec/vec_select wrapper lines are missing from this
;; extract.

3709 (define_insn "aarch64_sq<r>dmulh<mode>"
3710 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3712 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3713 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3716 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3717 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]

;; Vector, lane from 64-bit vector.
3722 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3723 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3725 [(match_operand:VDQHS 1 "register_operand" "w")
3727 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3728 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3732 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3733 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3734 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Vector, lane from 128-bit vector.
3737 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3738 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3740 [(match_operand:VDQHS 1 "register_operand" "w")
3742 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3743 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3747 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3748 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3749 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Scalar, lane from 64-bit vector.
3752 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3753 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3755 [(match_operand:SD_HSI 1 "register_operand" "w")
3757 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3758 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3762 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3763 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3764 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]

;; Scalar, lane from 128-bit vector.
3767 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3768 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3770 [(match_operand:SD_HSI 1 "register_operand" "w")
3772 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3773 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3777 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3778 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3779 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqrdml[as]h (ARMv8.1-A RDMA): saturating rounding doubling
;; multiply-accumulate/subtract high.  Operand 1 is the accumulator
;; (constraint "0" ties it to the destination).
;; NOTE(review): unspec/vec_select wrapper lines and conditions are
;; missing from this extract.

3784 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3785 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3787 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3788 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3789 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3792 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3793 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; Vector, lane from 64-bit vector.
3798 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3799 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3801 [(match_operand:VDQHS 1 "register_operand" "0")
3802 (match_operand:VDQHS 2 "register_operand" "w")
3804 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3805 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3809 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3811 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3813 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar, lane from 64-bit vector.
3816 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3817 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3819 [(match_operand:SD_HSI 1 "register_operand" "0")
3820 (match_operand:SD_HSI 2 "register_operand" "w")
3822 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3823 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3827 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3829 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3831 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Vector, lane from 128-bit vector (laneq).
3836 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3837 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3839 [(match_operand:VDQHS 1 "register_operand" "0")
3840 (match_operand:VDQHS 2 "register_operand" "w")
3842 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3843 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3847 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3849 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3851 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar, lane from 128-bit vector (laneq).
3854 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3855 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3857 [(match_operand:SD_HSI 1 "register_operand" "0")
3858 (match_operand:SD_HSI 2 "register_operand" "w")
3860 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3861 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3865 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3867 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3869 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l: signed saturating doubling multiply-accumulate long.
;; SBINQOPS iterates ss_plus/ss_minus; operand 1 is the wide
;; accumulator tied to the destination ("0").
;; NOTE(review): ss_plus/ss_minus wrappers, ss_ashift/multiply nodes
;; and conditions sit on lines missing from this extract.

;; Base form: both multiplicands are whole narrow vectors/scalars.
3874 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3877 (match_operand:<VWIDE> 1 "register_operand" "0")
3880 (sign_extend:<VWIDE>
3881 (match_operand:VSD_HSI 2 "register_operand" "w"))
3882 (sign_extend:<VWIDE>
3883 (match_operand:VSD_HSI 3 "register_operand" "w")))
3886 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3887 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; Vector, lane from 64-bit vector.
3892 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3895 (match_operand:<VWIDE> 1 "register_operand" "0")
3898 (sign_extend:<VWIDE>
3899 (match_operand:VD_HSI 2 "register_operand" "w"))
3900 (sign_extend:<VWIDE>
3901 (vec_duplicate:VD_HSI
3903 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3904 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3909 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3911 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3913 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Vector, lane from 128-bit vector (laneq).
3916 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3919 (match_operand:<VWIDE> 1 "register_operand" "0")
3922 (sign_extend:<VWIDE>
3923 (match_operand:VD_HSI 2 "register_operand" "w"))
3924 (sign_extend:<VWIDE>
3925 (vec_duplicate:VD_HSI
3927 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3928 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3933 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3935 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3937 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar, lane from 64-bit vector (no vec_duplicate needed).
3940 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3941 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3943 (match_operand:<VWIDE> 1 "register_operand" "0")
3946 (sign_extend:<VWIDE>
3947 (match_operand:SD_HSI 2 "register_operand" "w"))
3948 (sign_extend:<VWIDE>
3950 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3951 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3956 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3958 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3960 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar, lane from 128-bit vector (laneq).
3963 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3964 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3966 (match_operand:<VWIDE> 1 "register_operand" "0")
3969 (sign_extend:<VWIDE>
3970 (match_operand:SD_HSI 2 "register_operand" "w"))
3971 (sign_extend:<VWIDE>
3973 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3974 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3979 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3981 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3983 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l_n (broadcast scalar multiplicand, lane [0]) and the
;; sqdml[as]l2 high-half pattern plus its two expanders.
;; NOTE(review): lines with the ss_plus/ss_minus wrappers, conditions
;; and some closing parens are missing from this extract.

;; _n form: multiplicand 3 is a scalar broadcast across all lanes.
3988 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3989 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3991 (match_operand:<VWIDE> 1 "register_operand" "0")
3994 (sign_extend:<VWIDE>
3995 (match_operand:VD_HSI 2 "register_operand" "w"))
3996 (sign_extend:<VWIDE>
3997 (vec_duplicate:VD_HSI
3998 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4001 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4002 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; "l2" internal: operate on the HIGH halves (operand 4 is the hi-half
;; lane selector PARALLEL supplied by the expanders below).
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4008 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4010 (match_operand:<VWIDE> 1 "register_operand" "0")
4013 (sign_extend:<VWIDE>
4015 (match_operand:VQ_HSI 2 "register_operand" "w")
4016 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017 (sign_extend:<VWIDE>
4019 (match_operand:VQ_HSI 3 "register_operand" "w")
4023 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4024 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders: build the hi-half selector and forward to *_internal.
4027 (define_expand "aarch64_sqdmlal2<mode>"
4028 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4029 (match_operand:<VWIDE> 1 "register_operand" "w")
4030 (match_operand:VQ_HSI 2 "register_operand" "w")
4031 (match_operand:VQ_HSI 3 "register_operand" "w")]
4034 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4035 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4036 operands[2], operands[3], p));

4040 (define_expand "aarch64_sqdmlsl2<mode>"
4041 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (match_operand:<VWIDE> 1 "register_operand" "w")
4043 (match_operand:VQ_HSI 2 "register_operand" "w")
4044 (match_operand:VQ_HSI 3 "register_operand" "w")]
4047 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4049 operands[2], operands[3], p));
;; sqdml[as]l2 by-lane: high-half multiply-accumulate long with one
;; multiplicand taken from a vector lane (lane = 64-bit source vector,
;; laneq = 128-bit), plus the four user-facing expanders.
;; NOTE(review): interior wrapper lines and conditions are missing from
;; this extract (embedded numbering gaps).

;; Internal, lane from 64-bit vector; operand 5 is the hi-half selector.
4055 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4056 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058 (match_operand:<VWIDE> 1 "register_operand" "0")
4061 (sign_extend:<VWIDE>
4063 (match_operand:VQ_HSI 2 "register_operand" "w")
4064 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4065 (sign_extend:<VWIDE>
4066 (vec_duplicate:<VHALF>
4068 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4069 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4074 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4076 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4078 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Internal, lane from 128-bit vector (laneq).
4081 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4082 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4084 (match_operand:<VWIDE> 1 "register_operand" "0")
4087 (sign_extend:<VWIDE>
4089 (match_operand:VQ_HSI 2 "register_operand" "w")
4090 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4091 (sign_extend:<VWIDE>
4092 (vec_duplicate:<VHALF>
4094 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4095 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4100 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4102 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4104 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders: build hi-half selector, forward to the internals above.
4107 (define_expand "aarch64_sqdmlal2_lane<mode>"
4108 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4109 (match_operand:<VWIDE> 1 "register_operand" "w")
4110 (match_operand:VQ_HSI 2 "register_operand" "w")
4111 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4112 (match_operand:SI 4 "immediate_operand" "i")]
4115 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4116 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4117 operands[2], operands[3],

4122 (define_expand "aarch64_sqdmlal2_laneq<mode>"
4123 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4124 (match_operand:<VWIDE> 1 "register_operand" "w")
4125 (match_operand:VQ_HSI 2 "register_operand" "w")
4126 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4127 (match_operand:SI 4 "immediate_operand" "i")]
4130 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4131 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4132 operands[2], operands[3],

4137 (define_expand "aarch64_sqdmlsl2_lane<mode>"
4138 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4139 (match_operand:<VWIDE> 1 "register_operand" "w")
4140 (match_operand:VQ_HSI 2 "register_operand" "w")
4141 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4142 (match_operand:SI 4 "immediate_operand" "i")]
4145 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4146 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4147 operands[2], operands[3],

4152 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
4153 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4154 (match_operand:<VWIDE> 1 "register_operand" "w")
4155 (match_operand:VQ_HSI 2 "register_operand" "w")
4156 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4157 (match_operand:SI 4 "immediate_operand" "i")]
4160 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4161 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4162 operands[2], operands[3],
;; sqdml[as]l2_n: high-half multiply-accumulate long with a broadcast
;; scalar multiplicand (lane [0] in the output template).
;; NOTE(review): the ss_plus/ss_minus wrapper and condition lines are
;; missing from this extract.

4167 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4168 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4170 (match_operand:<VWIDE> 1 "register_operand" "0")
4173 (sign_extend:<VWIDE>
4175 (match_operand:VQ_HSI 2 "register_operand" "w")
4176 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4177 (sign_extend:<VWIDE>
4178 (vec_duplicate:<VHALF>
4179 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4182 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4183 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders for the accumulate and subtract forms.
4186 (define_expand "aarch64_sqdmlal2_n<mode>"
4187 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4188 (match_operand:<VWIDE> 1 "register_operand" "w")
4189 (match_operand:VQ_HSI 2 "register_operand" "w")
4190 (match_operand:<VEL> 3 "register_operand" "w")]
4193 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4194 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4195 operands[2], operands[3],

4200 (define_expand "aarch64_sqdmlsl2_n<mode>"
4201 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4202 (match_operand:<VWIDE> 1 "register_operand" "w")
4203 (match_operand:VQ_HSI 2 "register_operand" "w")
4204 (match_operand:<VEL> 3 "register_operand" "w")]
4207 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4208 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4209 operands[2], operands[3],
;; sqdmull: signed saturating doubling multiply long (no accumulate),
;; base + by-lane (lane/laneq, vector and scalar) + broadcast (_n).
;; NOTE(review): the ss_ashift/multiply wrapper lines and conditions
;; are missing from this extract (embedded numbering gaps).

;; Base form.
4216 (define_insn "aarch64_sqdmull<mode>"
4217 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4220 (sign_extend:<VWIDE>
4221 (match_operand:VSD_HSI 1 "register_operand" "w"))
4222 (sign_extend:<VWIDE>
4223 (match_operand:VSD_HSI 2 "register_operand" "w")))
4226 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4227 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; Vector, lane from 64-bit vector.
4232 (define_insn "aarch64_sqdmull_lane<mode>"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4236 (sign_extend:<VWIDE>
4237 (match_operand:VD_HSI 1 "register_operand" "w"))
4238 (sign_extend:<VWIDE>
4239 (vec_duplicate:VD_HSI
4241 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4242 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4247 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4248 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4250 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Vector, lane from 128-bit vector (laneq).
4253 (define_insn "aarch64_sqdmull_laneq<mode>"
4254 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4257 (sign_extend:<VWIDE>
4258 (match_operand:VD_HSI 1 "register_operand" "w"))
4259 (sign_extend:<VWIDE>
4260 (vec_duplicate:VD_HSI
4262 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4263 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4268 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4269 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4271 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar, lane from 64-bit vector.
4274 (define_insn "aarch64_sqdmull_lane<mode>"
4275 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4278 (sign_extend:<VWIDE>
4279 (match_operand:SD_HSI 1 "register_operand" "w"))
4280 (sign_extend:<VWIDE>
4282 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4283 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4288 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4289 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4291 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar, lane from 128-bit vector (laneq).
4294 (define_insn "aarch64_sqdmull_laneq<mode>"
4295 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4298 (sign_extend:<VWIDE>
4299 (match_operand:SD_HSI 1 "register_operand" "w"))
4300 (sign_extend:<VWIDE>
4302 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4303 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4308 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4309 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4311 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; _n form: broadcast scalar multiplicand, printed as lane [0].
4316 (define_insn "aarch64_sqdmull_n<mode>"
4317 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4320 (sign_extend:<VWIDE>
4321 (match_operand:VD_HSI 1 "register_operand" "w"))
4322 (sign_extend:<VWIDE>
4323 (vec_duplicate:VD_HSI
4324 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4328 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4329 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4336 (define_insn "aarch64_sqdmull2<mode>_internal"
4337 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4340 (sign_extend:<VWIDE>
4342 (match_operand:VQ_HSI 1 "register_operand" "w")
4343 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4344 (sign_extend:<VWIDE>
4346 (match_operand:VQ_HSI 2 "register_operand" "w")
4351 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4352 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4355 (define_expand "aarch64_sqdmull2<mode>"
4356 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4357 (match_operand:VQ_HSI 1 "register_operand" "w")
4358 (match_operand:VQ_HSI 2 "register_operand" "w")]
4361 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4362 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4369 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4370 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4373 (sign_extend:<VWIDE>
4375 (match_operand:VQ_HSI 1 "register_operand" "w")
4376 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4377 (sign_extend:<VWIDE>
4378 (vec_duplicate:<VHALF>
4380 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4381 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4386 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4387 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4389 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4392 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4393 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4396 (sign_extend:<VWIDE>
4398 (match_operand:VQ_HSI 1 "register_operand" "w")
4399 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4400 (sign_extend:<VWIDE>
4401 (vec_duplicate:<VHALF>
4403 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4404 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4409 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4410 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4412 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4415 (define_expand "aarch64_sqdmull2_lane<mode>"
4416 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4417 (match_operand:VQ_HSI 1 "register_operand" "w")
4418 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4419 (match_operand:SI 3 "immediate_operand" "i")]
4422 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4423 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4424 operands[2], operands[3],
4429 (define_expand "aarch64_sqdmull2_laneq<mode>"
4430 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4431 (match_operand:VQ_HSI 1 "register_operand" "w")
4432 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4433 (match_operand:SI 3 "immediate_operand" "i")]
4436 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4437 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4438 operands[2], operands[3],
4445 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4446 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4449 (sign_extend:<VWIDE>
4451 (match_operand:VQ_HSI 1 "register_operand" "w")
4452 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4453 (sign_extend:<VWIDE>
4454 (vec_duplicate:<VHALF>
4455 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4459 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4460 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4463 (define_expand "aarch64_sqdmull2_n<mode>"
4464 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4465 (match_operand:VQ_HSI 1 "register_operand" "w")
4466 (match_operand:<VEL> 2 "register_operand" "w")]
4469 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4470 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4477 (define_insn "aarch64_<sur>shl<mode>"
4478 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4480 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4481 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4484 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4485 [(set_attr "type" "neon_shift_reg<q>")]
4491 (define_insn "aarch64_<sur>q<r>shl<mode>"
4492 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4494 [(match_operand:VSDQ_I 1 "register_operand" "w")
4495 (match_operand:VSDQ_I 2 "register_operand" "w")]
4498 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4499 [(set_attr "type" "neon_sat_shift_reg<q>")]
4504 (define_insn "aarch64_<sur>shll_n<mode>"
4505 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4506 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4508 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4512 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4513 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4515 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4517 [(set_attr "type" "neon_shift_imm_long")]
4522 (define_insn "aarch64_<sur>shll2_n<mode>"
4523 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4524 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4525 (match_operand:SI 2 "immediate_operand" "i")]
4529 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4530 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4532 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4534 [(set_attr "type" "neon_shift_imm_long")]
4539 (define_insn "aarch64_<sur>shr_n<mode>"
4540 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4541 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4543 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4546 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4547 [(set_attr "type" "neon_sat_shift_imm<q>")]
4552 (define_insn "aarch64_<sur>sra_n<mode>"
4553 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4554 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4555 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4557 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4560 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4561 [(set_attr "type" "neon_shift_acc<q>")]
4566 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4567 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4568 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4569 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4571 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4574 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4575 [(set_attr "type" "neon_shift_imm<q>")]
4580 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4581 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4582 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4584 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4587 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4588 [(set_attr "type" "neon_sat_shift_imm<q>")]
4594 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4595 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4596 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4598 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4601 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4602 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4606 ;; cm(eq|ge|gt|lt|le)
4607 ;; Note, we have constraints for Dz and Z as different expanders
4608 ;; have different ideas of what should be passed to this pattern.
;; Vector signed compare: alternative 1 compares two registers
;; (cm<n_optab>), alternative 2 compares against immediate zero (ZDz).
4610 (define_insn "aarch64_cm<optab><mode>"
4611 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4613 (COMPARISONS:<V_INT_EQUIV>
4614 (match_operand:VDQ_I 1 "register_operand" "w,w")
4615 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4619 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4620 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4621 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI scalar compare.  If register allocation put the operands in
;; general registers, split (after reload) into a compare + conditional
;; store sequence via gen_cstoredi_neg; otherwise re-emit as the
;; CC-clobber-free SIMD pattern below.
4624 (define_insn_and_split "aarch64_cm<optab>di"
4625 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4628 (match_operand:DI 1 "register_operand" "w,w,r")
4629 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4631 (clobber (reg:CC CC_REGNUM))]
4634 "&& reload_completed"
4635 [(set (match_operand:DI 0 "register_operand")
4638 (match_operand:DI 1 "register_operand")
4639 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4642 /* If we are in the general purpose register file,
4643 we split to a sequence of comparison and store. */
4644 if (GP_REGNUM_P (REGNO (operands[0]))
4645 && GP_REGNUM_P (REGNO (operands[1])))
4647 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4648 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4649 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4650 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4653 /* Otherwise, we expand to a similar pattern which does not
4654 clobber CC_REGNUM. */
4656 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DI compare (no CC clobber).
4659 (define_insn "*aarch64_cm<optab>di"
4660 [(set (match_operand:DI 0 "register_operand" "=w,w")
4663 (match_operand:DI 1 "register_operand" "w,w")
4664 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4666 "TARGET_SIMD && reload_completed"
4668 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4669 cm<optab>\t%d0, %d1, #0"
4670 [(set_attr "type" "neon_compare, neon_compare_zero")]
4675 (define_insn "aarch64_cm<optab><mode>"
4676 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4678 (UCOMPARISONS:<V_INT_EQUIV>
4679 (match_operand:VDQ_I 1 "register_operand" "w")
4680 (match_operand:VDQ_I 2 "register_operand" "w")
4683 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4684 [(set_attr "type" "neon_compare<q>")]
4687 (define_insn_and_split "aarch64_cm<optab>di"
4688 [(set (match_operand:DI 0 "register_operand" "=w,r")
4691 (match_operand:DI 1 "register_operand" "w,r")
4692 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4694 (clobber (reg:CC CC_REGNUM))]
4697 "&& reload_completed"
4698 [(set (match_operand:DI 0 "register_operand")
4701 (match_operand:DI 1 "register_operand")
4702 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4705 /* If we are in the general purpose register file,
4706 we split to a sequence of comparison and store. */
4707 if (GP_REGNUM_P (REGNO (operands[0]))
4708 && GP_REGNUM_P (REGNO (operands[1])))
4710 machine_mode mode = CCmode;
4711 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4712 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4713 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4716 /* Otherwise, we expand to a similar pattern which does not
4717 clobber CC_REGNUM. */
4719 [(set_attr "type" "neon_compare,multiple")]
4722 (define_insn "*aarch64_cm<optab>di"
4723 [(set (match_operand:DI 0 "register_operand" "=w")
4726 (match_operand:DI 1 "register_operand" "w")
4727 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4729 "TARGET_SIMD && reload_completed"
4730 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4731 [(set_attr "type" "neon_compare")]
4736 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4737 ;; we don't have any insns using ne, and aarch64_vcond outputs
4738 ;; not (neg (eq (and x y) 0))
4739 ;; which is rewritten by simplify_rtx as
4740 ;; plus (eq (and x y) 0) -1.
;; CMTST: vector "test bits" — matches the canonical plus/eq/-1 form
;; described in the comment above.
4742 (define_insn "aarch64_cmtst<mode>"
4743 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4747 (match_operand:VDQ_I 1 "register_operand" "w")
4748 (match_operand:VDQ_I 2 "register_operand" "w"))
4749 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4750 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4753 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4754 [(set_attr "type" "neon_tst<q>")]
;; DI scalar CMTST.  After reload, if the operands ended up in general
;; registers, split into (and, compare-against-zero, cstore) using
;; gen_cstoredi_neg; otherwise use the SIMD insn below.
4757 (define_insn_and_split "aarch64_cmtstdi"
4758 [(set (match_operand:DI 0 "register_operand" "=w,r")
4762 (match_operand:DI 1 "register_operand" "w,r")
4763 (match_operand:DI 2 "register_operand" "w,r"))
4765 (clobber (reg:CC CC_REGNUM))]
4768 "&& reload_completed"
4769 [(set (match_operand:DI 0 "register_operand")
4773 (match_operand:DI 1 "register_operand")
4774 (match_operand:DI 2 "register_operand"))
4777 /* If we are in the general purpose register file,
4778 we split to a sequence of comparison and store. */
4779 if (GP_REGNUM_P (REGNO (operands[0]))
4780 && GP_REGNUM_P (REGNO (operands[1])))
4782 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4783 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4784 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4785 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4786 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4789 /* Otherwise, we expand to a similar pattern which does not
4790 clobber CC_REGNUM. */
4792 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload SIMD-register form of DI CMTST.
4795 (define_insn "*aarch64_cmtstdi"
4796 [(set (match_operand:DI 0 "register_operand" "=w")
4800 (match_operand:DI 1 "register_operand" "w")
4801 (match_operand:DI 2 "register_operand" "w"))
4804 "cmtst\t%d0, %d1, %d2"
4805 [(set_attr "type" "neon_tst")]
4808 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector compare: register-register (fcm<n_optab>) or
;; against literal zero (YDz constraint, printed as "0").
4810 (define_insn "aarch64_cm<optab><mode>"
4811 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4813 (COMPARISONS:<V_INT_EQUIV>
4814 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4815 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4819 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4820 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4821 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4825 ;; Note we can also handle what would be fac(le|lt) by
4826 ;; generating fac(ge|gt).
;; FACGE/FACGT: absolute-value compares (operands wrapped in abs).
4828 (define_insn "aarch64_fac<optab><mode>"
4829 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4831 (FAC_COMPARISONS:<V_INT_EQUIV>
4833 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4835 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4838 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4839 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4844 (define_insn "aarch64_addp<mode>"
4845 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4847 [(match_operand:VD_BHSI 1 "register_operand" "w")
4848 (match_operand:VD_BHSI 2 "register_operand" "w")]
4851 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4852 [(set_attr "type" "neon_reduc_add<q>")]
4855 (define_insn "aarch64_addpdi"
4856 [(set (match_operand:DI 0 "register_operand" "=w")
4858 [(match_operand:V2DI 1 "register_operand" "w")]
4862 [(set_attr "type" "neon_reduc_add")]
4867 (define_expand "sqrt<mode>2"
4868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4869 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4872 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4876 (define_insn "*sqrt<mode>2"
4877 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4878 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4880 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4881 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4884 ;; Patterns for vector struct loads and stores.
;; Two-register (OImode) structure load/store family.  The inner VQ/
;; VALLDIF unspec only carries the element mode; the lane-indexed forms
;; remap the lane with aarch64_endian_lane_rtx, and the vec_load/store
;; expanders insert a register-list reversal (tbl-based, see
;; aarch64_rev_reglist) on big-endian so RTL lane numbering stays in
;; GCC vector-extension order.
4886 (define_insn "aarch64_simd_ld2<mode>"
4887 [(set (match_operand:OI 0 "register_operand" "=w")
4888 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4889 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4892 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4893 [(set_attr "type" "neon_load2_2reg<q>")]
4896 (define_insn "aarch64_simd_ld2r<mode>"
4897 [(set (match_operand:OI 0 "register_operand" "=w")
4898 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4899 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4902 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4903 [(set_attr "type" "neon_load2_all_lanes<q>")]
4906 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4907 [(set (match_operand:OI 0 "register_operand" "=w")
4908 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4909 (match_operand:OI 2 "register_operand" "0")
4910 (match_operand:SI 3 "immediate_operand" "i")
4911 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4915 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4916 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4918 [(set_attr "type" "neon_load2_one_lane")]
4921 (define_expand "vec_load_lanesoi<mode>"
4922 [(set (match_operand:OI 0 "register_operand" "=w")
4923 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4924 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4928 if (BYTES_BIG_ENDIAN)
4930 rtx tmp = gen_reg_rtx (OImode);
4931 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4932 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4933 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4936 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4940 (define_insn "aarch64_simd_st2<mode>"
4941 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4942 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4943 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4946 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4947 [(set_attr "type" "neon_store2_2reg<q>")]
4950 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4951 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4952 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4953 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4954 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4955 (match_operand:SI 2 "immediate_operand" "i")]
4959 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4960 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4962 [(set_attr "type" "neon_store2_one_lane<q>")]
4965 (define_expand "vec_store_lanesoi<mode>"
4966 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4967 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4968 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4972 if (BYTES_BIG_ENDIAN)
4974 rtx tmp = gen_reg_rtx (OImode);
4975 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4976 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4977 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4980 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4984 (define_insn "aarch64_simd_ld3<mode>"
4985 [(set (match_operand:CI 0 "register_operand" "=w")
4986 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4987 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4991 [(set_attr "type" "neon_load3_3reg<q>")]
4994 (define_insn "aarch64_simd_ld3r<mode>"
4995 [(set (match_operand:CI 0 "register_operand" "=w")
4996 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4997 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5000 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5001 [(set_attr "type" "neon_load3_all_lanes<q>")]
5004 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
5005 [(set (match_operand:CI 0 "register_operand" "=w")
5006 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5007 (match_operand:CI 2 "register_operand" "0")
5008 (match_operand:SI 3 "immediate_operand" "i")
5009 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5013 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5014 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
5016 [(set_attr "type" "neon_load3_one_lane")]
5019 (define_expand "vec_load_lanesci<mode>"
5020 [(set (match_operand:CI 0 "register_operand" "=w")
5021 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5022 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5026 if (BYTES_BIG_ENDIAN)
5028 rtx tmp = gen_reg_rtx (CImode);
5029 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5030 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
5031 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
5034 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
5038 (define_insn "aarch64_simd_st3<mode>"
5039 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5040 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5041 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5044 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5045 [(set_attr "type" "neon_store3_3reg<q>")]
5048 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5049 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
5050 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5051 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5052 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5053 (match_operand:SI 2 "immediate_operand" "i")]
5057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5058 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
5060 [(set_attr "type" "neon_store3_one_lane<q>")]
5063 (define_expand "vec_store_lanesci<mode>"
5064 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5065 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
5066 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5070 if (BYTES_BIG_ENDIAN)
5072 rtx tmp = gen_reg_rtx (CImode);
5073 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5074 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
5075 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
5078 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
5082 (define_insn "aarch64_simd_ld4<mode>"
5083 [(set (match_operand:XI 0 "register_operand" "=w")
5084 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5085 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5088 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5089 [(set_attr "type" "neon_load4_4reg<q>")]
5092 (define_insn "aarch64_simd_ld4r<mode>"
5093 [(set (match_operand:XI 0 "register_operand" "=w")
5094 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5095 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
5098 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5099 [(set_attr "type" "neon_load4_all_lanes<q>")]
5102 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
5103 [(set (match_operand:XI 0 "register_operand" "=w")
5104 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5105 (match_operand:XI 2 "register_operand" "0")
5106 (match_operand:SI 3 "immediate_operand" "i")
5107 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5111 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5112 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
5114 [(set_attr "type" "neon_load4_one_lane")]
5117 (define_expand "vec_load_lanesxi<mode>"
5118 [(set (match_operand:XI 0 "register_operand" "=w")
5119 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
5120 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5124 if (BYTES_BIG_ENDIAN)
5126 rtx tmp = gen_reg_rtx (XImode);
5127 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5128 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
5129 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
5132 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
5136 (define_insn "aarch64_simd_st4<mode>"
5137 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5138 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5139 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5142 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5143 [(set_attr "type" "neon_store4_4reg<q>")]
5146 ;; RTL uses GCC vector extension indices, so flip only for assembly.
5147 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
5148 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5149 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5150 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5151 (match_operand:SI 2 "immediate_operand" "i")]
5155 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
5156 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
5158 [(set_attr "type" "neon_store4_one_lane<q>")]
5161 (define_expand "vec_store_lanesxi<mode>"
5162 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5163 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5164 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5168 if (BYTES_BIG_ENDIAN)
5170 rtx tmp = gen_reg_rtx (XImode);
5171 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5172 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5173 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5176 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5180 (define_insn_and_split "aarch64_rev_reglist<mode>"
5181 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5183 [(match_operand:VSTRUCT 1 "register_operand" "w")
5184 (match_operand:V16QI 2 "register_operand" "w")]
5185 UNSPEC_REV_REGLIST))]
5188 "&& reload_completed"
5192 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5193 for (i = 0; i < nregs; i++)
5195 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5196 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5197 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5201 [(set_attr "type" "neon_tbl1_q")
5202 (set_attr "length" "<insn_count>")]
5205 ;; Reload patterns for AdvSIMD register list operands.
5207 (define_expand "mov<mode>"
5208 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5209 (match_operand:VSTRUCT 1 "general_operand" ""))]
5212 if (can_create_pseudo_p ())
5214 if (GET_CODE (operands[0]) != REG)
5215 operands[1] = force_reg (<MODE>mode, operands[1]);
5220 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5221 [(match_operand:CI 0 "register_operand" "=w")
5222 (match_operand:DI 1 "register_operand" "r")
5223 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5226 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5227 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5231 (define_insn "aarch64_ld1_x3_<mode>"
5232 [(set (match_operand:CI 0 "register_operand" "=w")
5234 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5235 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5237 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5238 [(set_attr "type" "neon_load1_3reg<q>")]
5241 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5242 [(match_operand:DI 0 "register_operand" "")
5243 (match_operand:OI 1 "register_operand" "")
5244 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5247 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5248 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5252 (define_insn "aarch64_st1_x2_<mode>"
5253 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5255 [(match_operand:OI 1 "register_operand" "w")
5256 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5258 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5259 [(set_attr "type" "neon_store1_2reg<q>")]
5262 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5263 [(match_operand:DI 0 "register_operand" "")
5264 (match_operand:CI 1 "register_operand" "")
5265 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5268 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5269 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5273 (define_insn "aarch64_st1_x3_<mode>"
5274 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5276 [(match_operand:CI 1 "register_operand" "w")
5277 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5279 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5280 [(set_attr "type" "neon_store1_3reg<q>")]
5283 (define_insn "*aarch64_mov<mode>"
5284 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5285 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5286 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5287 && (register_operand (operands[0], <MODE>mode)
5288 || register_operand (operands[1], <MODE>mode))"
5291 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5292 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5293 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5294 neon_load<nregs>_<nregs>reg_q")
5295 (set_attr "length" "<insn_count>,4,4")]
5298 (define_insn "aarch64_be_ld1<mode>"
5299 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5300 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5301 "aarch64_simd_struct_operand" "Utv")]
5304 "ld1\\t{%0<Vmtype>}, %1"
5305 [(set_attr "type" "neon_load1_1reg<q>")]
5308 (define_insn "aarch64_be_st1<mode>"
5309 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5310 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5313 "st1\\t{%1<Vmtype>}, %0"
5314 [(set_attr "type" "neon_store1_1reg<q>")]
5317 (define_insn "*aarch64_be_movoi"
5318 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5319 (match_operand:OI 1 "general_operand" " w,w,m"))]
5320 "TARGET_SIMD && BYTES_BIG_ENDIAN
5321 && (register_operand (operands[0], OImode)
5322 || register_operand (operands[1], OImode))"
5327 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5328 (set_attr "length" "8,4,4")]
5331 (define_insn "*aarch64_be_movci"
5332 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5333 (match_operand:CI 1 "general_operand" " w,w,o"))]
5334 "TARGET_SIMD && BYTES_BIG_ENDIAN
5335 && (register_operand (operands[0], CImode)
5336 || register_operand (operands[1], CImode))"
5338 [(set_attr "type" "multiple")
5339 (set_attr "length" "12,4,4")]
5342 (define_insn "*aarch64_be_movxi"
5343 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5344 (match_operand:XI 1 "general_operand" " w,w,o"))]
5345 "TARGET_SIMD && BYTES_BIG_ENDIAN
5346 && (register_operand (operands[0], XImode)
5347 || register_operand (operands[1], XImode))"
5349 [(set_attr "type" "multiple")
5350 (set_attr "length" "16,4,4")]
5354 [(set (match_operand:OI 0 "register_operand")
5355 (match_operand:OI 1 "register_operand"))]
5356 "TARGET_SIMD && reload_completed"
5359 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5364 [(set (match_operand:CI 0 "nonimmediate_operand")
5365 (match_operand:CI 1 "general_operand"))]
5366 "TARGET_SIMD && reload_completed"
5369 if (register_operand (operands[0], CImode)
5370 && register_operand (operands[1], CImode))
5372 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5375 else if (BYTES_BIG_ENDIAN)
5377 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5378 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5379 emit_move_insn (gen_lowpart (V16QImode,
5380 simplify_gen_subreg (TImode, operands[0],
5382 gen_lowpart (V16QImode,
5383 simplify_gen_subreg (TImode, operands[1],
5392 [(set (match_operand:XI 0 "nonimmediate_operand")
5393 (match_operand:XI 1 "general_operand"))]
5394 "TARGET_SIMD && reload_completed"
5397 if (register_operand (operands[0], XImode)
5398 && register_operand (operands[1], XImode))
5400 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5403 else if (BYTES_BIG_ENDIAN)
5405 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5406 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5407 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5408 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5415 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5416 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5417 (match_operand:DI 1 "register_operand" "w")
5418 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5421 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5422 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5425 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5430 (define_insn "aarch64_ld2<mode>_dreg"
5431 [(set (match_operand:OI 0 "register_operand" "=w")
5432 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5433 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5436 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5437 [(set_attr "type" "neon_load2_2reg<q>")]
5440 (define_insn "aarch64_ld2<mode>_dreg"
5441 [(set (match_operand:OI 0 "register_operand" "=w")
5442 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5443 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5446 "ld1\\t{%S0.1d - %T0.1d}, %1"
5447 [(set_attr "type" "neon_load1_2reg<q>")]
5450 (define_insn "aarch64_ld3<mode>_dreg"
5451 [(set (match_operand:CI 0 "register_operand" "=w")
5452 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5453 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5456 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5457 [(set_attr "type" "neon_load3_3reg<q>")]
5460 (define_insn "aarch64_ld3<mode>_dreg"
5461 [(set (match_operand:CI 0 "register_operand" "=w")
5462 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5463 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5466 "ld1\\t{%S0.1d - %U0.1d}, %1"
5467 [(set_attr "type" "neon_load1_3reg<q>")]
5470 (define_insn "aarch64_ld4<mode>_dreg"
5471 [(set (match_operand:XI 0 "register_operand" "=w")
5472 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5473 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5476 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5477 [(set_attr "type" "neon_load4_4reg<q>")]
5480 (define_insn "aarch64_ld4<mode>_dreg"
5481 [(set (match_operand:XI 0 "register_operand" "=w")
5482 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5483 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5486 "ld1\\t{%S0.1d - %V0.1d}, %1"
5487 [(set_attr "type" "neon_load1_4reg<q>")]
5490 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5491 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5492 (match_operand:DI 1 "register_operand" "r")
5493 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5496 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5497 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5499 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5503 (define_expand "aarch64_ld1<VALL_F16:mode>"
5504 [(match_operand:VALL_F16 0 "register_operand")
5505 (match_operand:DI 1 "register_operand")]
5508 machine_mode mode = <VALL_F16:MODE>mode;
5509 rtx mem = gen_rtx_MEM (mode, operands[1]);
5511 if (BYTES_BIG_ENDIAN)
5512 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5514 emit_move_insn (operands[0], mem);
5518 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5519 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5520 (match_operand:DI 1 "register_operand" "r")
5521 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5524 machine_mode mode = <VSTRUCT:MODE>mode;
5525 rtx mem = gen_rtx_MEM (mode, operands[1]);
5527 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5531 (define_expand "aarch64_ld1x2<VQ:mode>"
5532 [(match_operand:OI 0 "register_operand" "=w")
5533 (match_operand:DI 1 "register_operand" "r")
5534 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5537 machine_mode mode = OImode;
5538 rtx mem = gen_rtx_MEM (mode, operands[1]);
5540 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5544 (define_expand "aarch64_ld1x2<VDC:mode>"
5545 [(match_operand:OI 0 "register_operand" "=w")
5546 (match_operand:DI 1 "register_operand" "r")
5547 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5550 machine_mode mode = OImode;
5551 rtx mem = gen_rtx_MEM (mode, operands[1]);
5553 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5558 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5559 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5560 (match_operand:DI 1 "register_operand" "w")
5561 (match_operand:VSTRUCT 2 "register_operand" "0")
5562 (match_operand:SI 3 "immediate_operand" "i")
5563 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5566 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5567 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5570 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5571 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5572 operands[0], mem, operands[2], operands[3]));
5576 ;; Expanders for builtins to extract vector registers from large
5577 ;; opaque integer modes.
5581 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5582 [(match_operand:VDC 0 "register_operand" "=w")
5583 (match_operand:VSTRUCT 1 "register_operand" "w")
5584 (match_operand:SI 2 "immediate_operand" "i")]
5587 int part = INTVAL (operands[2]);
5588 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5589 int offset = part * 16;
5591 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5592 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5598 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5599 [(match_operand:VQ 0 "register_operand" "=w")
5600 (match_operand:VSTRUCT 1 "register_operand" "w")
5601 (match_operand:SI 2 "immediate_operand" "i")]
5604 int part = INTVAL (operands[2]);
5605 int offset = part * 16;
5607 emit_move_insn (operands[0],
5608 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5612 ;; Permuted-store expanders for neon intrinsics.
5614 ;; Permute instructions
5618 (define_expand "vec_perm<mode>"
5619 [(match_operand:VB 0 "register_operand")
5620 (match_operand:VB 1 "register_operand")
5621 (match_operand:VB 2 "register_operand")
5622 (match_operand:VB 3 "register_operand")]
5625 aarch64_expand_vec_perm (operands[0], operands[1],
5626 operands[2], operands[3], <nunits>);
5630 (define_insn "aarch64_tbl1<mode>"
5631 [(set (match_operand:VB 0 "register_operand" "=w")
5632 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5633 (match_operand:VB 2 "register_operand" "w")]
5636 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5637 [(set_attr "type" "neon_tbl1<q>")]
5640 ;; Two source registers.
5642 (define_insn "aarch64_tbl2v16qi"
5643 [(set (match_operand:V16QI 0 "register_operand" "=w")
5644 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5645 (match_operand:V16QI 2 "register_operand" "w")]
5648 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5649 [(set_attr "type" "neon_tbl2_q")]
5652 (define_insn "aarch64_tbl3<mode>"
5653 [(set (match_operand:VB 0 "register_operand" "=w")
5654 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5655 (match_operand:VB 2 "register_operand" "w")]
5658 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5659 [(set_attr "type" "neon_tbl3")]
5662 (define_insn "aarch64_tbx4<mode>"
5663 [(set (match_operand:VB 0 "register_operand" "=w")
5664 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5665 (match_operand:OI 2 "register_operand" "w")
5666 (match_operand:VB 3 "register_operand" "w")]
5669 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5670 [(set_attr "type" "neon_tbl4")]
5673 ;; Three source registers.
5675 (define_insn "aarch64_qtbl3<mode>"
5676 [(set (match_operand:VB 0 "register_operand" "=w")
5677 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5678 (match_operand:VB 2 "register_operand" "w")]
5681 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5682 [(set_attr "type" "neon_tbl3")]
5685 (define_insn "aarch64_qtbx3<mode>"
5686 [(set (match_operand:VB 0 "register_operand" "=w")
5687 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5688 (match_operand:CI 2 "register_operand" "w")
5689 (match_operand:VB 3 "register_operand" "w")]
5692 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5693 [(set_attr "type" "neon_tbl3")]
5696 ;; Four source registers.
5698 (define_insn "aarch64_qtbl4<mode>"
5699 [(set (match_operand:VB 0 "register_operand" "=w")
5700 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5701 (match_operand:VB 2 "register_operand" "w")]
5704 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5705 [(set_attr "type" "neon_tbl4")]
5708 (define_insn "aarch64_qtbx4<mode>"
5709 [(set (match_operand:VB 0 "register_operand" "=w")
5710 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5711 (match_operand:XI 2 "register_operand" "w")
5712 (match_operand:VB 3 "register_operand" "w")]
5715 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5716 [(set_attr "type" "neon_tbl4")]
5719 (define_insn_and_split "aarch64_combinev16qi"
5720 [(set (match_operand:OI 0 "register_operand" "=w")
5721 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5722 (match_operand:V16QI 2 "register_operand" "w")]
5726 "&& reload_completed"
5729 aarch64_split_combinev16qi (operands);
5732 [(set_attr "type" "multiple")]
5735 ;; This instruction's pattern is generated directly by
5736 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5737 ;; need corresponding changes there.
5738 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5739 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5740 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5741 (match_operand:VALL_F16 2 "register_operand" "w")]
5744 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5745 [(set_attr "type" "neon_permute<q>")]
5748 ;; This instruction's pattern is generated directly by
5749 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5750 ;; need corresponding changes there. Note that the immediate (third)
5751 ;; operand is a lane index not a byte index.
5752 (define_insn "aarch64_ext<mode>"
5753 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5754 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5755 (match_operand:VALL_F16 2 "register_operand" "w")
5756 (match_operand:SI 3 "immediate_operand" "i")]
5760 operands[3] = GEN_INT (INTVAL (operands[3])
5761 * GET_MODE_UNIT_SIZE (<MODE>mode));
5762 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5764 [(set_attr "type" "neon_ext<q>")]
5767 ;; This instruction's pattern is generated directly by
5768 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5769 ;; need corresponding changes there.
5770 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5771 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5772 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5775 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5776 [(set_attr "type" "neon_rev<q>")]
5779 (define_insn "aarch64_st2<mode>_dreg"
5780 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5781 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5782 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5785 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5786 [(set_attr "type" "neon_store2_2reg")]
5789 (define_insn "aarch64_st2<mode>_dreg"
5790 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5791 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5792 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5795 "st1\\t{%S1.1d - %T1.1d}, %0"
5796 [(set_attr "type" "neon_store1_2reg")]
5799 (define_insn "aarch64_st3<mode>_dreg"
5800 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5801 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5802 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5805 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5806 [(set_attr "type" "neon_store3_3reg")]
5809 (define_insn "aarch64_st3<mode>_dreg"
5810 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5811 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5812 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5815 "st1\\t{%S1.1d - %U1.1d}, %0"
5816 [(set_attr "type" "neon_store1_3reg")]
5819 (define_insn "aarch64_st4<mode>_dreg"
5820 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5821 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5822 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5825 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5826 [(set_attr "type" "neon_store4_4reg")]
5829 (define_insn "aarch64_st4<mode>_dreg"
5830 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5831 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5832 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5835 "st1\\t{%S1.1d - %V1.1d}, %0"
5836 [(set_attr "type" "neon_store1_4reg")]
5839 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5840 [(match_operand:DI 0 "register_operand" "r")
5841 (match_operand:VSTRUCT 1 "register_operand" "w")
5842 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5845 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5846 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5848 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5852 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5853 [(match_operand:DI 0 "register_operand" "r")
5854 (match_operand:VSTRUCT 1 "register_operand" "w")
5855 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 machine_mode mode = <VSTRUCT:MODE>mode;
5859 rtx mem = gen_rtx_MEM (mode, operands[0]);
5861 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5865 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5866 [(match_operand:DI 0 "register_operand" "r")
5867 (match_operand:VSTRUCT 1 "register_operand" "w")
5868 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5869 (match_operand:SI 2 "immediate_operand")]
5872 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5873 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5876 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5877 mem, operands[1], operands[2]));
5881 (define_expand "aarch64_st1<VALL_F16:mode>"
5882 [(match_operand:DI 0 "register_operand")
5883 (match_operand:VALL_F16 1 "register_operand")]
5886 machine_mode mode = <VALL_F16:MODE>mode;
5887 rtx mem = gen_rtx_MEM (mode, operands[0]);
5889 if (BYTES_BIG_ENDIAN)
5890 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5892 emit_move_insn (mem, operands[1]);
5896 ;; Expander for builtins to insert vector registers into large
5897 ;; opaque integer modes.
5899 ;; Q-register list. We don't need a D-reg inserter as we zero
5900 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5902 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5903 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5904 (match_operand:VSTRUCT 1 "register_operand" "0")
5905 (match_operand:VQ 2 "register_operand" "w")
5906 (match_operand:SI 3 "immediate_operand" "i")]
5909 int part = INTVAL (operands[3]);
5910 int offset = part * 16;
5912 emit_move_insn (operands[0], operands[1]);
5913 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5918 ;; Standard pattern name vec_init<mode><Vel>.
5920 (define_expand "vec_init<mode><Vel>"
5921 [(match_operand:VALL_F16 0 "register_operand" "")
5922 (match_operand 1 "" "")]
5925 aarch64_expand_vector_init (operands[0], operands[1]);
5929 (define_insn "*aarch64_simd_ld1r<mode>"
5930 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5931 (vec_duplicate:VALL_F16
5932 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5934 "ld1r\\t{%0.<Vtype>}, %1"
5935 [(set_attr "type" "neon_load1_all_lanes")]
5938 (define_insn "aarch64_simd_ld1<mode>_x2"
5939 [(set (match_operand:OI 0 "register_operand" "=w")
5940 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5941 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5944 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5945 [(set_attr "type" "neon_load1_2reg<q>")]
5948 (define_insn "aarch64_simd_ld1<mode>_x2"
5949 [(set (match_operand:OI 0 "register_operand" "=w")
5950 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5951 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5954 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5955 [(set_attr "type" "neon_load1_2reg<q>")]
5959 (define_insn "@aarch64_frecpe<mode>"
5960 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5962 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5965 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5966 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5969 (define_insn "aarch64_frecpx<mode>"
5970 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5971 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5974 "frecpx\t%<s>0, %<s>1"
5975 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5978 (define_insn "@aarch64_frecps<mode>"
5979 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5981 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5982 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5985 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5986 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5989 (define_insn "aarch64_urecpe<mode>"
5990 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5991 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5994 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5995 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5997 ;; Standard pattern name vec_extract<mode><Vel>.
5999 (define_expand "vec_extract<mode><Vel>"
6000 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
6001 (match_operand:VALL_F16 1 "register_operand" "")
6002 (match_operand:SI 2 "immediate_operand" "")]
6006 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
6012 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
6013 [(set (match_operand:V16QI 0 "register_operand" "=w")
6014 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
6015 (match_operand:V16QI 2 "register_operand" "w")]
6017 "TARGET_SIMD && TARGET_AES"
6018 "aes<aes_op>\\t%0.16b, %2.16b"
6019 [(set_attr "type" "crypto_aese")]
6022 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6023 [(set (match_operand:V16QI 0 "register_operand" "=w")
6024 (unspec:V16QI [(xor:V16QI
6025 (match_operand:V16QI 1 "register_operand" "%0")
6026 (match_operand:V16QI 2 "register_operand" "w"))
6027 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
6029 "TARGET_SIMD && TARGET_AES"
6030 "aes<aes_op>\\t%0.16b, %2.16b"
6031 [(set_attr "type" "crypto_aese")]
6034 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
6035 [(set (match_operand:V16QI 0 "register_operand" "=w")
6036 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
6037 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
6038 (match_operand:V16QI 2 "register_operand" "w"))]
6040 "TARGET_SIMD && TARGET_AES"
6041 "aes<aes_op>\\t%0.16b, %2.16b"
6042 [(set_attr "type" "crypto_aese")]
6045 ;; When AES/AESMC fusion is enabled we want the register allocation to
6049 ;; So prefer to tie operand 1 to operand 0 when fusing.
6051 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
6052 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
6053 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
6055 "TARGET_SIMD && TARGET_AES"
6056 "aes<aesmc_op>\\t%0.16b, %1.16b"
6057 [(set_attr "type" "crypto_aesmc")
6058 (set_attr_alternative "enabled"
6059 [(if_then_else (match_test
6060 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
6061 (const_string "yes" )
6062 (const_string "no"))
6063 (const_string "yes")])]
6066 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
6067 ;; and enforce the register dependency without scheduling or register
6068 ;; allocation messing up the order or introducing moves inbetween.
6069 ;; Mash the two together during combine.
6071 (define_insn "*aarch64_crypto_aese_fused"
6072 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6075 [(match_operand:V16QI 1 "register_operand" "0")
6076 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
6078 "TARGET_SIMD && TARGET_AES
6079 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6080 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
6081 [(set_attr "type" "crypto_aese")
6082 (set_attr "length" "8")]
6085 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6086 ;; and enforce the register dependency without scheduling or register
6087 ;; allocation messing up the order or introducing moves inbetween.
6088 ;; Mash the two together during combine.
6090 (define_insn "*aarch64_crypto_aesd_fused"
6091 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6094 [(match_operand:V16QI 1 "register_operand" "0")
6095 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6097 "TARGET_SIMD && TARGET_AES
6098 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6099 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6100 [(set_attr "type" "crypto_aese")
6101 (set_attr "length" "8")]
6106 (define_insn "aarch64_crypto_sha1hsi"
6107 [(set (match_operand:SI 0 "register_operand" "=w")
6108 (unspec:SI [(match_operand:SI 1
6109 "register_operand" "w")]
6111 "TARGET_SIMD && TARGET_SHA2"
6113 [(set_attr "type" "crypto_sha1_fast")]
6116 (define_insn "aarch64_crypto_sha1hv4si"
6117 [(set (match_operand:SI 0 "register_operand" "=w")
6118 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6119 (parallel [(const_int 0)]))]
6121 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
6123 [(set_attr "type" "crypto_sha1_fast")]
6126 (define_insn "aarch64_be_crypto_sha1hv4si"
6127 [(set (match_operand:SI 0 "register_operand" "=w")
6128 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
6129 (parallel [(const_int 3)]))]
6131 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
6133 [(set_attr "type" "crypto_sha1_fast")]
6136 (define_insn "aarch64_crypto_sha1su1v4si"
6137 [(set (match_operand:V4SI 0 "register_operand" "=w")
6138 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6139 (match_operand:V4SI 2 "register_operand" "w")]
6141 "TARGET_SIMD && TARGET_SHA2"
6142 "sha1su1\\t%0.4s, %2.4s"
6143 [(set_attr "type" "crypto_sha1_fast")]
6146 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
6147 [(set (match_operand:V4SI 0 "register_operand" "=w")
6148 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6149 (match_operand:SI 2 "register_operand" "w")
6150 (match_operand:V4SI 3 "register_operand" "w")]
6152 "TARGET_SIMD && TARGET_SHA2"
6153 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
6154 [(set_attr "type" "crypto_sha1_slow")]
6157 (define_insn "aarch64_crypto_sha1su0v4si"
6158 [(set (match_operand:V4SI 0 "register_operand" "=w")
6159 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6160 (match_operand:V4SI 2 "register_operand" "w")
6161 (match_operand:V4SI 3 "register_operand" "w")]
6163 "TARGET_SIMD && TARGET_SHA2"
6164 "sha1su0\\t%0.4s, %2.4s, %3.4s"
6165 [(set_attr "type" "crypto_sha1_xor")]
6170 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
6171 [(set (match_operand:V4SI 0 "register_operand" "=w")
6172 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6173 (match_operand:V4SI 2 "register_operand" "w")
6174 (match_operand:V4SI 3 "register_operand" "w")]
6176 "TARGET_SIMD && TARGET_SHA2"
6177 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
6178 [(set_attr "type" "crypto_sha256_slow")]
6181 (define_insn "aarch64_crypto_sha256su0v4si"
6182 [(set (match_operand:V4SI 0 "register_operand" "=w")
6183 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6184 (match_operand:V4SI 2 "register_operand" "w")]
6186 "TARGET_SIMD && TARGET_SHA2"
6187 "sha256su0\\t%0.4s, %2.4s"
6188 [(set_attr "type" "crypto_sha256_fast")]
6191 (define_insn "aarch64_crypto_sha256su1v4si"
6192 [(set (match_operand:V4SI 0 "register_operand" "=w")
6193 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6194 (match_operand:V4SI 2 "register_operand" "w")
6195 (match_operand:V4SI 3 "register_operand" "w")]
6197 "TARGET_SIMD && TARGET_SHA2"
6198 "sha256su1\\t%0.4s, %2.4s, %3.4s"
6199 [(set_attr "type" "crypto_sha256_slow")]
6204 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6205 [(set (match_operand:V2DI 0 "register_operand" "=w")
6206 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6207 (match_operand:V2DI 2 "register_operand" "w")
6208 (match_operand:V2DI 3 "register_operand" "w")]
6210 "TARGET_SIMD && TARGET_SHA3"
6211 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6212 [(set_attr "type" "crypto_sha512")]
6215 (define_insn "aarch64_crypto_sha512su0qv2di"
6216 [(set (match_operand:V2DI 0 "register_operand" "=w")
6217 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6218 (match_operand:V2DI 2 "register_operand" "w")]
6220 "TARGET_SIMD && TARGET_SHA3"
6221 "sha512su0\\t%0.2d, %2.2d"
6222 [(set_attr "type" "crypto_sha512")]
6225 (define_insn "aarch64_crypto_sha512su1qv2di"
6226 [(set (match_operand:V2DI 0 "register_operand" "=w")
6227 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6228 (match_operand:V2DI 2 "register_operand" "w")
6229 (match_operand:V2DI 3 "register_operand" "w")]
6231 "TARGET_SIMD && TARGET_SHA3"
6232 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6233 [(set_attr "type" "crypto_sha512")]
6238 (define_insn "eor3q<mode>4"
6239 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6242 (match_operand:VQ_I 2 "register_operand" "w")
6243 (match_operand:VQ_I 3 "register_operand" "w"))
6244 (match_operand:VQ_I 1 "register_operand" "w")))]
6245 "TARGET_SIMD && TARGET_SHA3"
6246 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6247 [(set_attr "type" "crypto_sha3")]
6250 (define_insn "aarch64_rax1qv2di"
6251 [(set (match_operand:V2DI 0 "register_operand" "=w")
6254 (match_operand:V2DI 2 "register_operand" "w")
6256 (match_operand:V2DI 1 "register_operand" "w")))]
6257 "TARGET_SIMD && TARGET_SHA3"
6258 "rax1\\t%0.2d, %1.2d, %2.2d"
6259 [(set_attr "type" "crypto_sha3")]
6262 (define_insn "aarch64_xarqv2di"
6263 [(set (match_operand:V2DI 0 "register_operand" "=w")
6266 (match_operand:V2DI 1 "register_operand" "%w")
6267 (match_operand:V2DI 2 "register_operand" "w"))
6268 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6269 "TARGET_SIMD && TARGET_SHA3"
6270 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6271 [(set_attr "type" "crypto_sha3")]
6274 (define_insn "bcaxq<mode>4"
6275 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6278 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6279 (match_operand:VQ_I 2 "register_operand" "w"))
6280 (match_operand:VQ_I 1 "register_operand" "w")))]
6281 "TARGET_SIMD && TARGET_SHA3"
6282 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6283 [(set_attr "type" "crypto_sha3")]
6288 (define_insn "aarch64_sm3ss1qv4si"
6289 [(set (match_operand:V4SI 0 "register_operand" "=w")
6290 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6291 (match_operand:V4SI 2 "register_operand" "w")
6292 (match_operand:V4SI 3 "register_operand" "w")]
6294 "TARGET_SIMD && TARGET_SM4"
6295 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6296 [(set_attr "type" "crypto_sm3")]
6300 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6301 [(set (match_operand:V4SI 0 "register_operand" "=w")
6302 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6303 (match_operand:V4SI 2 "register_operand" "w")
6304 (match_operand:V4SI 3 "register_operand" "w")
6305 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6307 "TARGET_SIMD && TARGET_SM4"
6308 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6309 [(set_attr "type" "crypto_sm3")]
6312 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6313 [(set (match_operand:V4SI 0 "register_operand" "=w")
6314 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6315 (match_operand:V4SI 2 "register_operand" "w")
6316 (match_operand:V4SI 3 "register_operand" "w")]
6318 "TARGET_SIMD && TARGET_SM4"
6319 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6320 [(set_attr "type" "crypto_sm3")]
6325 (define_insn "aarch64_sm4eqv4si"
6326 [(set (match_operand:V4SI 0 "register_operand" "=w")
6327 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6328 (match_operand:V4SI 2 "register_operand" "w")]
6330 "TARGET_SIMD && TARGET_SM4"
6331 "sm4e\\t%0.4s, %2.4s"
6332 [(set_attr "type" "crypto_sm4")]
6335 (define_insn "aarch64_sm4ekeyqv4si"
6336 [(set (match_operand:V4SI 0 "register_operand" "=w")
6337 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6338 (match_operand:V4SI 2 "register_operand" "w")]
6340 "TARGET_SIMD && TARGET_SM4"
6341 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6342 [(set_attr "type" "crypto_sm4")]
6347 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6348 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6350 [(match_operand:VDQSF 1 "register_operand" "0")
6351 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6352 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6356 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6357 <nunits> * 2, false);
6358 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6359 <nunits> * 2, false);
6361 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6370 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6371 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6373 [(match_operand:VDQSF 1 "register_operand" "0")
6374 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6375 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6379 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6380 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6382 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6390 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6391 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6394 (vec_select:<VFMLA_SEL_W>
6395 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6396 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6398 (vec_select:<VFMLA_SEL_W>
6399 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6400 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6401 (match_operand:VDQSF 1 "register_operand" "0")))]
6403 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6404 [(set_attr "type" "neon_fp_mul_s")]
6407 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6408 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6412 (vec_select:<VFMLA_SEL_W>
6413 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6414 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6416 (vec_select:<VFMLA_SEL_W>
6417 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6418 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6419 (match_operand:VDQSF 1 "register_operand" "0")))]
6421 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6422 [(set_attr "type" "neon_fp_mul_s")]
6425 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6426 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6429 (vec_select:<VFMLA_SEL_W>
6430 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6431 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6433 (vec_select:<VFMLA_SEL_W>
6434 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6435 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6436 (match_operand:VDQSF 1 "register_operand" "0")))]
6438 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6439 [(set_attr "type" "neon_fp_mul_s")]
6442 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6443 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6447 (vec_select:<VFMLA_SEL_W>
6448 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6449 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6451 (vec_select:<VFMLA_SEL_W>
6452 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6453 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6454 (match_operand:VDQSF 1 "register_operand" "0")))]
6456 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6457 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the by-lane low-half FMLAL/FMLSL on V2SF (<f16mac1>
;; iterates over the add/subtract variants).  Builds the low-half
;; parallel constant for V4HF and an endian-corrected lane index, then
;; emits the matching define_insn.
;; NOTE(review): extraction dropped lines here (the unspec tag, the
;; expander condition, and the tail of the emit_insn argument list) --
;; verify against upstream aarch64-simd.md before editing.
6460 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6461 [(set (match_operand:V2SF 0 "register_operand" "")
6462 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6463 (match_operand:V4HF 2 "register_operand" "")
6464 (match_operand:V4HF 3 "register_operand" "")
6465 (match_operand:SI 4 "aarch64_imm2" "")]
6469 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6470 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6472 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
;; Expander for the by-lane HIGH-half FMLAL2/FMLSL2 on V2SF.  Same shape
;; as the low-half expander, but passes 'true' to select the high half
;; of the V4HF multiplicand.
;; NOTE(review): extraction dropped lines here (unspec tag, condition,
;; tail of the emit_insn call) -- verify against upstream source.
6481 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6482 [(set (match_operand:V2SF 0 "register_operand" "")
6483 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6484 (match_operand:V4HF 2 "register_operand" "")
6485 (match_operand:V4HF 3 "register_operand" "")
6486 (match_operand:SI 4 "aarch64_imm2" "")]
6490 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6491 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6493 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
;; FMLAL (by element), low half, V2SF result: multiply the low half of
;; V4HF operand 2 by a single lane (operand 5, 0..3, "Ui2") of V4HF
;; operand 3 and accumulate into operand 1 (tied to output).  Operand 3
;; uses the "x" constraint -- by-element FP16 multiplies can only index
;; the lower register file (v0-v15).
;; NOTE(review): extraction dropped interior lines (fma/float_extend/
;; vec_duplicate wrappers, insn condition) -- verify against upstream.
6501 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6502 [(set (match_operand:V2SF 0 "register_operand" "=w")
6506 (match_operand:V4HF 2 "register_operand" "w")
6507 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6511 (match_operand:V4HF 3 "register_operand" "x")
6512 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6513 (match_operand:V2SF 1 "register_operand" "0")))]
6515 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6516 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), low half, V2SF result: as the FMLAL variant above
;; but subtracting the widened product from the accumulator (note the
;; extra close paren after vect_par_cnst_lo_half, consistent with a
;; dropped (neg ...) wrapper).
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6519 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6520 [(set (match_operand:V2SF 0 "register_operand" "=w")
6525 (match_operand:V4HF 2 "register_operand" "w")
6526 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6530 (match_operand:V4HF 3 "register_operand" "x")
6531 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6532 (match_operand:V2SF 1 "register_operand" "0")))]
6534 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6535 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), HIGH half, V2SF result: multiply the high half
;; of V4HF operand 2 by lane %5 of operand 3 and accumulate.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6538 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6539 [(set (match_operand:V2SF 0 "register_operand" "=w")
6543 (match_operand:V4HF 2 "register_operand" "w")
6544 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6548 (match_operand:V4HF 3 "register_operand" "x")
6549 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6550 (match_operand:V2SF 1 "register_operand" "0")))]
6552 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6553 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), HIGH half, V2SF result: the subtracting
;; counterpart of the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6556 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6557 [(set (match_operand:V2SF 0 "register_operand" "=w")
6562 (match_operand:V4HF 2 "register_operand" "w")
6563 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6567 (match_operand:V4HF 3 "register_operand" "x")
6568 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6569 (match_operand:V2SF 1 "register_operand" "0")))]
6571 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6572 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad-form by-lane (laneq: V8HF index, 0..7,
;; aarch64_lane_imm3) low-half FMLAL/FMLSL producing V4SF.  Builds the
;; V8HF low-half parallel constant and endian-corrected lane, then emits
;; the matching insn.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6575 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6576 [(set (match_operand:V4SF 0 "register_operand" "")
6577 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6578 (match_operand:V8HF 2 "register_operand" "")
6579 (match_operand:V8HF 3 "register_operand" "")
6580 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6584 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6585 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6587 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
;; Expander for the quad-form by-lane (laneq) HIGH-half FMLAL2/FMLSL2
;; producing V4SF; passes 'true' to select the high half of operand 2.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6595 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6596 [(set (match_operand:V4SF 0 "register_operand" "")
6597 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6598 (match_operand:V8HF 2 "register_operand" "")
6599 (match_operand:V8HF 3 "register_operand" "")
6600 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6604 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6605 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6607 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
;; FMLAL (by element), low half, quad form: V8HF sources, V4SF result,
;; lane index 0..7 ("Ui7").  Operand 3 constrained to "x" (v0-v15) for
;; the by-element FP16 encoding.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6615 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6616 [(set (match_operand:V4SF 0 "register_operand" "=w")
6620 (match_operand:V8HF 2 "register_operand" "w")
6621 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6625 (match_operand:V8HF 3 "register_operand" "x")
6626 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6627 (match_operand:V4SF 1 "register_operand" "0")))]
6629 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6630 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), low half, quad form: subtracting counterpart of
;; the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6633 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6634 [(set (match_operand:V4SF 0 "register_operand" "=w")
6639 (match_operand:V8HF 2 "register_operand" "w")
6640 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6644 (match_operand:V8HF 3 "register_operand" "x")
6645 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6646 (match_operand:V4SF 1 "register_operand" "0")))]
6648 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6649 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), HIGH half, quad form: V8HF sources, V4SF result.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6652 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6653 [(set (match_operand:V4SF 0 "register_operand" "=w")
6657 (match_operand:V8HF 2 "register_operand" "w")
6658 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6662 (match_operand:V8HF 3 "register_operand" "x")
6663 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6664 (match_operand:V4SF 1 "register_operand" "0")))]
6666 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6667 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), HIGH half, quad form: subtracting counterpart of
;; the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6670 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6671 [(set (match_operand:V4SF 0 "register_operand" "=w")
6676 (match_operand:V8HF 2 "register_operand" "w")
6677 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6681 (match_operand:V8HF 3 "register_operand" "x")
6682 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6683 (match_operand:V4SF 1 "register_operand" "0")))]
6685 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6686 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the 64-bit-form by-laneq (V8HF index, 0..7) low-half
;; FMLAL/FMLSL producing V2SF.  Note the mixed modes: the half-select
;; constant is for the V4HF multiplicand, while the lane index is
;; endian-corrected in V8HFmode because operand 3 is a full 128-bit
;; vector.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6689 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6690 [(set (match_operand:V2SF 0 "register_operand" "")
6691 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6692 (match_operand:V4HF 2 "register_operand" "")
6693 (match_operand:V8HF 3 "register_operand" "")
6694 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6698 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6699 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6701 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
;; Expander for the 64-bit-form by-laneq HIGH-half FMLAL2/FMLSL2
;; producing V2SF; passes 'true' to select the high half of operand 2.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6710 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6711 [(set (match_operand:V2SF 0 "register_operand" "")
6712 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6713 (match_operand:V4HF 2 "register_operand" "")
6714 (match_operand:V8HF 3 "register_operand" "")
6715 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6719 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6720 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6722 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
;; FMLAL (by element), low half, V2SF result with a 128-bit (V8HF) lane
;; source: lane index 0..7 ("Ui7"), operand 3 constrained to "x"
;; (v0-v15) for the by-element FP16 encoding.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6731 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6732 [(set (match_operand:V2SF 0 "register_operand" "=w")
6736 (match_operand:V4HF 2 "register_operand" "w")
6737 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6741 (match_operand:V8HF 3 "register_operand" "x")
6742 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6743 (match_operand:V2SF 1 "register_operand" "0")))]
6745 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6746 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), low half, V2SF result with V8HF lane source:
;; subtracting counterpart of the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6749 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6750 [(set (match_operand:V2SF 0 "register_operand" "=w")
6755 (match_operand:V4HF 2 "register_operand" "w")
6756 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6760 (match_operand:V8HF 3 "register_operand" "x")
6761 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6762 (match_operand:V2SF 1 "register_operand" "0")))]
6764 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6765 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), HIGH half, V2SF result with V8HF lane source.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6768 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6769 [(set (match_operand:V2SF 0 "register_operand" "=w")
6773 (match_operand:V4HF 2 "register_operand" "w")
6774 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6778 (match_operand:V8HF 3 "register_operand" "x")
6779 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6780 (match_operand:V2SF 1 "register_operand" "0")))]
6782 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6783 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), HIGH half, V2SF result with V8HF lane source:
;; subtracting counterpart of the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6786 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6787 [(set (match_operand:V2SF 0 "register_operand" "=w")
6792 (match_operand:V4HF 2 "register_operand" "w")
6793 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6797 (match_operand:V8HF 3 "register_operand" "x")
6798 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6799 (match_operand:V2SF 1 "register_operand" "0")))]
6801 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6802 [(set_attr "type" "neon_fp_mul_s")]
;; Expander for the quad-form by-lane (V4HF index, 0..3, aarch64_imm2)
;; low-half FMLAL/FMLSL producing V4SF.  Mixed modes: the half-select
;; constant is built in V8HFmode for the 128-bit multiplicand while the
;; lane index is endian-corrected in V4HFmode for the 64-bit operand 3.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6805 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6806 [(set (match_operand:V4SF 0 "register_operand" "")
6807 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6808 (match_operand:V8HF 2 "register_operand" "")
6809 (match_operand:V4HF 3 "register_operand" "")
6810 (match_operand:SI 4 "aarch64_imm2" "")]
6814 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6815 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6817 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
;; Expander for the quad-form by-lane HIGH-half FMLAL2/FMLSL2 producing
;; V4SF; passes 'true' to select the high half of operand 2.
;; NOTE(review): extraction dropped lines (unspec tag, condition, tail
;; of the emit_insn call) -- verify against upstream source.
6825 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6826 [(set (match_operand:V4SF 0 "register_operand" "")
6827 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6828 (match_operand:V8HF 2 "register_operand" "")
6829 (match_operand:V4HF 3 "register_operand" "")
6830 (match_operand:SI 4 "aarch64_imm2" "")]
6834 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6835 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6837 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
;; FMLAL (by element), low half, quad form with a 64-bit (V4HF) lane
;; source: lane index 0..3 ("Ui2"), operand 3 constrained to "x".
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6845 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6846 [(set (match_operand:V4SF 0 "register_operand" "=w")
6850 (match_operand:V8HF 2 "register_operand" "w")
6851 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6855 (match_operand:V4HF 3 "register_operand" "x")
6856 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6857 (match_operand:V4SF 1 "register_operand" "0")))]
6859 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6860 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL (by element), low half, quad form with V4HF lane source:
;; subtracting counterpart of the pattern above.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6863 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6864 [(set (match_operand:V4SF 0 "register_operand" "=w")
6869 (match_operand:V8HF 2 "register_operand" "w")
6870 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6874 (match_operand:V4HF 3 "register_operand" "x")
6875 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6876 (match_operand:V4SF 1 "register_operand" "0")))]
6878 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6879 [(set_attr "type" "neon_fp_mul_s")]
;; FMLAL2 (by element), HIGH half, quad form with V4HF lane source.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6882 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6883 [(set (match_operand:V4SF 0 "register_operand" "=w")
6887 (match_operand:V8HF 2 "register_operand" "w")
6888 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6892 (match_operand:V4HF 3 "register_operand" "x")
6893 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6894 (match_operand:V4SF 1 "register_operand" "0")))]
6896 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6897 [(set_attr "type" "neon_fp_mul_s")]
;; FMLSL2 (by element), HIGH half, quad form with V4HF lane source:
;; subtracting counterpart of the pattern above, and the last of the
;; FP16 FML by-element patterns.
;; NOTE(review): extraction dropped interior lines -- verify against
;; upstream aarch64-simd.md before editing.
6900 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6901 [(set (match_operand:V4SF 0 "register_operand" "=w")
6906 (match_operand:V8HF 2 "register_operand" "w")
6907 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6911 (match_operand:V4HF 3 "register_operand" "x")
6912 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6913 (match_operand:V4SF 1 "register_operand" "0")))]
6915 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6916 [(set_attr "type" "neon_fp_mul_s")]
;; PMULL (crypto extension): 64x64 -> 128-bit carry-less (polynomial)
;; multiply of two DI registers into a TI result, guarded by
;; TARGET_SIMD && TARGET_AES.
;; NOTE(review): the unspec tag line and closing paren appear to have
;; been dropped by extraction -- verify against upstream source.
6921 (define_insn "aarch64_crypto_pmulldi"
6922 [(set (match_operand:TI 0 "register_operand" "=w")
6923 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
6924 (match_operand:DI 2 "register_operand" "w")]
6926 "TARGET_SIMD && TARGET_AES"
6927 "pmull\\t%0.1q, %1.1d, %2.1d"
6928 [(set_attr "type" "crypto_pmull")]
6931 (define_insn "aarch64_crypto_pmullv2di"
6932 [(set (match_operand:TI 0 "register_operand" "=w")
6933 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
6934 (match_operand:V2DI 2 "register_operand" "w")]
6936 "TARGET_SIMD && TARGET_AES"
6937 "pmull2\\t%0.1q, %1.2d, %2.2d"
6938 [(set_attr "type" "crypto_pmull")]