1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for all vector modes incl. FP16 (VALL_F16).
;; NOTE(review): this chunk has interior lines elided (the embedded original
;; line numbers skip), so forms below may be incomplete as shown — confirm
;; against the full file before editing.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
;; For a memory destination, force the source into a register unless it is
;; an immediate zero going to a legitimate address (storable as xzr/stp xzr).
26 if (GET_CODE (operands[0]) == MEM
27 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
28 && aarch64_legitimate_address_p (<MODE>mode, operands[0],
30 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move: expansion must not fail, so when neither operand
;; is a register (e.g. mem := const from the auto-vectorizer) load op1 first.
34 (define_expand "movmisalign<mode>"
35 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
36 (match_operand:VALL 1 "general_operand" ""))]
39 /* This pattern is not permitted to fail during expansion: if both arguments
40 are non-registers (e.g. memory := constant, which can be created by the
41 auto-vectorizer), force operand 1 into a register. */
42 if (!register_operand (operands[0], <MODE>mode)
43 && !register_operand (operands[1], <MODE>mode))
44 operands[1] = force_reg (<MODE>mode, operands[1])
;; Duplicate a scalar into every lane.  Integer variant: alternative 0 dups
;; from a SIMD reg lane, alternative 1 (discouraged "?r") from a GP reg.
47 (define_insn "aarch64_simd_dup<mode>"
48 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
50 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
53 dup\\t%0.<Vtype>, %1.<Vetype>[0]
54 dup\\t%0.<Vtype>, %<vw>1"
55 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
;; Floating-point variant (VDQF_F16): dup from lane 0 of a SIMD register.
58 (define_insn "aarch64_simd_dup<mode>"
59 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
60 (vec_duplicate:VDQF_F16
61 (match_operand:<VEL> 1 "register_operand" "w")))]
63 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
64 [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane into every lane; the lane index is remapped
;; through ENDIAN_LANE_N for big-endian lane numbering.
67 (define_insn "aarch64_dup_lane<mode>"
68 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
69 (vec_duplicate:VALL_F16
71 (match_operand:VALL_F16 1 "register_operand" "w")
72 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
76 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
77 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
79 [(set_attr "type" "neon_dup<q>")]
;; Same, but the source vector has a different (swapped) width mode, so the
;; lane number is mapped in <VSWAP_WIDTH>mode instead of the result mode.
82 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
83 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
84 (vec_duplicate:VALL_F16_NO_V2Q
86 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
87 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
91 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
92 INTVAL (operands[2])));
93 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
95 [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move: alternatives cover load, store of zero (xzr),
;; store, SIMD reg copy, SIMD->GP (umov), GP->SIMD (fmov), GP->GP, and
;; immediate materialization (Dn).
98 (define_insn "*aarch64_simd_mov<mode>"
99 [(set (match_operand:VD 0 "nonimmediate_operand"
100 "=w, m, m, w, ?r, ?w, ?r, w")
101 (match_operand:VD 1 "general_operand"
102 "m, Dz, w, w, w, r, r, Dn"))]
104 && (register_operand (operands[0], <MODE>mode)
105 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
107 switch (which_alternative)
109 case 0: return "ldr\t%d0, %1";
110 case 1: return "str\txzr, %0";
111 case 2: return "str\t%d1, %0";
112 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
113 case 4: return "umov\t%0, %1.d[0]";
114 case 5: return "fmov\t%d0, %1";
115 case 6: return "mov\t%0, %1";
117 return aarch64_output_simd_mov_immediate (operands[1],
119 default: gcc_unreachable ();
122 [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\
123 neon_logic<q>, neon_to_gp<q>, f_mcr,\
124 mov_reg, neon_move<q>")]
;; 128-bit (VQ) vector move; zero store uses "stp xzr, xzr"; the GP-register
;; alternatives are "multiple" (two instructions, length 8).
127 (define_insn "*aarch64_simd_mov<mode>"
128 [(set (match_operand:VQ 0 "nonimmediate_operand"
129 "=w, Ump, m, w, ?r, ?w, ?r, w")
130 (match_operand:VQ 1 "general_operand"
131 "m, Dz, w, w, w, r, r, Dn"))]
133 && (register_operand (operands[0], <MODE>mode)
134 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
136 switch (which_alternative)
139 return "ldr\t%q0, %1";
141 return "stp\txzr, xzr, %0";
143 return "str\t%q1, %0";
145 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
156 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
157 neon_stp, neon_logic<q>, multiple, multiple,\
158 multiple, neon_move<q>")
159 (set_attr "length" "4,4,4,4,8,8,8,4")]
162 ;; When storing lane zero we can use the normal STR and its more permissive
;; Store lane 0 of a vector with a plain scalar STR (only valid when the
;; endian-adjusted lane number is 0).
165 (define_insn "aarch64_store_lane0<mode>"
166 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
167 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
168 (parallel [(match_operand 2 "const_int_operand" "n")])))]
170 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
171 "str\\t%<Vetype>1, %0"
172 [(set_attr "type" "neon_store1_1reg<q>")]
;; LDP of two 64-bit vector registers: the second address must be exactly
;; the first plus the mode size (checked with rtx_equal_p/plus_constant).
175 (define_insn "load_pair<mode>"
176 [(set (match_operand:VD 0 "register_operand" "=w")
177 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
178 (set (match_operand:VD 2 "register_operand" "=w")
179 (match_operand:VD 3 "memory_operand" "m"))]
181 && rtx_equal_p (XEXP (operands[3], 0),
182 plus_constant (Pmode,
183 XEXP (operands[1], 0),
184 GET_MODE_SIZE (<MODE>mode)))"
186 [(set_attr "type" "neon_ldp")]
;; STP of two 64-bit vector registers; mirror image of load_pair above.
189 (define_insn "store_pair<mode>"
190 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
191 (match_operand:VD 1 "register_operand" "w"))
192 (set (match_operand:VD 2 "memory_operand" "=m")
193 (match_operand:VD 3 "register_operand" "w"))]
195 && rtx_equal_p (XEXP (operands[2], 0),
196 plus_constant (Pmode,
197 XEXP (operands[0], 0),
198 GET_MODE_SIZE (<MODE>mode)))"
200 [(set_attr "type" "neon_stp")]
;; Post-reload split of a 128-bit move held entirely in GP registers into
;; two DImode reg-reg moves.  NOTE(review): the "(define_split" opening
;; line appears to be elided from this extraction.
204 [(set (match_operand:VQ 0 "register_operand" "")
205 (match_operand:VQ 1 "register_operand" ""))]
206 "TARGET_SIMD && reload_completed
207 && GP_REGNUM_P (REGNO (operands[0]))
208 && GP_REGNUM_P (REGNO (operands[1]))"
211 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split of a 128-bit move that crosses between FP and GP
;; register files, handled by aarch64_split_simd_move.
216 [(set (match_operand:VQ 0 "register_operand" "")
217 (match_operand:VQ 1 "register_operand" ""))]
218 "TARGET_SIMD && reload_completed
219 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
220 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
223 aarch64_split_simd_move (operands[0], operands[1]);
;; Helper expander: split a 128-bit move into two half-width (VHALF) moves.
;; GP source: write low then high quad of the FP destination; FP source:
;; extract low/high halves via vec_select par-const masks.
227 (define_expand "aarch64_split_simd_mov<mode>"
228 [(set (match_operand:VQ 0)
229 (match_operand:VQ 1))]
232 rtx dst = operands[0];
233 rtx src = operands[1];
235 if (GP_REGNUM_P (REGNO (src)))
237 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
238 rtx src_high_part = gen_highpart (<VHALF>mode, src);
241 (gen_move_lo_quad_<mode> (dst, src_low_part));
243 (gen_move_hi_quad_<mode> (dst, src_high_part));
248 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
249 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
250 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
251 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
254 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
256 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a 128-bit vector into a GP register (post-reload).
262 (define_insn "aarch64_simd_mov_from_<mode>low"
263 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
265 (match_operand:VQ 1 "register_operand" "w")
266 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
267 "TARGET_SIMD && reload_completed"
269 [(set_attr "type" "neon_to_gp<q>")
270 (set_attr "length" "4")
;; Move the high half of a 128-bit vector into a GP register (post-reload).
273 (define_insn "aarch64_simd_mov_from_<mode>high"
274 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
276 (match_operand:VQ 1 "register_operand" "w")
277 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
278 "TARGET_SIMD && reload_completed"
280 [(set_attr "type" "neon_to_gp<q>")
281 (set_attr "length" "4")
;; OR-NOT: note ORN negates its *second* assembly source, so operand 1
;; (the inverted RTL operand) is printed last.
284 (define_insn "orn<mode>3"
285 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
286 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
287 (match_operand:VDQ_I 2 "register_operand" "w")))]
289 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
290 [(set_attr "type" "neon_logic<q>")]
;; Bit-clear: AND with complement; same operand-order swap as ORN.
293 (define_insn "bic<mode>3"
294 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
295 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
296 (match_operand:VDQ_I 2 "register_operand" "w")))]
298 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
299 [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
302 (define_insn "add<mode>3"
303 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
304 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
305 (match_operand:VDQ_I 2 "register_operand" "w")))]
307 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
308 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
311 (define_insn "sub<mode>3"
312 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
313 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
314 (match_operand:VDQ_I 2 "register_operand" "w")))]
316 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
317 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (no 64-bit element variant, hence VDQ_BHSI).
320 (define_insn "mul<mode>3"
321 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
322 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
323 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
325 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
326 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using REV16/REV32/REV64 (<Vrevsuff>).
329 (define_insn "bswap<mode>2"
330 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
331 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
333 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
334 [(set_attr "type" "neon_rev<q>")]
;; Reverse bits within each byte element (RBIT).
337 (define_insn "aarch64_rbit<mode>"
338 [(set (match_operand:VB 0 "register_operand" "=w")
339 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
342 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
343 [(set_attr "type" "neon_rbit")]
;; Count-trailing-zeros expanded as bswap + per-byte rbit (via a byte-mode
;; subreg view of the same register) + clz.
346 (define_expand "ctz<mode>2"
347 [(set (match_operand:VS 0 "register_operand")
348 (ctz:VS (match_operand:VS 1 "register_operand")))]
351 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
352 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
354 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
355 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; xorsign(a, b) = a with its sign flipped by b's sign: mask b's sign bits
;; (all-ones << (unit_bits - 1) per lane) then XOR into a, all done in the
;; equivalent integer vector mode.
360 (define_expand "xorsign<mode>3"
361 [(match_operand:VHSDF 0 "register_operand")
362 (match_operand:VHSDF 1 "register_operand")
363 (match_operand:VHSDF 2 "register_operand")]
367 machine_mode imode = <V_INT_EQUIV>mode;
368 rtx v_bitmask = gen_reg_rtx (imode);
369 rtx op1x = gen_reg_rtx (imode);
370 rtx op2x = gen_reg_rtx (imode);
372 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
373 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
375 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
377 emit_move_insn (v_bitmask,
378 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
379 HOST_WIDE_INT_M1U << bits));
381 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
382 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
383 emit_move_insn (operands[0],
384 lowpart_subreg (<MODE>mode, op1x, imode));
;; copysign(a, b): select b's sign bit and a's remaining bits with BSL,
;; using the same sign-bit mask construction as xorsign above.
389 (define_expand "copysign<mode>3"
390 [(match_operand:VHSDF 0 "register_operand")
391 (match_operand:VHSDF 1 "register_operand")
392 (match_operand:VHSDF 2 "register_operand")]
393 "TARGET_FLOAT && TARGET_SIMD"
395 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
396 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
398 emit_move_insn (v_bitmask,
399 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
400 HOST_WIDE_INT_M1U << bits));
401 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
402 operands[2], operands[1]));
;; Multiply a vector by one lane of another vector (combiner pattern);
;; lane index is endian-adjusted before printing.
407 (define_insn "*aarch64_mul3_elt<mode>"
408 [(set (match_operand:VMUL 0 "register_operand" "=w")
412 (match_operand:VMUL 1 "register_operand" "<h_con>")
413 (parallel [(match_operand:SI 2 "immediate_operand")])))
414 (match_operand:VMUL 3 "register_operand" "w")))]
417 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
418 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
420 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above but the lane comes from a vector of the swapped width; lane
;; number adjusted in <VSWAP_WIDTH>mode.
423 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
424 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
425 (mult:VMUL_CHANGE_NLANES
426 (vec_duplicate:VMUL_CHANGE_NLANES
428 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
429 (parallel [(match_operand:SI 2 "immediate_operand")])))
430 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
433 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
434 INTVAL (operands[2])));
435 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
437 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a scalar duplicated into all lanes: emitted as a by-element
;; multiply against lane 0.
440 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
441 [(set (match_operand:VMUL 0 "register_operand" "=w")
444 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
445 (match_operand:VMUL 2 "register_operand" "w")))]
447 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
448 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE).
451 (define_insn "aarch64_rsqrte<mode>"
452 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
453 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
456 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
457 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS), Newton-iteration helper.
459 (define_insn "aarch64_rsqrts<mode>"
460 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
461 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
462 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
465 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
466 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the approximate-sqrt helper (recip = true).
468 (define_expand "rsqrt<mode>2"
469 [(set (match_operand:VALLF 0 "register_operand" "=w")
470 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
474 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF result from a V2DF lane multiply (combiner pattern).
478 (define_insn "*aarch64_mul3_elt_to_64v2df"
479 [(set (match_operand:DF 0 "register_operand" "=w")
482 (match_operand:V2DF 1 "register_operand" "w")
483 (parallel [(match_operand:SI 2 "immediate_operand")]))
484 (match_operand:DF 3 "register_operand" "w")))]
487 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
488 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
490 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
493 (define_insn "neg<mode>2"
494 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
495 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
497 "neg\t%0.<Vtype>, %1.<Vtype>"
498 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (generic RTL abs form).
501 (define_insn "abs<mode>2"
502 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
503 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
505 "abs\t%0.<Vtype>, %1.<Vtype>"
506 [(set_attr "type" "neon_abs<q>")]
509 ;; The intrinsic version of integer ABS must not be allowed to
510 ;; combine with any operation with an integerated ABS step, such
;; Intrinsic ABS kept as an unspec so combine cannot fold it away.
512 (define_insn "aarch64_abs<mode>"
513 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
515 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
518 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
519 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| (SABD).
522 (define_insn "abd<mode>_3"
523 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
524 (abs:VDQ_BHSI (minus:VDQ_BHSI
525 (match_operand:VDQ_BHSI 1 "register_operand" "w")
526 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
528 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
529 [(set_attr "type" "neon_abd<q>")]
;; Absolute-difference-and-accumulate: op3 += |op1 - op2| (SABA);
;; operand 3 is tied to the destination ("0").
532 (define_insn "aba<mode>_3"
533 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
534 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
535 (match_operand:VDQ_BHSI 1 "register_operand" "w")
536 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
537 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
539 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
540 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD).
543 (define_insn "fabd<mode>3"
544 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
547 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
548 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
550 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
551 [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; Vector bitwise AND.
554 (define_insn "and<mode>3"
555 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
556 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
557 (match_operand:VDQ_I 2 "register_operand" "w")))]
559 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
560 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise OR (ORR).
563 (define_insn "ior<mode>3"
564 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
565 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
566 (match_operand:VDQ_I 2 "register_operand" "w")))]
568 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
569 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise XOR (EOR).
572 (define_insn "xor<mode>3"
573 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
574 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
575 (match_operand:VDQ_I 2 "register_operand" "w")))]
577 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
578 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
581 (define_insn "one_cmpl<mode>2"
582 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
583 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
585 "not\t%0.<Vbtype>, %1.<Vbtype>"
586 [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane (vec_merge with a one-hot operand 2 mask);
;; alternatives: from GP reg (INS), from SIMD lane 0 (INS), from memory (LD1).
;; The mask bit position is converted to an endian-adjusted lane, then back
;; to a one-hot constant so %p2 prints the lane number.
589 (define_insn "aarch64_simd_vec_set<mode>"
590 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
592 (vec_duplicate:VDQ_BHSI
593 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
594 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
595 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
598 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
599 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
600 switch (which_alternative)
603 return "ins\\t%0.<Vetype>[%p2], %w1";
605 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
607 return "ld1\\t{%0.<Vetype>}[%p2], %1";
612 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
;; Copy one lane of vector op3 into one lane of op0 (INS element form);
;; both the destination mask (op2) and source lane (op4) are endian-adjusted.
615 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
616 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
618 (vec_duplicate:VALL_F16
620 (match_operand:VALL_F16 3 "register_operand" "w")
622 [(match_operand:SI 4 "immediate_operand" "i")])))
623 (match_operand:VALL_F16 1 "register_operand" "0")
624 (match_operand:SI 2 "immediate_operand" "i")))]
627 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
628 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
629 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
631 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
633 [(set_attr "type" "neon_ins<q>")]
;; As above but the source vector has the swapped width mode, so its lane
;; index is adjusted in <VSWAP_WIDTH>mode.
636 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
637 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
638 (vec_merge:VALL_F16_NO_V2Q
639 (vec_duplicate:VALL_F16_NO_V2Q
641 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
643 [(match_operand:SI 4 "immediate_operand" "i")])))
644 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
645 (match_operand:SI 2 "immediate_operand" "i")))]
648 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
649 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
650 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
651 INTVAL (operands[4])));
653 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
655 [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by an immediate vector (USHR).
658 (define_insn "aarch64_simd_lshr<mode>"
659 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
660 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
661 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
663 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
664 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector (SSHR).
667 (define_insn "aarch64_simd_ashr<mode>"
668 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
669 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
670 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
672 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
673 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (SHL).
676 (define_insn "aarch64_simd_imm_shl<mode>"
677 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
678 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
679 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
681 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
682 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a per-lane register amount (SSHL on an ashift RTL form).
685 (define_insn "aarch64_simd_reg_sshl<mode>"
686 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
687 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
688 (match_operand:VDQ_I 2 "register_operand" "w")))]
690 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
691 [(set_attr "type" "neon_shift_reg<q>")]
;; Register-amount shifts where negative amounts shift right: unsigned
;; (USHL) and signed (SSHL) variants, kept as unspecs.
694 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
695 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
696 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
697 (match_operand:VDQ_I 2 "register_operand" "w")]
698 UNSPEC_ASHIFT_UNSIGNED))]
700 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
701 [(set_attr "type" "neon_shift_reg<q>")]
704 (define_insn "aarch64_simd_reg_shl<mode>_signed"
705 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
706 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
707 (match_operand:VDQ_I 2 "register_operand" "w")]
708 UNSPEC_ASHIFT_SIGNED))]
710 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
711 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left expander: constant in-range amounts use the immediate SHL
;; pattern; otherwise the scalar amount is forced to a register, duplicated
;; across lanes, and SSHL is used.
714 (define_expand "ashl<mode>3"
715 [(match_operand:VDQ_I 0 "register_operand" "")
716 (match_operand:VDQ_I 1 "register_operand" "")
717 (match_operand:SI 2 "general_operand" "")]
720 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
723 if (CONST_INT_P (operands[2]))
725 shift_amount = INTVAL (operands[2]);
726 if (shift_amount >= 0 && shift_amount < bit_width)
728 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
730 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
737 operands[2] = force_reg (SImode, operands[2]);
740 else if (MEM_P (operands[2]))
742 operands[2] = force_reg (SImode, operands[2]);
745 if (REG_P (operands[2]))
747 rtx tmp = gen_reg_rtx (<MODE>mode);
748 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
749 convert_to_mode (<VEL>mode,
752 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical shift-right expander: immediate USHR for constant amounts in
;; (0, bit_width]; otherwise negate the amount and use USHL (negative
;; amounts shift right).
761 (define_expand "lshr<mode>3"
762 [(match_operand:VDQ_I 0 "register_operand" "")
763 (match_operand:VDQ_I 1 "register_operand" "")
764 (match_operand:SI 2 "general_operand" "")]
767 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
770 if (CONST_INT_P (operands[2]))
772 shift_amount = INTVAL (operands[2]);
773 if (shift_amount > 0 && shift_amount <= bit_width)
775 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
777 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
783 operands[2] = force_reg (SImode, operands[2]);
785 else if (MEM_P (operands[2]))
787 operands[2] = force_reg (SImode, operands[2]);
790 if (REG_P (operands[2]))
792 rtx tmp = gen_reg_rtx (SImode);
793 rtx tmp1 = gen_reg_rtx (<MODE>mode);
794 emit_insn (gen_negsi2 (tmp, operands[2]));
795 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
796 convert_to_mode (<VEL>mode,
798 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic shift-right expander: same shape as lshr but emits SSHR /
;; signed SSHL.
808 (define_expand "ashr<mode>3"
809 [(match_operand:VDQ_I 0 "register_operand" "")
810 (match_operand:VDQ_I 1 "register_operand" "")
811 (match_operand:SI 2 "general_operand" "")]
814 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
817 if (CONST_INT_P (operands[2]))
819 shift_amount = INTVAL (operands[2]);
820 if (shift_amount > 0 && shift_amount <= bit_width)
822 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
824 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
830 operands[2] = force_reg (SImode, operands[2]);
832 else if (MEM_P (operands[2]))
834 operands[2] = force_reg (SImode, operands[2]);
837 if (REG_P (operands[2]))
839 rtx tmp = gen_reg_rtx (SImode);
840 rtx tmp1 = gen_reg_rtx (<MODE>mode);
841 emit_insn (gen_negsi2 (tmp, operands[2]));
842 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
843 convert_to_mode (<VEL>mode,
845 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: straight to SSHL.
855 (define_expand "vashl<mode>3"
856 [(match_operand:VDQ_I 0 "register_operand" "")
857 (match_operand:VDQ_I 1 "register_operand" "")
858 (match_operand:VDQ_I 2 "register_operand" "")]
861 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
866 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
867 ;; Negating individual lanes most certainly offsets the
868 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate amounts, then SSHL.
869 (define_expand "vashr<mode>3"
870 [(match_operand:VDQ_BHSI 0 "register_operand" "")
871 (match_operand:VDQ_BHSI 1 "register_operand" "")
872 (match_operand:VDQ_BHSI 2 "register_operand" "")]
875 rtx neg = gen_reg_rtx (<MODE>mode);
876 emit (gen_neg<mode>2 (neg, operands[2]));
877 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode intrinsic ASR: an asr amount of 64 is clamped to 63, which gives
;; the same result (all sign bits), then the scalar ashrdi3 pattern is used.
883 (define_expand "aarch64_ashr_simddi"
884 [(match_operand:DI 0 "register_operand" "=w")
885 (match_operand:DI 1 "register_operand" "w")
886 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
889 /* An arithmetic shift right by 64 fills the result with copies of the sign
890 bit, just like asr by 63 - however the standard pattern does not handle
892 if (INTVAL (operands[2]) == 64)
893 operands[2] = GEN_INT (63);
894 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate amounts, then USHL.
899 (define_expand "vlshr<mode>3"
900 [(match_operand:VDQ_BHSI 0 "register_operand" "")
901 (match_operand:VDQ_BHSI 1 "register_operand" "")
902 (match_operand:VDQ_BHSI 2 "register_operand" "")]
905 rtx neg = gen_reg_rtx (<MODE>mode);
906 emit (gen_neg<mode>2 (neg, operands[2]));
907 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode intrinsic LSR: shifting by 64 yields zero; otherwise lshrdi3.
912 (define_expand "aarch64_lshr_simddi"
913 [(match_operand:DI 0 "register_operand" "=w")
914 (match_operand:DI 1 "register_operand" "w")
915 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
918 if (INTVAL (operands[2]) == 64)
919 emit_move_insn (operands[0], const0_rtx);
921 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; vec_set expander (integer lanes): build the one-hot merge mask from the
;; lane index and defer to aarch64_simd_vec_set.
926 (define_expand "vec_set<mode>"
927 [(match_operand:VDQ_BHSI 0 "register_operand")
928 (match_operand:<VEL> 1 "register_operand")
929 (match_operand:SI 2 "immediate_operand")]
932 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
933 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
934 GEN_INT (elem), operands[0]));
939 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-d-reg shift for vec_shr: direction depends on endianness (SHL on
;; big-endian, USHR on little-endian).
940 (define_insn "vec_shr_<mode>"
941 [(set (match_operand:VD 0 "register_operand" "=w")
942 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
943 (match_operand:SI 2 "immediate_operand" "i")]
947 if (BYTES_BIG_ENDIAN)
948 return "shl %d0, %d1, %2";
950 return "ushr %d0, %d1, %2";
952 [(set_attr "type" "neon_shift_imm")]
;; Lane insert for V2DI: from GP reg or from lane 0 of a SIMD reg.
955 (define_insn "aarch64_simd_vec_setv2di"
956 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
959 (match_operand:DI 1 "register_operand" "r,w"))
960 (match_operand:V2DI 3 "register_operand" "0,0")
961 (match_operand:SI 2 "immediate_operand" "i,i")))]
964 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
965 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
966 switch (which_alternative)
969 return "ins\\t%0.d[%p2], %1";
971 return "ins\\t%0.d[%p2], %1.d[0]";
976 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI, same one-hot mask construction as above.
979 (define_expand "vec_setv2di"
980 [(match_operand:V2DI 0 "register_operand")
981 (match_operand:DI 1 "register_operand")
982 (match_operand:SI 2 "immediate_operand")]
985 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
986 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
987 GEN_INT (elem), operands[0]));
;; Lane insert for FP vectors: INS from lane 0 of the scalar's SIMD reg.
992 (define_insn "aarch64_simd_vec_set<mode>"
993 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
995 (vec_duplicate:VDQF_F16
996 (match_operand:<VEL> 1 "register_operand" "w"))
997 (match_operand:VDQF_F16 3 "register_operand" "0")
998 (match_operand:SI 2 "immediate_operand" "i")))]
1001 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
1003 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1004 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1006 [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors.
1009 (define_expand "vec_set<mode>"
1010 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1011 (match_operand:<VEL> 1 "register_operand" "w")
1012 (match_operand:SI 2 "immediate_operand" "")]
1015 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1016 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1017 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: op0 = op1 + op2 * op3 (MLA); accumulator tied to
;; the destination.
1023 (define_insn "aarch64_mla<mode>"
1024 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025 (plus:VDQ_BHSI (mult:VDQ_BHSI
1026 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1028 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1030 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1031 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with a by-element multiplicand; lane endian-adjusted in result mode.
1034 (define_insn "*aarch64_mla_elt<mode>"
1035 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1038 (vec_duplicate:VDQHS
1040 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))
1043 (match_operand:VDQHS 4 "register_operand" "0")))]
1046 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1047 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by-element with the lane source in the swapped-width mode.
1052 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1053 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1056 (vec_duplicate:VDQHS
1058 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059 (parallel [(match_operand:SI 2 "immediate_operand")])))
1060 (match_operand:VDQHS 3 "register_operand" "w"))
1061 (match_operand:VDQHS 4 "register_operand" "0")))]
1064 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1065 INTVAL (operands[2])));
1066 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1068 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA where the scalar is a duplicated register: use lane 0.
1071 (define_insn "*aarch64_mla_elt_merge<mode>"
1072 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1074 (mult:VDQHS (vec_duplicate:VDQHS
1075 (match_operand:<VEL> 1 "register_operand" "w"))
1076 (match_operand:VDQHS 2 "register_operand" "w"))
1077 (match_operand:VDQHS 3 "register_operand" "0")))]
1079 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1080 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: op0 = op1 - op2 * op3 (MLS).
1083 (define_insn "aarch64_mls<mode>"
1084 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1085 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1086 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1087 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1089 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1090 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by-element variants, mirroring the MLA ones above.
1093 (define_insn "*aarch64_mls_elt<mode>"
1094 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1096 (match_operand:VDQHS 4 "register_operand" "0")
1098 (vec_duplicate:VDQHS
1100 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1101 (parallel [(match_operand:SI 2 "immediate_operand")])))
1102 (match_operand:VDQHS 3 "register_operand" "w"))))]
1105 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1106 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1108 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1111 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1112 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1114 (match_operand:VDQHS 4 "register_operand" "0")
1116 (vec_duplicate:VDQHS
1118 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1119 (parallel [(match_operand:SI 2 "immediate_operand")])))
1120 (match_operand:VDQHS 3 "register_operand" "w"))))]
1123 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1124 INTVAL (operands[2])));
1125 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1127 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS where the scalar is a duplicated register: use lane 0.
1130 (define_insn "*aarch64_mls_elt_merge<mode>"
1131 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1133 (match_operand:VDQHS 1 "register_operand" "0")
1134 (mult:VDQHS (vec_duplicate:VDQHS
1135 (match_operand:<VEL> 2 "register_operand" "w"))
1136 (match_operand:VDQHS 3 "register_operand" "w"))))]
1138 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1139 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1142 ;; Max/Min operations.
;; Integer vector SMAX/SMIN/UMAX/UMIN for byte/half/word element vectors.
1143 (define_insn "<su><maxmin><mode>3"
1144 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1145 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1146 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1148 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1149 [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no single max/min instruction; synthesise it by emitting a
;; compare and a vcond (vector conditional select).
1152 (define_expand "<su><maxmin>v2di3"
1153 [(set (match_operand:V2DI 0 "register_operand" "")
1154 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1155 (match_operand:V2DI 2 "register_operand" "")))]
1158 enum rtx_code cmp_operator;
1179 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1180 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1181 operands[2], cmp_fmt, operands[1], operands[2]));
1185 ;; Pairwise Integer Max/Min operations.
;; SMAXP/SMINP/UMAXP/UMINP: max/min of adjacent element pairs.
1186 (define_insn "aarch64_<maxmin_uns>p<mode>"
1187 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1188 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1189 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1192 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1193 [(set_attr "type" "neon_minmax<q>")]
1196 ;; Pairwise FP Max/Min operations.
1197 (define_insn "aarch64_<maxmin_uns>p<mode>"
1198 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1199 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1200 (match_operand:VHSDF 2 "register_operand" "w")]
1203 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1204 [(set_attr "type" "neon_minmax<q>")]
1207 ;; vec_concat gives a new vector with the low elements from operand 1, and
1208 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1209 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1210 ;; What that means, is that the RTL descriptions of the below patterns
1211 ;; need to change depending on endianness.
1213 ;; Move to the low architectural bits of the register.
1214 ;; On little-endian this is { operand, zeroes }
1215 ;; On big-endian this is { zeroes, operand }
;; Little-endian variant for quad modes other than the 2-element ones.
1217 (define_insn "move_lo_quad_internal_<mode>"
1218 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1220 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1221 (vec_duplicate:<VHALF> (const_int 0))))]
1222 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1227 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1228 (set_attr "simd" "yes,*,yes")
1229 (set_attr "fp" "*,yes,*")
1230 (set_attr "length" "4")]
;; Little-endian variant for the 2-element quad modes (V2DI/V2DF).
1233 (define_insn "move_lo_quad_internal_<mode>"
1234 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1236 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1238 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1243 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1244 (set_attr "simd" "yes,*,yes")
1245 (set_attr "fp" "*,yes,*")
1246 (set_attr "length" "4")]
;; Big-endian variant: the zero half comes first in the vec_concat.
1249 (define_insn "move_lo_quad_internal_be_<mode>"
1250 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1252 (vec_duplicate:<VHALF> (const_int 0))
1253 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1254 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1259 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1260 (set_attr "simd" "yes,*,yes")
1261 (set_attr "fp" "*,yes,*")
1262 (set_attr "length" "4")]
;; Big-endian variant for the 2-element quad modes.
1265 (define_insn "move_lo_quad_internal_be_<mode>"
1266 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1269 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1270 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1275 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1276 (set_attr "simd" "yes,*,yes")
1277 (set_attr "fp" "*,yes,*")
1278 (set_attr "length" "4")]
;; Expander that dispatches to the endian-appropriate internal pattern.
1281 (define_expand "move_lo_quad_<mode>"
1282 [(match_operand:VQ 0 "register_operand")
1283 (match_operand:VQ 1 "register_operand")]
1286 if (BYTES_BIG_ENDIAN)
1287 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1289 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1294 ;; Move operand1 to the high architectural bits of the register, keeping
1295 ;; the low architectural bits of operand2.
1296 ;; For little-endian this is { operand2, operand1 }
1297 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: keep the low half (selected by the lo-half parallel) and
;; insert operand 1 into the high 64 bits via INS.
1299 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1300 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1304 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1305 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1306 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1308 ins\\t%0.d[1], %1.d[0]
1310 [(set_attr "type" "neon_ins")]
;; Big-endian counterpart: operands of the vec_concat are swapped but the
;; emitted INS is the same.
1313 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1314 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1316 (match_operand:<VHALF> 1 "register_operand" "w,r")
1319 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1320 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1322 ins\\t%0.d[1], %1.d[0]
1324 [(set_attr "type" "neon_ins")]
;; Expander: build the lo-half lane-selection parallel and pick the
;; endian-appropriate insn above.
1327 (define_expand "move_hi_quad_<mode>"
1328 [(match_operand:VQ 0 "register_operand" "")
1329 (match_operand:<VHALF> 1 "register_operand" "")]
1332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1333 if (BYTES_BIG_ENDIAN)
1334 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1337 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1342 ;; Narrowing operations.
;; Truncate each element of a quad vector to half width: XTN.
1345 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1346 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1347 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1349 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1350 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit vectors into one double-width temporary (operand order
;; swapped for big-endian), then narrow the whole thing with XTN.
1353 (define_expand "vec_pack_trunc_<mode>"
1354 [(match_operand:<VNARROWD> 0 "register_operand" "")
1355 (match_operand:VDN 1 "register_operand" "")
1356 (match_operand:VDN 2 "register_operand" "")]
1359 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1360 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1361 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1363 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1364 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1365 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Quad version: narrow two quad vectors into one with XTN followed by
;; XTN2, choosing the operand order by endianness.  The earlyclobber
;; ("=&w") prevents the destination overlapping the second source.
1371 (define_insn "vec_pack_trunc_<mode>"
1372 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1373 (vec_concat:<VNARROWQ2>
1374 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1375 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1378 if (BYTES_BIG_ENDIAN)
1379 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1381 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1383 [(set_attr "type" "multiple")
1384 (set_attr "length" "8")]
1387 ;; Widening operations.
;; Sign/zero-extend the low half of a quad vector: SSHLL/USHLL with a
;; shift of zero.
1389 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1391 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1392 (match_operand:VQW 1 "register_operand" "w")
1393 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1396 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1397 [(set_attr "type" "neon_shift_imm_long")]
;; Same for the high half: SSHLL2/USHLL2.
1400 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1401 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1402 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403 (match_operand:VQW 1 "register_operand" "w")
1404 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1407 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1408 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the half-selection parallel and emit
;; the matching insn above.
1411 (define_expand "vec_unpack<su>_hi_<mode>"
1412 [(match_operand:<VWIDE> 0 "register_operand" "")
1413 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1416 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1417 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1423 (define_expand "vec_unpack<su>_lo_<mode>"
1424 [(match_operand:<VWIDE> 0 "register_operand" "")
1425 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1428 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1429 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1435 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves: SMLAL/UMLAL.
1437 (define_insn "*aarch64_<su>mlal_lo<mode>"
1438 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 2 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 4 "register_operand" "w")
1447 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1449 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1450 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate on the high halves: SMLAL2/UMLAL2.
1453 (define_insn "*aarch64_<su>mlal_hi<mode>"
1454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1457 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1458 (match_operand:VQW 2 "register_operand" "w")
1459 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1460 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1461 (match_operand:VQW 4 "register_operand" "w")
1463 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1465 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1466 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the low halves: SMLSL/UMLSL.
1469 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1470 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1472 (match_operand:<VWIDE> 1 "register_operand" "0")
1474 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1475 (match_operand:VQW 2 "register_operand" "w")
1476 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1477 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1478 (match_operand:VQW 4 "register_operand" "w")
1481 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1482 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the high halves: SMLSL2/UMLSL2.
1485 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1486 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1488 (match_operand:<VWIDE> 1 "register_operand" "0")
1490 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491 (match_operand:VQW 2 "register_operand" "w")
1492 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1493 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1494 (match_operand:VQW 4 "register_operand" "w")
1497 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1498 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector forms: whole-vector widening multiply-accumulate.
1501 (define_insn "*aarch64_<su>mlal<mode>"
1502 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1506 (match_operand:VD_BHSI 1 "register_operand" "w"))
1508 (match_operand:VD_BHSI 2 "register_operand" "w")))
1509 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1511 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1512 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; 64-bit-vector forms: whole-vector widening multiply-subtract.
1515 (define_insn "*aarch64_<su>mlsl<mode>"
1516 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1518 (match_operand:<VWIDE> 1 "register_operand" "0")
1521 (match_operand:VD_BHSI 2 "register_operand" "w"))
1523 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1525 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1526 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: SMULL/UMULL.
1529 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1530 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1531 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1532 (match_operand:VQW 1 "register_operand" "w")
1533 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1534 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1535 (match_operand:VQW 2 "register_operand" "w")
1538 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1539 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the low-half widening multiply.
1542 (define_expand "vec_widen_<su>mult_lo_<mode>"
1543 [(match_operand:<VWIDE> 0 "register_operand" "")
1544 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1545 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1548 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1549 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves: SMULL2/UMULL2.
1556 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1557 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1558 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1559 (match_operand:VQW 1 "register_operand" "w")
1560 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1561 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1562 (match_operand:VQW 2 "register_operand" "w")
1565 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1566 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander for the high-half widening multiply.
1569 (define_expand "vec_widen_<su>mult_hi_<mode>"
1570 [(match_operand:<VWIDE> 0 "register_operand" "")
1571 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1572 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1575 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1576 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1584 ;; FP vector operations.
1585 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1586 ;; double-precision (64-bit) floating-point data types and arithmetic as
1587 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1588 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1590 ;; Floating-point operations can raise an exception. Vectorizing such
1591 ;; operations is safe for the reasons explained below.
1593 ;; ARMv8 permits an extension to enable trapped floating-point
1594 ;; exception handling, however this is an optional feature. In the
1595 ;; event of a floating-point exception being raised by vectorised
1597 ;; 1. If trapped floating-point exceptions are available, then a trap
1598 ;; will be taken when any lane raises an enabled exception. A trap
1599 ;; handler may determine which lane raised the exception.
1600 ;; 2. Alternatively a sticky exception flag is set in the
1601 ;; floating-point status register (FPSR). Software may explicitly
1602 ;; test the exception flags, in which case the tests will either
1603 ;; prevent vectorisation, allowing precise identification of the
1604 ;; failing operation, or if tested outside of vectorisable regions
1605 ;; then the specific operation and lane are not of interest.
1607 ;; FP arithmetic operations.
;; Vector FP addition: FADD.
1609 (define_insn "add<mode>3"
1610 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1611 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1612 (match_operand:VHSDF 2 "register_operand" "w")))]
1614 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1615 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction: FSUB.
1618 (define_insn "sub<mode>3"
1619 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1620 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1621 (match_operand:VHSDF 2 "register_operand" "w")))]
1623 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1624 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication: FMUL.
1627 (define_insn "mul<mode>3"
1628 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1629 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1630 (match_operand:VHSDF 2 "register_operand" "w")))]
1632 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1633 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: try the reciprocal-approximation sequence first
;; (aarch64_emit_approx_div); otherwise fall through to the *div insn.
1636 (define_expand "div<mode>3"
1637 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1638 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1639 (match_operand:VHSDF 2 "register_operand" "w")))]
1642 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1645 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Plain vector FP division: FDIV.
1648 (define_insn "*div<mode>3"
1649 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1650 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1651 (match_operand:VHSDF 2 "register_operand" "w")))]
1653 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1654 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation: FNEG.
1657 (define_insn "neg<mode>2"
1658 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1659 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1661 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1662 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value: FABS.
1665 (define_insn "abs<mode>2"
1666 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1667 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1669 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1670 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add: FMLA, accumulator (operand 3) tied to destination.
1673 (define_insn "fma<mode>4"
1674 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1675 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1676 (match_operand:VHSDF 2 "register_operand" "w")
1677 (match_operand:VHSDF 3 "register_operand" "0")))]
1679 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1680 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand taken from a vector lane; lane index is
;; remapped with ENDIAN_LANE_N for big-endian targets.
1683 (define_insn "*aarch64_fma4_elt<mode>"
1684 [(set (match_operand:VDQF 0 "register_operand" "=w")
1688 (match_operand:VDQF 1 "register_operand" "<h_con>")
1689 (parallel [(match_operand:SI 2 "immediate_operand")])))
1690 (match_operand:VDQF 3 "register_operand" "w")
1691 (match_operand:VDQF 4 "register_operand" "0")))]
1694 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1695 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1697 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, with the lane taken from a vector of the swapped width.
1700 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1701 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1703 (vec_duplicate:VDQSF
1705 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1706 (parallel [(match_operand:SI 2 "immediate_operand")])))
1707 (match_operand:VDQSF 3 "register_operand" "w")
1708 (match_operand:VDQSF 4 "register_operand" "0")))]
1711 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1712 INTVAL (operands[2])));
1713 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1715 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a scalar register duplicated to all lanes; uses lane 0.
1718 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1719 [(set (match_operand:VMUL 0 "register_operand" "=w")
1722 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1723 (match_operand:VMUL 2 "register_operand" "w")
1724 (match_operand:VMUL 3 "register_operand" "0")))]
1726 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1727 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF destination taking one lane of a V2DF multiplicand.
1730 (define_insn "*aarch64_fma4_elt_to_64v2df"
1731 [(set (match_operand:DF 0 "register_operand" "=w")
1734 (match_operand:V2DF 1 "register_operand" "w")
1735 (parallel [(match_operand:SI 2 "immediate_operand")]))
1736 (match_operand:DF 3 "register_operand" "w")
1737 (match_operand:DF 4 "register_operand" "0")))]
1740 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1741 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1743 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (negated multiplicand): FMLS.
1746 (define_insn "fnma<mode>4"
1747 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749 (match_operand:VHSDF 1 "register_operand" "w")
1751 (match_operand:VHSDF 2 "register_operand" "w"))
1752 (match_operand:VHSDF 3 "register_operand" "0")))]
1754 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1755 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with one multiplicand taken from a vector lane; lane index
;; remapped with ENDIAN_LANE_N for big-endian.
1758 (define_insn "*aarch64_fnma4_elt<mode>"
1759 [(set (match_operand:VDQF 0 "register_operand" "=w")
1762 (match_operand:VDQF 3 "register_operand" "w"))
1765 (match_operand:VDQF 1 "register_operand" "<h_con>")
1766 (parallel [(match_operand:SI 2 "immediate_operand")])))
1767 (match_operand:VDQF 4 "register_operand" "0")))]
1770 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1771 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1773 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; As above, lane taken from a vector of the swapped width.
1776 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1777 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1780 (match_operand:VDQSF 3 "register_operand" "w"))
1781 (vec_duplicate:VDQSF
1783 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1784 (parallel [(match_operand:SI 2 "immediate_operand")])))
1785 (match_operand:VDQSF 4 "register_operand" "0")))]
1788 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1789 INTVAL (operands[2])));
1790 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1792 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar register duplicated to all lanes; uses lane 0.
1795 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1796 [(set (match_operand:VMUL 0 "register_operand" "=w")
1799 (match_operand:VMUL 2 "register_operand" "w"))
1801 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1802 (match_operand:VMUL 3 "register_operand" "0")))]
1804 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1805 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF destination taking one lane of a V2DF multiplicand.
1808 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1809 [(set (match_operand:DF 0 "register_operand" "=w")
1812 (match_operand:V2DF 1 "register_operand" "w")
1813 (parallel [(match_operand:SI 2 "immediate_operand")]))
1815 (match_operand:DF 3 "register_operand" "w"))
1816 (match_operand:DF 4 "register_operand" "0")))]
1819 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1820 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1822 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1825 ;; Vector versions of the floating-point frint patterns.
1826 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1827 (define_insn "<frint_pattern><mode>2"
1828 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1829 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1832 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1833 [(set_attr "type" "neon_fp_round_<stype><q>")]
1836 ;; Vector versions of the fcvt standard patterns.
1837 ;; Expands to lbtrunc, lround, lceil, lfloor
1838 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1839 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1840 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1841 [(match_operand:VHSDF 1 "register_operand" "w")]
1844 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1845 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1848 ;; HF Scalar variants of related SIMD instructions.
;; Half-float to 16-bit integer with a rounding mode; needs the ARMv8.2
;; FP16 extension (TARGET_SIMD_F16INST).
1849 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1850 [(set (match_operand:HI 0 "register_operand" "=w")
1851 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1853 "TARGET_SIMD_F16INST"
1854 "fcvt<frint_suffix><su>\t%h0, %h1"
1855 [(set_attr "type" "neon_fp_to_int_s")]
;; Truncating half-float to 16-bit integer conversion: FCVTZS/FCVTZU.
1858 (define_insn "<optab>_trunchfhi2"
1859 [(set (match_operand:HI 0 "register_operand" "=w")
1860 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1861 "TARGET_SIMD_F16INST"
1862 "fcvtz<su>\t%h0, %h1"
1863 [(set_attr "type" "neon_fp_to_int_s")]
;; 16-bit integer to half-float conversion: SCVTF/UCVTF.
1866 (define_insn "<optab>hihf2"
1867 [(set (match_operand:HF 0 "register_operand" "=w")
1868 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1869 "TARGET_SIMD_F16INST"
1870 "<su_optab>cvtf\t%h0, %h1"
1871 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of 2 with the conversion into a single
;; fixed-point FCVTZS/FCVTZU using the #fbits immediate form.
1874 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1875 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1876 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1878 (match_operand:VDQF 1 "register_operand" "w")
1879 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1882 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1883 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1885 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1887 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1888 output_asm_insn (buf, operands);
1891 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/float conversions onto the insns
;; above.
1894 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1895 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1896 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1897 [(match_operand:VHSDF 1 "register_operand")]
1902 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1903 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1904 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1905 [(match_operand:VHSDF 1 "register_operand")]
1910 (define_expand "ftrunc<VHSDF:mode>2"
1911 [(set (match_operand:VHSDF 0 "register_operand")
1912 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer to FP conversion: SCVTF/UCVTF.
1917 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1918 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1920 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1922 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1923 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1926 ;; Conversions between vectors of floats and doubles.
1927 ;; Contains a mix of patterns to match standard pattern names
1928 ;; and those for intrinsics.
1930 ;; Float widening operations.
;; Widen the low half of an HF/SF quad vector to the next wider FP mode:
;; FCVTL.
1932 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1933 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1934 (float_extend:<VWIDE> (vec_select:<VHALF>
1935 (match_operand:VQ_HSF 1 "register_operand" "w")
1936 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1939 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1940 [(set_attr "type" "neon_fp_cvt_widen_s")]
1943 ;; Convert between fixed-point and floating-point (vector modes)
;; FP to fixed-point with #fbits immediate (operand 2).
1945 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1946 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1947 (unspec:<VHSDF:FCVT_TARGET>
1948 [(match_operand:VHSDF 1 "register_operand" "w")
1949 (match_operand:SI 2 "immediate_operand" "i")]
1952 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1953 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point to FP with #fbits immediate (operand 2).
1956 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1957 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1958 (unspec:<VDQ_HSDI:FCVT_TARGET>
1959 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1960 (match_operand:SI 2 "immediate_operand" "i")]
1963 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1964 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1967 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1968 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1969 ;; the meaning of HI and LO changes depending on the target endianness.
1970 ;; While elsewhere we map the higher numbered elements of a vector to
1971 ;; the lower architectural lanes of the vector, for these patterns we want
1972 ;; to always treat "hi" as referring to the higher architectural lanes.
1973 ;; Consequently, while the patterns below look inconsistent with our
1974 ;; other big-endian patterns their behavior is as required.
;; Expander for the float-widening "unpack low half" standard pattern;
;; builds the lo-half selector and emits the FCVTL insn above.
1976 (define_expand "vec_unpacks_lo_<mode>"
1977 [(match_operand:<VWIDE> 0 "register_operand" "")
1978 (match_operand:VQ_HSF 1 "register_operand" "")]
1981 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1982 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high half of an HF/SF quad vector: FCVTL2.
1988 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1989 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1990 (float_extend:<VWIDE> (vec_select:<VHALF>
1991 (match_operand:VQ_HSF 1 "register_operand" "w")
1992 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1995 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1996 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Expander for the float-widening "unpack high half" standard pattern.
;; Builds the hi-half lane-selection parallel (note the `true' argument)
;; and must emit the _hi_ insn (FCVTL2).  The previous code emitted
;; gen_aarch64_simd_vec_unpacks_lo_<mode> here, which selects the wrong
;; half and does not satisfy the _hi_ insn's vect_par_cnst_hi_half
;; predicate; the integer analogue vec_unpack<su>_hi_<mode> correctly
;; calls its _hi_ generator.
1999 (define_expand "vec_unpacks_hi_<mode>"
2000 [(match_operand:<VWIDE> 0 "register_operand" "")
2001 (match_operand:VQ_HSF 1 "register_operand" "")]
2004 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2005 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a 64-bit FP vector to the next wider mode: FCVTL.
2010 (define_insn "aarch64_float_extend_lo_<Vwide>"
2011 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2012 (float_extend:<VWIDE>
2013 (match_operand:VDF 1 "register_operand" "w")))]
2015 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2016 [(set_attr "type" "neon_fp_cvt_widen_s")]
2019 ;; Float narrowing operations.
;; Narrow a wide FP vector into a 64-bit result: FCVTN.
2021 (define_insn "aarch64_float_truncate_lo_<mode>"
2022 [(set (match_operand:VDF 0 "register_operand" "=w")
2024 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2026 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2027 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping the low half (operand 1 tied
;; to the destination): FCVTN2, little-endian operand order.
2030 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2031 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2033 (match_operand:VDF 1 "register_operand" "0")
2035 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2036 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2037 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2038 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian counterpart with the vec_concat operands swapped.
2041 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2042 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2045 (match_operand:<VWIDE> 2 "register_operand" "w"))
2046 (match_operand:VDF 1 "register_operand" "0")))]
2047 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2048 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2049 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Expander choosing the endian-appropriate FCVTN2 pattern.
2052 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2053 [(match_operand:<VDBL> 0 "register_operand" "=w")
2054 (match_operand:VDF 1 "register_operand" "0")
2055 (match_operand:<VWIDE> 2 "register_operand" "w")]
2058 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2059 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2060 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2061 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN then FCVTN2, with the
;; operand roles swapped on big-endian.
2066 (define_expand "vec_pack_trunc_v2df"
2067 [(set (match_operand:V4SF 0 "register_operand")
2069 (float_truncate:V2SF
2070 (match_operand:V2DF 1 "register_operand"))
2071 (float_truncate:V2SF
2072 (match_operand:V2DF 2 "register_operand"))
2076 rtx tmp = gen_reg_rtx (V2SFmode);
2077 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2078 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2080 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2081 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2082 tmp, operands[hi]));
;; Pack two scalar DFs into a V2SF: assemble a V2DF then narrow it.
2087 (define_expand "vec_pack_trunc_df"
2088 [(set (match_operand:V2SF 0 "register_operand")
2091 (match_operand:DF 1 "register_operand"))
2093 (match_operand:DF 2 "register_operand"))
2097 rtx tmp = gen_reg_rtx (V2SFmode);
2098 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2099 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2101 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2102 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2103 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2109 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
;; expression like:
2111 ;; a = (b < c) ? b : c;
2112 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2113 ;; either explicitly or indirectly via -ffast-math.
2115 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2116 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2117 ;; operand will be returned when both operands are zero (i.e. they may not
2118 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2119 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin on FP vectors, implemented with FMAXNM/FMINNM.
2122 (define_insn "<su><maxmin><mode>3"
2123 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2124 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2125 (match_operand:VHSDF 2 "register_operand" "w")))]
2127 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2128 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2131 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2132 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2133 ;; which implement the IEEE fmax ()/fmin () functions.
2134 (define_insn "<maxmin_uns><mode>3"
2135 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2136 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2137 (match_operand:VHSDF 2 "register_operand" "w")]
2140 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2141 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2144 ;; 'across lanes' add.
;; Integer sum-across-lanes: reduce into a scratch vector, then extract
;; the (endian-corrected) lane 0 as the scalar result.
2146 (define_expand "reduc_plus_scal_<mode>"
2147 [(match_operand:<VEL> 0 "register_operand" "=w")
2148 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2152 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2153 rtx scratch = gen_reg_rtx (<MODE>mode);
2154 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2155 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; FP pairwise add: FADDP.
2160 (define_insn "aarch64_faddp<mode>"
2161 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2162 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2163 (match_operand:VHSDF 2 "register_operand" "w")]
2166 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2167 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Integer across-lanes add: ADDV / ADDP depending on mode.
2170 (define_insn "aarch64_reduc_plus_internal<mode>"
2171 [(set (match_operand:VDQV 0 "register_operand" "=w")
2172 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2175 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2176 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP does the reduction.
2179 (define_insn "aarch64_reduc_plus_internalv2si"
2180 [(set (match_operand:V2SI 0 "register_operand" "=w")
2181 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2184 "addp\\t%0.2s, %1.2s, %1.2s"
2185 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with a single scalar-output FADDP.
2188 (define_insn "reduc_plus_scal_<mode>"
2189 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2190 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2193 "faddp\\t%<Vetype>0, %1.<Vtype>"
2194 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two FADDP passes, then extract lane 0.
2197 (define_expand "reduc_plus_scal_v4sf"
2198 [(set (match_operand:SF 0 "register_operand")
2199 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2203 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2204 rtx scratch = gen_reg_rtx (V4SFmode);
2205 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2206 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2207 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading redundant sign bits: maps directly onto CLS.
2211 (define_insn "clrsb<mode>2"
2212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2213 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2215 "cls\\t%0.<Vtype>, %1.<Vtype>"
2216 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per element: maps directly onto CLZ.
2219 (define_insn "clz<mode>2"
2220 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2221 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2223 "clz\\t%0.<Vtype>, %1.<Vtype>"
2224 [(set_attr "type" "neon_cls<q>")]
;; Population count; CNT only exists for byte elements, hence the VB iterator.
2227 (define_insn "popcount<mode>2"
2228 [(set (match_operand:VB 0 "register_operand" "=w")
2229 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2231 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2232 [(set_attr "type" "neon_cnt<q>")]
2235 ;; 'across lanes' max and min ops.
2237 ;; Template for outputting a scalar, so we can create __builtins which can be
2238 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min reduction: reduce into a scratch, then extract lane 0.
2239 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2240 [(match_operand:<VEL> 0 "register_operand")
2241 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2245 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2246 rtx scratch = gen_reg_rtx (<MODE>mode);
2247 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2249 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2254 ;; Likewise for integer cases, signed and unsigned.
2255 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2256 [(match_operand:<VEL> 0 "register_operand")
2257 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2261 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2262 rtx scratch = gen_reg_rtx (<MODE>mode);
2263 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2265 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes max/min via the SMAXV/UMAXV/SMINV/UMINV family.
2270 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2271 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2272 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2275 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2276 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op of the register with
;; itself leaves the result in every lane.
2279 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2280 [(set (match_operand:V2SI 0 "register_operand" "=w")
2281 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2284 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2285 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min; <vp> selects the v/p instruction suffix per mode.
2288 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2289 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2290 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2293 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2294 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2297 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2299 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2302 ;; Thus our BSL is of the form:
2303 ;; op0 = bsl (mask, op2, op3)
2304 ;; We can use any of:
;; if (op0 = mask)
2307 ;; bsl mask, op1, op2
2308 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2309 ;; bit op0, op2, mask
2310 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2311 ;; bif op0, op1, mask
2313 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2314 ;; Some forms of straight-line code may generate the equivalent form
2315 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bitwise select. The three alternatives correspond to which input is tied
;; to the destination: mask ("0,w,w") -> BSL, op2 tied -> BIT, op3 tied -> BIF
;; (see the comment block above for the equivalences).
2317 (define_insn "aarch64_simd_bsl<mode>_internal"
2318 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2322 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2323 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2324 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2325 (match_dup:<V_INT_EQUIV> 3)
2329 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2330 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2331 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2332 [(set_attr "type" "neon_bsl<q>")]
2335 ;; We need this form in addition to the above pattern to match the case
2336 ;; when combine tries merging three insns such that the second operand of
2337 ;; the outer XOR matches the second operand of the inner XOR rather than
2338 ;; the first. The two are equivalent but since recog doesn't try all
2339 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same three BSL/BIT/BIF alternatives as the _internal pattern, with the
;; XOR operands commuted (note operands 2 and 3 swap roles in the output).
2341 (define_insn "*aarch64_simd_bsl<mode>_alt"
2342 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2346 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2347 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2348 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2349 (match_dup:VSDQ_I_DI 2)))]
2352 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2353 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2354 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2355 [(set_attr "type" "neon_bsl<q>")]
;; Public bsl expander. For FP modes, the internal pattern works on the
;; integer-equivalent mode, so operands are viewed through gen_lowpart and
;; the result is moved back in the FP mode at the end.
2358 (define_expand "aarch64_simd_bsl<mode>"
2359 [(match_operand:VALLDIF 0 "register_operand")
2360 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2361 (match_operand:VALLDIF 2 "register_operand")
2362 (match_operand:VALLDIF 3 "register_operand")]
2365 /* We can't alias operands together if they have different modes. */
2366 rtx tmp = operands[0];
2367 if (FLOAT_MODE_P (<MODE>mode))
2369 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2370 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2371 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2373 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2374 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2378 if (tmp != operands[0])
2379 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; vcond_mask: select between operands 1 and 2 under the integer mask in
;; operand 3. The all-ones/all-zeros constant cases collapse to a move or
;; a bitwise NOT of the mask; everything else goes through bsl.
2384 (define_expand "vcond_mask_<mode><v_int_equiv>"
2385 [(match_operand:VALLDI 0 "register_operand")
2386 (match_operand:VALLDI 1 "nonmemory_operand")
2387 (match_operand:VALLDI 2 "nonmemory_operand")
2388 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2391 /* If we have (a = (P) ? -1 : 0);
2392 Then we can simply move the generated mask (result must be int). */
2393 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2394 && operands[2] == CONST0_RTX (<MODE>mode))
2395 emit_move_insn (operands[0], operands[3]);
2396 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2397 else if (operands[1] == CONST0_RTX (<MODE>mode)
2398 && operands[2] == CONSTM1_RTX (<MODE>mode))
2399 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
;; General case: force non-register arms into registers and emit a bsl,
;; with the mask as the selector.
2402 if (!REG_P (operands[1]))
2403 operands[1] = force_reg (<MODE>mode, operands[1]);
2404 if (!REG_P (operands[2]))
2405 operands[2] = force_reg (<MODE>mode, operands[2]);
2406 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2407 operands[1], operands[2]));
2413 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask. Each
;; comparison code maps to one CM* instruction, swapping operands where the
;; architecture only provides one direction (e.g. LTU via CMHI reversed).
;; NOTE(review): the switch/case labels selecting between the emit_insn
;; calls are elided in this extract.
2415 (define_expand "vec_cmp<mode><mode>"
2416 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2417 (match_operator 1 "comparison_operator"
2418 [(match_operand:VSDQ_I_DI 2 "register_operand")
2419 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2422 rtx mask = operands[0];
2423 enum rtx_code code = GET_CODE (operands[1]);
2433 if (operands[3] == CONST0_RTX (<MODE>mode))
2438 if (!REG_P (operands[3]))
2439 operands[3] = force_reg (<MODE>mode, operands[3]);
2447 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2451 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2455 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2459 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2463 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2467 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]))
2471 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2475 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2479 /* Handle NE as !EQ. */
2480 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2481 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2485 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; Floating-point vector compare producing an integer mask. Unordered
;; codes are implemented by emitting the inverse ordered FCM* comparison
;; and inverting the result (FCM returns false for unordered lanes).
;; NOTE(review): the switch/case labels and several statements are elided
;; in this extract -- the emit calls below belong to distinct cases.
2495 (define_expand "vec_cmp<mode><v_int_equiv>"
2496 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2497 (match_operator 1 "comparison_operator"
2498 [(match_operand:VDQF 2 "register_operand")
2499 (match_operand:VDQF 3 "nonmemory_operand")]))]
2502 int use_zero_form = 0;
2503 enum rtx_code code = GET_CODE (operands[1]);
2504 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2506 rtx (*comparison) (rtx, rtx, rtx) = NULL;
;; Comparisons against +0.0 can use the single-operand FCM* forms.
2515 if (operands[3] == CONST0_RTX (<MODE>mode))
2522 if (!REG_P (operands[3]))
2523 operands[3] = force_reg (<MODE>mode, operands[3]);
;; Select the base FCM* generator, swapping operands for codes the
;; architecture only supplies in one direction.
2533 comparison = gen_aarch64_cmlt<mode>;
2538 std::swap (operands[2], operands[3]);
2542 comparison = gen_aarch64_cmgt<mode>;
2547 comparison = gen_aarch64_cmle<mode>;
2552 std::swap (operands[2], operands[3]);
2556 comparison = gen_aarch64_cmge<mode>;
2560 comparison = gen_aarch64_cmeq<mode>;
2577 /* FCM returns false for lanes which are unordered, so if we use
2578 the inverse of the comparison we actually want to emit, then
2579 invert the result, we will end up with the correct result.
2580 Note that a NE NaN and NaN NE b are true for all a, b.
2582 Our transformations are:
2583 a UNGE b -> !(b GT a)
2584 a UNGT b -> !(b GE a)
2585 a UNLE b -> !(a GT b)
2586 a UNLT b -> !(a GE b)
2587 a NE b -> !(a EQ b) */
2588 gcc_assert (comparison != NULL);
2589 emit_insn (comparison (operands[0], operands[2], operands[3]));
2590 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2598 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2599 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2605 gcc_assert (comparison != NULL);
2606 emit_insn (comparison (operands[0], operands[2], operands[3]));
2610 /* We first check (a > b || b > a) which is !UNEQ, inverting
2611 this result will then give us (a == b || a UNORDERED b). */
2612 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2613 operands[2], operands[3]));
2614 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2615 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2616 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2620 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2621 UNORDERED as !ORDERED. */
2622 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2623 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2624 operands[3], operands[2]));
2625 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2626 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
;; ORDERED: (a > b || b >= a), no final inversion.
2630 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2631 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2632 operands[3], operands[2]));
2633 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
;; Unsigned integer vector compare simply forwards to vec_cmp: the signed
;; expander already dispatches on the rtx code, which encodes signedness.
2643 (define_expand "vec_cmpu<mode><mode>"
2644 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2645 (match_operator 1 "comparison_operator"
2646 [(match_operand:VSDQ_I_DI 2 "register_operand")
2647 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2650 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2651 operands[2], operands[3]));
;; vcond: compare operands 4/5 under operator 3, then select between
;; operands 1/2 with vcond_mask. NE is rewritten as EQ with the select
;; arms swapped, saving the mask inversion vec_cmp would otherwise emit.
2655 (define_expand "vcond<mode><mode>"
2656 [(set (match_operand:VALLDI 0 "register_operand")
2657 (if_then_else:VALLDI
2658 (match_operator 3 "comparison_operator"
2659 [(match_operand:VALLDI 4 "register_operand")
2660 (match_operand:VALLDI 5 "nonmemory_operand")])
2661 (match_operand:VALLDI 1 "nonmemory_operand")
2662 (match_operand:VALLDI 2 "nonmemory_operand")))]
2665 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2666 enum rtx_code code = GET_CODE (operands[3]);
2668 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2669 it as well as switch operands 1/2 in order to avoid the additional
2673 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2674 operands[4], operands[5]);
2675 std::swap (operands[1], operands[2]);
2677 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2678 operands[4], operands[5]));
2679 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2680 operands[2], mask));
;; Mixed-mode vcond: FP comparison (VDQF_COND) selecting between values of
;; the corresponding integer mode <V_cmp_mixed>. Same NE -> EQ + swap trick
;; as the same-mode vcond above.
2685 (define_expand "vcond<v_cmp_mixed><mode>"
2686 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2687 (if_then_else:<V_cmp_mixed>
2688 (match_operator 3 "comparison_operator"
2689 [(match_operand:VDQF_COND 4 "register_operand")
2690 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2691 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2692 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2695 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2696 enum rtx_code code = GET_CODE (operands[3]);
2698 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2699 it as well as switch operands 1/2 in order to avoid the additional
2703 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2704 operands[4], operands[5]);
2705 std::swap (operands[1], operands[2]);
2707 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2708 operands[4], operands[5]));
2709 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2710 operands[0], operands[1],
2711 operands[2], mask));
;; Unsigned vcond on integer vectors. The mask lives in <MODE>mode itself
;; (integer), so no mode conversion is required around vec_cmp.
2716 (define_expand "vcondu<mode><mode>"
2717 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2718 (if_then_else:VSDQ_I_DI
2719 (match_operator 3 "comparison_operator"
2720 [(match_operand:VSDQ_I_DI 4 "register_operand")
2721 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2722 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2723 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2726 rtx mask = gen_reg_rtx (<MODE>mode);
2727 enum rtx_code code = GET_CODE (operands[3]);
2729 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2730 it as well as switch operands 1/2 in order to avoid the additional
2734 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2735 operands[4], operands[5]);
2736 std::swap (operands[1], operands[2]);
2738 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2739 operands[4], operands[5]));
2740 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2741 operands[2], mask));
;; Mixed-mode vcondu: unsigned integer comparison (<V_cmp_mixed>) selecting
;; between FP (VDQF) values.
2745 (define_expand "vcondu<mode><v_cmp_mixed>"
2746 [(set (match_operand:VDQF 0 "register_operand")
2748 (match_operator 3 "comparison_operator"
2749 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2750 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2751 (match_operand:VDQF 1 "nonmemory_operand")
2752 (match_operand:VDQF 2 "nonmemory_operand")))]
2755 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2756 enum rtx_code code = GET_CODE (operands[3]);
2758 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2759 it as well as switch operands 1/2 in order to avoid the additional
2763 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2764 operands[4], operands[5]);
2765 std::swap (operands[1], operands[2]);
2767 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2769 operands[4], operands[5]));
2770 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2771 operands[2], mask));
2775 ;; Patterns for AArch64 SIMD Intrinsics.
2777 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: lane index is endian-corrected at output time via ENDIAN_LANE_N.
2778 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2779 [(set (match_operand:GPI 0 "register_operand" "=r")
2782 (match_operand:VDQQH 1 "register_operand" "w")
2783 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2786 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2787 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2789 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: lane extraction with zero extension to a 32-bit GP register.
2792 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2793 [(set (match_operand:SI 0 "register_operand" "=r")
2796 (match_operand:VDQQH 1 "register_operand" "w")
2797 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2800 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2801 return "umov\\t%w0, %1.<Vetype>[%2]";
2803 [(set_attr "type" "neon_to_gp<q>")]
2806 ;; Lane extraction of a value, neither sign nor zero extension
2807 ;; is guaranteed so upper bits should be considered undefined.
2808 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives by destination: GP register (UMOV), SIMD register
;; (DUP), or memory (ST1 of a single lane).
2809 (define_insn "aarch64_get_lane<mode>"
2810 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2812 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2813 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2816 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2817 switch (which_alternative)
2820 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2822 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2824 return "st1\\t{%1.<Vetype>}[%2], %0";
2829 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2832 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a 64-bit value with zero into a 128-bit register. Little-endian
;; form: the data half is the vec_concat's first operand.
2835 (define_insn "*aarch64_combinez<mode>"
2836 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2838 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2839 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2840 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2845 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2846 (set_attr "simd" "yes,*,yes")
2847 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of the above: the zero half comes first in the concat.
2850 (define_insn "*aarch64_combinez_be<mode>"
2851 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2853 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2854 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2855 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2860 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2861 (set_attr "simd" "yes,*,yes")
2862 (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit halves into a 128-bit register; endianness handling
;; is delegated to aarch64_split_simd_combine.
2865 (define_expand "aarch64_combine<mode>"
2866 [(match_operand:<VDBL> 0 "register_operand")
2867 (match_operand:VDC 1 "register_operand")
2868 (match_operand:VDC 2 "register_operand")]
2871 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
;; Alternative combine: fill the low quad then the high quad of the
;; destination in two moves.
2877 (define_expand "aarch64_simd_combine<mode>"
2878 [(match_operand:<VDBL> 0 "register_operand")
2879 (match_operand:VDC 1 "register_operand")
2880 (match_operand:VDC 2 "register_operand")]
2883 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2884 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2887 [(set_attr "type" "multiple")]
2890 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two quad registers
;; (SADDL2/UADDL2/SSUBL2/USUBL2).
2892 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2894 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2895 (match_operand:VQW 1 "register_operand" "w")
2896 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2897 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2898 (match_operand:VQW 2 "register_operand" "w")
2901 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2902 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the LOW halves (SADDL/UADDL/SSUBL/USUBL).
2905 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2906 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2907 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2908 (match_operand:VQW 1 "register_operand" "w")
2909 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2910 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2911 (match_operand:VQW 2 "register_operand" "w")
2914 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2915 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders for the l2 (high-half) widening ops: each builds the
;; hi-half parallel selector and forwards to the _hi_internal pattern.
2919 (define_expand "aarch64_saddl2<mode>"
2920 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2921 (match_operand:VQW 1 "register_operand" "w")
2922 (match_operand:VQW 2 "register_operand" "w")]
2925 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2926 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2931 (define_expand "aarch64_uaddl2<mode>"
2932 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2933 (match_operand:VQW 1 "register_operand" "w")
2934 (match_operand:VQW 2 "register_operand" "w")]
2937 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2938 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2943 (define_expand "aarch64_ssubl2<mode>"
2944 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2945 (match_operand:VQW 1 "register_operand" "w")
2946 (match_operand:VQW 2 "register_operand" "w")]
2949 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2950 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2955 (define_expand "aarch64_usubl2<mode>"
2956 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2957 (match_operand:VQW 1 "register_operand" "w")
2958 (match_operand:VQW 2 "register_operand" "w")]
2961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2962 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of whole 64-bit vectors (SADDL/UADDL/SSUBL/USUBL).
2967 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2968 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2969 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2970 (match_operand:VD_BHSI 1 "register_operand" "w"))
2972 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2974 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2975 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2978 ;; <su><addsub>w<q>.
;; widen_ssum for quad vectors: widening-accumulate the low half first,
;; then the high half with saddw2, into the double-width accumulator.
2980 (define_expand "widen_ssum<mode>3"
2981 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2982 (plus:<VDBLW> (sign_extend:<VDBLW>
2983 (match_operand:VQW 1 "register_operand" ""))
2984 (match_operand:<VDBLW> 2 "register_operand" "")))]
2987 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2988 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2990 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2992 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit vector variant: a single SADDW suffices.
2997 (define_expand "widen_ssum<mode>3"
2998 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2999 (plus:<VWIDE> (sign_extend:<VWIDE>
3000 (match_operand:VD_BHSI 1 "register_operand" ""))
3001 (match_operand:<VWIDE> 2 "register_operand" "")))]
3004 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned twin of the quad-vector widen_ssum above (UADDW/UADDW2).
3008 (define_expand "widen_usum<mode>3"
3009 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3010 (plus:<VDBLW> (zero_extend:<VDBLW>
3011 (match_operand:VQW 1 "register_operand" ""))
3012 (match_operand:<VDBLW> 2 "register_operand" "")))]
3015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
3016 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3018 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3020 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
;; Unsigned 64-bit vector variant: a single UADDW.
3025 (define_expand "widen_usum<mode>3"
3026 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3027 (plus:<VWIDE> (zero_extend:<VWIDE>
3028 (match_operand:VD_BHSI 1 "register_operand" ""))
3029 (match_operand:<VWIDE> 2 "register_operand" "")))]
3032 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide + narrow add/sub: wide operand 1 combined with the widened 64-bit
;; operand 2 (SADDW/UADDW/SSUBW/USUBW).
3036 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3037 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3038 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3040 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3042 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3043 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same operation drawing the narrow input from the LOW half of a quad reg.
3046 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3047 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3048 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3051 (match_operand:VQW 2 "register_operand" "w")
3052 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3054 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3055 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; HIGH-half variant (SADDW2/UADDW2/SSUBW2/USUBW2).
3058 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3059 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3060 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3063 (match_operand:VQW 2 "register_operand" "w")
3064 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3066 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3067 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Intrinsic expanders for the w2 (high-half) wide-accumulate ops: each
;; builds the hi-half selector and forwards to the w2_internal pattern.
3070 (define_expand "aarch64_saddw2<mode>"
3071 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3072 (match_operand:<VWIDE> 1 "register_operand" "w")
3073 (match_operand:VQW 2 "register_operand" "w")]
3076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3077 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3082 (define_expand "aarch64_uaddw2<mode>"
3083 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3084 (match_operand:<VWIDE> 1 "register_operand" "w")
3085 (match_operand:VQW 2 "register_operand" "w")]
3088 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3089 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3095 (define_expand "aarch64_ssubw2<mode>"
3096 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3097 (match_operand:<VWIDE> 1 "register_operand" "w")
3098 (match_operand:VQW 2 "register_operand" "w")]
3101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3102 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3107 (define_expand "aarch64_usubw2<mode>"
3108 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3109 (match_operand:<VWIDE> 1 "register_operand" "w")
3110 (match_operand:VQW 2 "register_operand" "w")]
3113 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3114 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3119 ;; <su><r>h<addsub>.
;; Halving add/sub, plain or rounding (SHADD/UHADD/SRHADD/URHADD/SHSUB/...).
3121 (define_insn "aarch64_<sur>h<addsub><mode>"
3122 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3123 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3124 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3127 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3128 [(set_attr "type" "neon_<addsub>_halve<q>")]
3131 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub (ADDHN/SUBHN/RADDHN/RSUBHN).
3133 (define_insn "aarch64_<sur><addsub>hn<mode>"
3134 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3135 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3136 (match_operand:VQN 2 "register_operand" "w")]
3139 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3140 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; hn2 variant: narrows into the high half of the destination, keeping the
;; low half (operand 1, tied with "0") intact.
3143 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3144 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3145 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3146 (match_operand:VQN 2 "register_operand" "w")
3147 (match_operand:VQN 3 "register_operand" "w")]
3150 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3151 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (PMUL), byte elements only.
3156 (define_insn "aarch64_pmul<mode>"
3157 [(set (match_operand:VB 0 "register_operand" "=w")
3158 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3159 (match_operand:VB 2 "register_operand" "w")]
3162 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3163 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX, vector and scalar forms (VHSDF_HSDF covers both).
3168 (define_insn "aarch64_fmulx<mode>"
3169 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3171 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3172 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3175 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3176 [(set_attr "type" "neon_fp_mul_<stype>")]
3179 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by one lane of a vector of the *other* width (lane <-> laneq mix);
;; lane index is endian-corrected in the output routine.
3181 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3182 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3184 [(match_operand:VDQSF 1 "register_operand" "w")
3185 (vec_duplicate:VDQSF
3187 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3188 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3192 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3193 INTVAL (operands[3])));
3194 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3196 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3199 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by one lane of a same-mode vector.
3201 (define_insn "*aarch64_mulx_elt<mode>"
3202 [(set (match_operand:VDQF 0 "register_operand" "=w")
3204 [(match_operand:VDQF 1 "register_operand" "w")
3207 (match_operand:VDQF 2 "register_operand" "w")
3208 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3212 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3213 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3215 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX by a scalar broadcast from a register: printed as element 0.
3220 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3221 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3223 [(match_operand:VHSDF 1 "register_operand" "w")
3224 (vec_duplicate:VHSDF
3225 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3228 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3229 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3232 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3233 ;; vmulxd_lane_f64 == vmulx_lane_f64
3234 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX against one extracted vector lane (endian-corrected).
3236 (define_insn "*aarch64_vgetfmulx<mode>"
3237 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3239 [(match_operand:<VEL> 1 "register_operand" "w")
3241 (match_operand:VDQF 2 "register_operand" "w")
3242 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3246 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3247 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3249 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub (SQADD/UQADD/SQSUB/UQSUB) via the BINQOPS iterator.
3253 (define_insn "aarch64_<su_optab><optab><mode>"
3254 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3255 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3256 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3258 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3259 [(set_attr "type" "neon_<optab><q>")]
3262 ;; suqadd and usqadd
;; Accumulator-style: operand 1 is tied to the destination ("0"); only the
;; addend (operand 2) appears in the printed instruction.
3264 (define_insn "aarch64_<sur>qadd<mode>"
3265 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3266 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3267 (match_operand:VSDQ_I 2 "register_operand" "w")]
3270 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3271 [(set_attr "type" "neon_qadd<q>")]
;; Saturating extract-narrow of a signed value to unsigned (SQXTUN).
3276 (define_insn "aarch64_sqmovun<mode>"
3277 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3278 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3281 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3282 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3285 ;; sqmovn and uqmovn
;; Saturating extract-narrow, same-signedness (SQXTN/UQXTN).
3287 (define_insn "aarch64_<sur>qmovn<mode>"
3288 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3289 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3292 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3293 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (e.g. SQABS/SQNEG) selected by <optab>.
3298 (define_insn "aarch64_s<optab><mode>"
3299 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3301 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3303 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3304 [(set_attr "type" "neon_<optab><q>")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQDMULH/SQRDMULH: signed saturating (optionally rounding, via <r>)
;; doubling multiply returning the high half.
3309 (define_insn "aarch64_sq<r>dmulh<mode>"
3310 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3312 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3313 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3316 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3317 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector sq<r>dmulh by one lane of a 64-bit vector (<VCOND>).  Lane index
;; is endian-corrected before printing.
3322 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3323 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3325 [(match_operand:VDQHS 1 "register_operand" "w")
3327 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3328 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3332 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3333 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3334 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As above but the lane comes from a 128-bit vector (<VCONQ>).
3337 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3338 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3340 [(match_operand:VDQHS 1 "register_operand" "w")
3342 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3343 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3347 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3348 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3349 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) sq<r>dmulh by a lane of a 64-bit vector.
3352 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3353 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3355 [(match_operand:SD_HSI 1 "register_operand" "w")
3357 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3358 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3362 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3363 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3364 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar sq<r>dmulh by a lane of a 128-bit vector.
3367 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3368 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3370 [(match_operand:SD_HSI 1 "register_operand" "w")
3372 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3373 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3377 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3378 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3379 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQRDMLAH/SQRDMLSH (ARMv8.1-A): rounding doubling multiply
;; accumulate/subtract high half.  Operand 1 is the accumulator, tied to
;; the destination ("0"); only operands 2 and 3 appear in the template.
3384 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3385 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3387 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3388 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3389 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3392 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3393 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3396 ;; sqrdml[as]h_lane.
;; Vector sqrdml[as]h by a lane of a 64-bit vector; lane endian-corrected.
3398 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3399 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3401 [(match_operand:VDQHS 1 "register_operand" "0")
3402 (match_operand:VDQHS 2 "register_operand" "w")
3404 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3405 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3409 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3411 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3413 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) sqrdml[as]h by a lane of a 64-bit vector.
3416 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3417 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3419 [(match_operand:SD_HSI 1 "register_operand" "0")
3420 (match_operand:SD_HSI 2 "register_operand" "w")
3422 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3423 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3427 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3429 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3431 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3434 ;; sqrdml[as]h_laneq.
;; Vector sqrdml[as]h by a lane of a 128-bit vector (<VCONQ>).
3436 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3437 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3439 [(match_operand:VDQHS 1 "register_operand" "0")
3440 (match_operand:VDQHS 2 "register_operand" "w")
3442 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3443 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3447 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3449 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3451 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar sqrdml[as]h by a lane of a 128-bit vector.
3454 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3455 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3457 [(match_operand:SD_HSI 1 "register_operand" "0")
3458 (match_operand:SD_HSI 2 "register_operand" "w")
3460 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3461 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3465 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3467 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3469 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQDMLAL/SQDMLSL: widening signed saturating doubling
;; multiply-accumulate/subtract; both multiplicands are sign-extended to
;; the double-width mode <VWIDE>.  Operand 1 is the accumulator ("0").
3474 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3475 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3477 (match_operand:<VWIDE> 1 "register_operand" "0")
3480 (sign_extend:<VWIDE>
3481 (match_operand:VSD_HSI 2 "register_operand" "w"))
3482 (sign_extend:<VWIDE>
3483 (match_operand:VSD_HSI 3 "register_operand" "w")))
3486 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3487 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; sqdml[as]l by a duplicated lane from a 64-bit vector (<VCOND>); lane
;; index endian-corrected.
3492 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3493 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3495 (match_operand:<VWIDE> 1 "register_operand" "0")
3498 (sign_extend:<VWIDE>
3499 (match_operand:VD_HSI 2 "register_operand" "w"))
3500 (sign_extend:<VWIDE>
3501 (vec_duplicate:VD_HSI
3503 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3504 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3509 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3511 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3513 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above with the lane taken from a 128-bit vector (<VCONQ>).
3516 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3519 (match_operand:<VWIDE> 1 "register_operand" "0")
3522 (sign_extend:<VWIDE>
3523 (match_operand:VD_HSI 2 "register_operand" "w"))
3524 (sign_extend:<VWIDE>
3525 (vec_duplicate:VD_HSI
3527 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3528 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3533 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3535 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3537 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) sqdml[as]l by a lane of a 64-bit vector.
3540 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3541 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543 (match_operand:<VWIDE> 1 "register_operand" "0")
3546 (sign_extend:<VWIDE>
3547 (match_operand:SD_HSI 2 "register_operand" "w"))
3548 (sign_extend:<VWIDE>
3550 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3551 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3556 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3558 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3560 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar sqdml[as]l by a lane of a 128-bit vector.
3563 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3566 (match_operand:<VWIDE> 1 "register_operand" "0")
3569 (sign_extend:<VWIDE>
3570 (match_operand:SD_HSI 2 "register_operand" "w"))
3571 (sign_extend:<VWIDE>
3573 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3574 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3579 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3581 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3583 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l by a scalar operand broadcast to all lanes; uses element [0]
;; of the scalar register in the template.
3588 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3589 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3591 (match_operand:<VWIDE> 1 "register_operand" "0")
3594 (sign_extend:<VWIDE>
3595 (match_operand:VD_HSI 2 "register_operand" "w"))
3596 (sign_extend:<VWIDE>
3597 (vec_duplicate:VD_HSI
3598 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3601 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3602 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQDML[AS]L2: the "high half" form -- operand 4/5 is a vect_par_cnst_hi_half
;; selector picking the upper half of the 128-bit inputs before widening.
3607 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3610 (match_operand:<VWIDE> 1 "register_operand" "0")
3613 (sign_extend:<VWIDE>
3615 (match_operand:VQ_HSI 2 "register_operand" "w")
3616 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3617 (sign_extend:<VWIDE>
3619 (match_operand:VQ_HSI 3 "register_operand" "w")
3623 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3624 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and forwards to the _internal insn.
3627 (define_expand "aarch64_sqdmlal2<mode>"
3628 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3629 (match_operand:<VWIDE> 1 "register_operand" "w")
3630 (match_operand:VQ_HSI 2 "register_operand" "w")
3631 (match_operand:VQ_HSI 3 "register_operand" "w")]
3634 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3635 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3636 operands[2], operands[3], p));
;; Same as the sqdmlal2 expander but for the subtracting (sqdmlsl2) form.
3640 (define_expand "aarch64_sqdmlsl2<mode>"
3641 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3642 (match_operand:<VWIDE> 1 "register_operand" "w")
3643 (match_operand:VQ_HSI 2 "register_operand" "w")
3644 (match_operand:VQ_HSI 3 "register_operand" "w")]
3647 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3648 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3649 operands[2], operands[3], p));
;; Hi-half sqdml[as]l2 by a lane of a 64-bit vector; lane endian-corrected.
3655 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3656 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3658 (match_operand:<VWIDE> 1 "register_operand" "0")
3661 (sign_extend:<VWIDE>
3663 (match_operand:VQ_HSI 2 "register_operand" "w")
3664 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3665 (sign_extend:<VWIDE>
3666 (vec_duplicate:<VHALF>
3668 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3669 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3674 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3676 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3678 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; As above with the lane from a 128-bit vector (<VCONQ>).
3681 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3682 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3684 (match_operand:<VWIDE> 1 "register_operand" "0")
3687 (sign_extend:<VWIDE>
3689 (match_operand:VQ_HSI 2 "register_operand" "w")
3690 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3691 (sign_extend:<VWIDE>
3692 (vec_duplicate:<VHALF>
3694 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3695 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3700 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3702 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3704 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Four expanders below build the hi-half selector and dispatch to the
;; matching _lane/_laneq internal patterns for sqdmlal2/sqdmlsl2.
3707 (define_expand "aarch64_sqdmlal2_lane<mode>"
3708 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3709 (match_operand:<VWIDE> 1 "register_operand" "w")
3710 (match_operand:VQ_HSI 2 "register_operand" "w")
3711 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3712 (match_operand:SI 4 "immediate_operand" "i")]
3715 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3716 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3717 operands[2], operands[3],
3722 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3723 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3724 (match_operand:<VWIDE> 1 "register_operand" "w")
3725 (match_operand:VQ_HSI 2 "register_operand" "w")
3726 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3727 (match_operand:SI 4 "immediate_operand" "i")]
3730 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3731 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3732 operands[2], operands[3],
3737 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3738 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3739 (match_operand:<VWIDE> 1 "register_operand" "w")
3740 (match_operand:VQ_HSI 2 "register_operand" "w")
3741 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3742 (match_operand:SI 4 "immediate_operand" "i")]
3745 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3746 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3747 operands[2], operands[3],
3752 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3753 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3754 (match_operand:<VWIDE> 1 "register_operand" "w")
3755 (match_operand:VQ_HSI 2 "register_operand" "w")
3756 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3757 (match_operand:SI 4 "immediate_operand" "i")]
3760 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3761 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3762 operands[2], operands[3],
;; Hi-half sqdml[as]l2 by a broadcast scalar (element [0] in the template).
3767 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3768 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3770 (match_operand:<VWIDE> 1 "register_operand" "0")
3773 (sign_extend:<VWIDE>
3775 (match_operand:VQ_HSI 2 "register_operand" "w")
3776 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3777 (sign_extend:<VWIDE>
3778 (vec_duplicate:<VHALF>
3779 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3782 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders for the _n (broadcast-scalar) forms.
3786 (define_expand "aarch64_sqdmlal2_n<mode>"
3787 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (match_operand:<VWIDE> 1 "register_operand" "w")
3789 (match_operand:VQ_HSI 2 "register_operand" "w")
3790 (match_operand:<VEL> 3 "register_operand" "w")]
3793 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3794 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3795 operands[2], operands[3],
3800 (define_expand "aarch64_sqdmlsl2_n<mode>"
3801 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3802 (match_operand:<VWIDE> 1 "register_operand" "w")
3803 (match_operand:VQ_HSI 2 "register_operand" "w")
3804 (match_operand:<VEL> 3 "register_operand" "w")]
3807 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3808 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3809 operands[2], operands[3],
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQDMULL: widening signed saturating doubling multiply; both inputs are
;; sign-extended to <VWIDE>.
3816 (define_insn "aarch64_sqdmull<mode>"
3817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3820 (sign_extend:<VWIDE>
3821 (match_operand:VSD_HSI 1 "register_operand" "w"))
3822 (sign_extend:<VWIDE>
3823 (match_operand:VSD_HSI 2 "register_operand" "w")))
3826 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3827 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; sqdmull by a duplicated lane of a 64-bit vector; lane endian-corrected.
3832 (define_insn "aarch64_sqdmull_lane<mode>"
3833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3836 (sign_extend:<VWIDE>
3837 (match_operand:VD_HSI 1 "register_operand" "w"))
3838 (sign_extend:<VWIDE>
3839 (vec_duplicate:VD_HSI
3841 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3842 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3847 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3848 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3850 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above with the lane from a 128-bit vector (<VCONQ>).
3853 (define_insn "aarch64_sqdmull_laneq<mode>"
3854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3857 (sign_extend:<VWIDE>
3858 (match_operand:VD_HSI 1 "register_operand" "w"))
3859 (sign_extend:<VWIDE>
3860 (vec_duplicate:VD_HSI
3862 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3863 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3868 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3869 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3871 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) sqdmull by a lane of a 64-bit vector.
3874 (define_insn "aarch64_sqdmull_lane<mode>"
3875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3878 (sign_extend:<VWIDE>
3879 (match_operand:SD_HSI 1 "register_operand" "w"))
3880 (sign_extend:<VWIDE>
3882 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3883 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3888 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3889 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3891 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar sqdmull by a lane of a 128-bit vector.
3894 (define_insn "aarch64_sqdmull_laneq<mode>"
3895 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3898 (sign_extend:<VWIDE>
3899 (match_operand:SD_HSI 1 "register_operand" "w"))
3900 (sign_extend:<VWIDE>
3902 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3903 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3908 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3909 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3911 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull by a broadcast scalar (element [0] in the template).
3916 (define_insn "aarch64_sqdmull_n<mode>"
3917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3920 (sign_extend:<VWIDE>
3921 (match_operand:VD_HSI 1 "register_operand" "w"))
3922 (sign_extend:<VWIDE>
3923 (vec_duplicate:VD_HSI
3924 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3928 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3929 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; SQDMULL2: widening saturating doubling multiply of the high halves of
;; two 128-bit vectors (hi-half selected by the vect_par_cnst_hi_half
;; operand).
3936 (define_insn "aarch64_sqdmull2<mode>_internal"
3937 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3940 (sign_extend:<VWIDE>
3942 (match_operand:VQ_HSI 1 "register_operand" "w")
3943 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3944 (sign_extend:<VWIDE>
3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3951 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3952 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and forwards to _internal.
3955 (define_expand "aarch64_sqdmull2<mode>"
3956 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3957 (match_operand:VQ_HSI 1 "register_operand" "w")
3958 (match_operand:VQ_HSI 2 "register_operand" "w")]
3961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3962 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Hi-half sqdmull2 by a lane of a 64-bit vector; lane endian-corrected.
3969 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3970 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3973 (sign_extend:<VWIDE>
3975 (match_operand:VQ_HSI 1 "register_operand" "w")
3976 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3977 (sign_extend:<VWIDE>
3978 (vec_duplicate:<VHALF>
3980 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3981 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3986 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3987 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3989 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above with the lane from a 128-bit vector (<VCONQ>).
3992 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3993 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3996 (sign_extend:<VWIDE>
3998 (match_operand:VQ_HSI 1 "register_operand" "w")
3999 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4000 (sign_extend:<VWIDE>
4001 (vec_duplicate:<VHALF>
4003 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4004 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4009 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
4010 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4012 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the lane/laneq forms: build hi-half selector, dispatch.
4015 (define_expand "aarch64_sqdmull2_lane<mode>"
4016 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4017 (match_operand:VQ_HSI 1 "register_operand" "w")
4018 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4019 (match_operand:SI 3 "immediate_operand" "i")]
4022 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4023 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4024 operands[2], operands[3],
4029 (define_expand "aarch64_sqdmull2_laneq<mode>"
4030 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4031 (match_operand:VQ_HSI 1 "register_operand" "w")
4032 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4033 (match_operand:SI 3 "immediate_operand" "i")]
4036 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4037 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4038 operands[2], operands[3],
;; Hi-half sqdmull2 by a broadcast scalar (element [0] in the template).
4045 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4046 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4049 (sign_extend:<VWIDE>
4051 (match_operand:VQ_HSI 1 "register_operand" "w")
4052 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4053 (sign_extend:<VWIDE>
4054 (vec_duplicate:<VHALF>
4055 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4059 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4060 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for the _n form.
4063 (define_expand "aarch64_sqdmull2_n<mode>"
4064 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4065 (match_operand:VQ_HSI 1 "register_operand" "w")
4066 (match_operand:<VEL> 2 "register_operand" "w")]
4069 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4070 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; Shift by a register of per-lane amounts: <sur>shl covers the
;; signed/unsigned (rounding) variants selected by the iterator.
4077 (define_insn "aarch64_<sur>shl<mode>"
4078 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4080 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4081 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4084 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4085 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (optionally rounding) shift by register.
4091 (define_insn "aarch64_<sur>q<r>shl<mode>"
4092 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4094 [(match_operand:VSDQ_I 1 "register_operand" "w")
4095 (match_operand:VSDQ_I 2 "register_operand" "w")]
4098 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4099 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left by immediate.  When the shift equals the element
;; width, the architecture only provides the plain SHLL encoding, so that
;; mnemonic is emitted instead of the <sur>-prefixed one.
4104 (define_insn "aarch64_<sur>shll_n<mode>"
4105 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4106 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4108 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4112 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4113 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4115 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4117 [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant of the widening shift-left (SHLL2), same
;; element-width special case as above.
4122 (define_insn "aarch64_<sur>shll2_n<mode>"
4123 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4124 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4125 (match_operand:SI 2 "immediate_operand" "i")]
4129 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4130 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4132 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4134 [(set_attr "type" "neon_shift_imm_long")]
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; Shift right by immediate (<sur> selects signed/unsigned/rounding forms).
4139 (define_insn "aarch64_<sur>shr_n<mode>"
4140 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4141 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4143 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4146 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4147 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate: operand 1 is the accumulator tied to the
;; destination ("0"); the template shifts operand 2 by the immediate.
4152 (define_insn "aarch64_<sur>sra_n<mode>"
4153 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4154 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4155 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4157 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4160 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4161 [(set_attr "type" "neon_shift_acc<q>")]
;; SLI/SRI: shift-and-insert; destination bits outside the shifted field
;; come from operand 1, which is therefore tied to the output.
4166 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4167 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4168 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4169 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4171 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4174 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4175 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift-left by immediate (SQSHL/UQSHL/SQSHLU via iterators).
4180 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4181 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4182 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4184 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4187 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4188 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift-right-narrow by immediate.
4194 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4195 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4196 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4198 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4201 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4202 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4206 ;; cm(eq|ge|gt|lt|le)
4207 ;; Note, we have constraints for Dz and Z as different expanders
4208 ;; have different ideas of what should be passed to this pattern.
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; Signed integer vector compares producing all-ones/all-zeros masks.
;; Alternative 2 compares against immediate zero (#0).
4210 (define_insn "aarch64_cm<optab><mode>"
4211 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4213 (COMPARISONS:<V_INT_EQUIV>
4214 (match_operand:VDQ_I 1 "register_operand" "w,w")
4215 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4219 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4220 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4221 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare that may land in either register file.  After reload,
;; the GP-register alternative is split into a compare + cstoredi_neg
;; sequence (hence the CC clobber); the SIMD alternatives resolve to the
;; clobber-free pattern below.
4224 (define_insn_and_split "aarch64_cm<optab>di"
4225 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4228 (match_operand:DI 1 "register_operand" "w,w,r")
4229 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4231 (clobber (reg:CC CC_REGNUM))]
4235 [(set (match_operand:DI 0 "register_operand")
4238 (match_operand:DI 1 "register_operand")
4239 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4242 /* If we are in the general purpose register file,
4243 we split to a sequence of comparison and store. */
4244 if (GP_REGNUM_P (REGNO (operands[0]))
4245 && GP_REGNUM_P (REGNO (operands[1])))
4247 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4248 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4249 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4250 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4253 /* Otherwise, we expand to a similar pattern which does not
4254 clobber CC_REGNUM. */
4256 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload SIMD-register form of the DImode compare (no CC clobber).
4259 (define_insn "*aarch64_cm<optab>di"
4260 [(set (match_operand:DI 0 "register_operand" "=w,w")
4263 (match_operand:DI 1 "register_operand" "w,w")
4264 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4266 "TARGET_SIMD && reload_completed"
4268 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4269 cm<optab>\t%d0, %d1, #0"
4270 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned integer vector compares (UCOMPARISONS); no zero alternative.
4275 (define_insn "aarch64_cm<optab><mode>"
4276 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4278 (UCOMPARISONS:<V_INT_EQUIV>
4279 (match_operand:VDQ_I 1 "register_operand" "w")
4280 (match_operand:VDQ_I 2 "register_operand" "w")
4283 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4284 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DImode compare; same GP-vs-SIMD split strategy as the signed
;; aarch64_cm<optab>di above, but using plain CCmode.
4287 (define_insn_and_split "aarch64_cm<optab>di"
4288 [(set (match_operand:DI 0 "register_operand" "=w,r")
4291 (match_operand:DI 1 "register_operand" "w,r")
4292 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4294 (clobber (reg:CC CC_REGNUM))]
4298 [(set (match_operand:DI 0 "register_operand")
4301 (match_operand:DI 1 "register_operand")
4302 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4305 /* If we are in the general purpose register file,
4306 we split to a sequence of comparison and store. */
4307 if (GP_REGNUM_P (REGNO (operands[0]))
4308 && GP_REGNUM_P (REGNO (operands[1])))
4310 machine_mode mode = CCmode;
4311 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4312 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4313 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4316 /* Otherwise, we expand to a similar pattern which does not
4317 clobber CC_REGNUM. */
4319 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload SIMD-register form of the unsigned DImode compare.
4322 (define_insn "*aarch64_cm<optab>di"
4323 [(set (match_operand:DI 0 "register_operand" "=w")
4326 (match_operand:DI 1 "register_operand" "w")
4327 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4329 "TARGET_SIMD && reload_completed"
4330 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4331 [(set_attr "type" "neon_compare")]
4336 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4337 ;; we don't have any insns using ne, and aarch64_vcond outputs
4338 ;; not (neg (eq (and x y) 0))
4339 ;; which is rewritten by simplify_rtx as
4340 ;; plus (eq (and x y) 0) -1.
;; NOTE(review): interior lines are missing from this chunk (numeric
;; prefixes skip); code untouched, comments only.
;; CMTST: test-bits compare, matched in the plus(eq(and),-1) canonical
;; form explained in the comment above.
4342 (define_insn "aarch64_cmtst<mode>"
4343 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4347 (match_operand:VDQ_I 1 "register_operand" "w")
4348 (match_operand:VDQ_I 2 "register_operand" "w"))
4349 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4350 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4353 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4354 [(set_attr "type" "neon_tst<q>")]
;; DImode cmtst: GP-register alternative splits post-reload into an AND,
;; a compare against zero, and cstoredi_neg (hence the CC clobber).
4357 (define_insn_and_split "aarch64_cmtstdi"
4358 [(set (match_operand:DI 0 "register_operand" "=w,r")
4362 (match_operand:DI 1 "register_operand" "w,r")
4363 (match_operand:DI 2 "register_operand" "w,r"))
4365 (clobber (reg:CC CC_REGNUM))]
4369 [(set (match_operand:DI 0 "register_operand")
4373 (match_operand:DI 1 "register_operand")
4374 (match_operand:DI 2 "register_operand"))
4377 /* If we are in the general purpose register file,
4378 we split to a sequence of comparison and store. */
4379 if (GP_REGNUM_P (REGNO (operands[0]))
4380 && GP_REGNUM_P (REGNO (operands[1])))
4382 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4383 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4384 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4385 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4386 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4389 /* Otherwise, we expand to a similar pattern which does not
4390 clobber CC_REGNUM. */
4392 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload SIMD-register form of DImode cmtst.
4395 (define_insn "*aarch64_cmtstdi"
4396 [(set (match_operand:DI 0 "register_operand" "=w")
4400 (match_operand:DI 1 "register_operand" "w")
4401 (match_operand:DI 2 "register_operand" "w"))
4404 "cmtst\t%d0, %d1, %d2"
4405 [(set_attr "type" "neon_tst")]
4408 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector compare producing an integer all-ones/all-zeros
;; mask per lane.  Alternative 1 compares against immediate zero (#0).
;; <cmp_1>/<cmp_2> swap the operands so that le/lt map onto ge/gt.
4410 (define_insn "aarch64_cm<optab><mode>"
4411 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4413 (COMPARISONS:<V_INT_EQUIV>
4414 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4415 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4419 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4420 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4421 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4425 ;; Note we can also handle what would be fac(le|lt) by
4426 ;; generating fac(ge|gt).
;; Absolute-value floating-point compare (FACGE/FACGT): compares |op1|
;; with |op2|, again yielding an integer lane mask.
4428 (define_insn "aarch64_fac<optab><mode>"
4429 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4431 (FAC_COMPARISONS:<V_INT_EQUIV>
4433 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4435 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4438 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4439 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; ADDP: pairwise add of adjacent lanes across the two source vectors
;; (64-bit integer vector modes).
4444 (define_insn "aarch64_addp<mode>"
4445 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4447 [(match_operand:VD_BHSI 1 "register_operand" "w")
4448 (match_operand:VD_BHSI 2 "register_operand" "w")]
4451 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4452 [(set_attr "type" "neon_reduc_add<q>")]
;; Scalar ADDP: sums the two 64-bit halves of a V2DI source into a
;; single DImode SIMD register.
4455 (define_insn "aarch64_addpdi"
4456 [(set (match_operand:DI 0 "register_operand" "=w")
4458 [(match_operand:V2DI 1 "register_operand" "w")]
4462 [(set_attr "type" "neon_reduc_add")]
;; Standard-name vector square root.  The expander first tries the
;; approximate-sqrt sequence (Newton-Raphson) when that is enabled;
;; otherwise it falls through to the FSQRT insn below.
4467 (define_expand "sqrt<mode>2"
4468 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4469 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4472 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
;; Exact hardware square root.
4476 (define_insn "*sqrt<mode>2"
4477 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4478 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4480 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4481 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4484 ;; Patterns for vector struct loads and stores.
;; LD2: de-interleaving load of a 2-element structure into a Q-register
;; pair, represented as one OImode (2x128-bit) value.
4486 (define_insn "aarch64_simd_ld2<mode>"
4487 [(set (match_operand:OI 0 "register_operand" "=w")
4488 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4489 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4492 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4493 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element structure and replicate it to all lanes of
;; both destination registers.  Memory operand is BLKmode since only the
;; structure size, not a vector mode, describes the access.
4496 (define_insn "aarch64_simd_ld2r<mode>"
4497 [(set (match_operand:OI 0 "register_operand" "=w")
4498 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4499 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4502 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4503 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 single-lane: load a 2-element structure into lane %3 of the
;; register pair; other lanes come from operand 2 (tied to output).
;; The lane number is flipped for big-endian in the output routine.
4506 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4507 [(set (match_operand:OI 0 "register_operand" "=w")
4508 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4509 (match_operand:OI 2 "register_operand" "0")
4510 (match_operand:SI 3 "immediate_operand" "i")
4511 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4515 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4516 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4518 [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander for 2-struct loads.  On big-endian the loaded
;; register list is reversed via a TBL-based reglist reversal so GCC's
;; lane numbering stays consistent.
4521 (define_expand "vec_load_lanesoi<mode>"
4522 [(set (match_operand:OI 0 "register_operand" "=w")
4523 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4528 if (BYTES_BIG_ENDIAN)
4530 rtx tmp = gen_reg_rtx (OImode);
4531 rtx mask = aarch64_reverse_mask (<MODE>mode);
4532 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4533 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4536 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; ST2: interleaving store of a Q-register pair as 2-element structures.
4540 (define_insn "aarch64_simd_st2<mode>"
4541 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4542 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4543 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4546 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4547 [(set_attr "type" "neon_store2_2reg<q>")]
4550 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST2 single-lane: store lane %2 of the register pair as one structure.
4551 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4552 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4553 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4554 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4555 (match_operand:SI 2 "immediate_operand" "i")]
4559 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4560 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4562 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Standard-name expander for 2-struct stores; mirror of the load
;; expander, reversing the register list first on big-endian.
4565 (define_expand "vec_store_lanesoi<mode>"
4566 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4567 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4572 if (BYTES_BIG_ENDIAN)
4574 rtx tmp = gen_reg_rtx (OImode);
4575 rtx mask = aarch64_reverse_mask (<MODE>mode);
4576 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4577 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4580 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3: de-interleaving load of a 3-element structure into a CImode
;; (3x128-bit) register triple.  Same scheme as the ld2 patterns above.
4584 (define_insn "aarch64_simd_ld3<mode>"
4585 [(set (match_operand:CI 0 "register_operand" "=w")
4586 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4587 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4590 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4591 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one 3-element structure, replicated to all lanes.
4594 (define_insn "aarch64_simd_ld3r<mode>"
4595 [(set (match_operand:CI 0 "register_operand" "=w")
4596 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4597 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4600 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4601 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 single-lane; lane index flipped for big-endian at output time.
4604 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4605 [(set (match_operand:CI 0 "register_operand" "=w")
4606 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4607 (match_operand:CI 2 "register_operand" "0")
4608 (match_operand:SI 3 "immediate_operand" "i")
4609 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4613 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4614 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4616 [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name 3-struct load; register list reversed on big-endian.
4619 (define_expand "vec_load_lanesci<mode>"
4620 [(set (match_operand:CI 0 "register_operand" "=w")
4621 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4626 if (BYTES_BIG_ENDIAN)
4628 rtx tmp = gen_reg_rtx (CImode);
4629 rtx mask = aarch64_reverse_mask (<MODE>mode);
4630 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4631 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4634 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
;; ST3: interleaving store of a register triple.
4638 (define_insn "aarch64_simd_st3<mode>"
4639 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4640 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4641 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4644 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4645 [(set_attr "type" "neon_store3_3reg<q>")]
4648 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST3 single-lane.
4649 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4650 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4651 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4652 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4653 (match_operand:SI 2 "immediate_operand" "i")]
4657 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4658 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4660 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name 3-struct store; register list reversed on big-endian.
4663 (define_expand "vec_store_lanesci<mode>"
4664 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4665 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4666 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4670 if (BYTES_BIG_ENDIAN)
4672 rtx tmp = gen_reg_rtx (CImode);
4673 rtx mask = aarch64_reverse_mask (<MODE>mode);
4674 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4675 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4678 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4: de-interleaving load of a 4-element structure into an XImode
;; (4x128-bit) register quad.  Same scheme as the ld2/ld3 patterns.
4682 (define_insn "aarch64_simd_ld4<mode>"
4683 [(set (match_operand:XI 0 "register_operand" "=w")
4684 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4685 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4688 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4689 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure, replicated to all lanes.
4692 (define_insn "aarch64_simd_ld4r<mode>"
4693 [(set (match_operand:XI 0 "register_operand" "=w")
4694 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4695 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4698 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4699 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; LD4 single-lane; lane index flipped for big-endian at output time.
4702 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4703 [(set (match_operand:XI 0 "register_operand" "=w")
4704 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4705 (match_operand:XI 2 "register_operand" "0")
4706 (match_operand:SI 3 "immediate_operand" "i")
4707 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4711 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4712 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4714 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name 4-struct load; register list reversed on big-endian.
4717 (define_expand "vec_load_lanesxi<mode>"
4718 [(set (match_operand:XI 0 "register_operand" "=w")
4719 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4720 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4724 if (BYTES_BIG_ENDIAN)
4726 rtx tmp = gen_reg_rtx (XImode);
4727 rtx mask = aarch64_reverse_mask (<MODE>mode);
4728 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4729 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4732 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; ST4: interleaving store of a register quad.
4736 (define_insn "aarch64_simd_st4<mode>"
4737 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4738 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4739 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4742 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4743 [(set_attr "type" "neon_store4_4reg<q>")]
4746 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; ST4 single-lane.
4747 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4748 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4749 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4750 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4751 (match_operand:SI 2 "immediate_operand" "i")]
4755 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4756 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4758 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name 4-struct store; register list reversed on big-endian.
4761 (define_expand "vec_store_lanesxi<mode>"
4762 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4763 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4764 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4768 if (BYTES_BIG_ENDIAN)
4770 rtx tmp = gen_reg_rtx (XImode);
4771 rtx mask = aarch64_reverse_mask (<MODE>mode);
4772 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4773 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4776 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the element order within each register of a struct register
;; list (used for big-endian lane-number correction).  Split after
;; reload into one TBL (byte permute via operand 2's mask) per
;; constituent Q register.  Output is earlyclobber since the first TBL
;; would otherwise overwrite still-needed input registers.
4780 (define_insn_and_split "aarch64_rev_reglist<mode>"
4781 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4783 [(match_operand:VSTRUCT 1 "register_operand" "w")
4784 (match_operand:V16QI 2 "register_operand" "w")]
4785 UNSPEC_REV_REGLIST))]
4788 "&& reload_completed"
4792 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4793 for (i = 0; i < nregs; i++)
4795 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4796 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4797 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4801 [(set_attr "type" "neon_tbl1_q")
4802 (set_attr "length" "<insn_count>")]
4805 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for struct modes (OI/CI/XI): before reload, force a
;; non-register destination's source into a register.
4807 (define_expand "mov<mode>"
4808 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4809 (match_operand:VSTRUCT 1 "general_operand" ""))]
4812 if (can_create_pseudo_p ())
4814 if (GET_CODE (operands[0]) != REG)
4815 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian struct-mode move: reg-reg (split later into multiple
;; moves), or single LD1/ST1 multi-register transfers to/from memory.
4819 (define_insn "*aarch64_mov<mode>"
4820 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4821 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4822 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4823 && (register_operand (operands[0], <MODE>mode)
4824 || register_operand (operands[1], <MODE>mode))"
4827 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4828 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4829 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4830 neon_load<nregs>_<nregs>reg_q")
4831 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian element-ordered single-vector load: LD1 loads elements in
;; array order regardless of endianness, unlike LDR.
4834 (define_insn "aarch64_be_ld1<mode>"
4835 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4836 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4837 "aarch64_simd_struct_operand" "Utv")]
4840 "ld1\\t{%0<Vmtype>}, %1"
4841 [(set_attr "type" "neon_load1_1reg<q>")]
;; Big-endian counterpart store via ST1.
4844 (define_insn "aarch64_be_st1<mode>"
4845 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4846 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4849 "st1\\t{%1<Vmtype>}, %0"
4850 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OImode (2-register) move: reg-reg split into parts,
;; memory transfers via STP/LDP of Q registers.
4853 (define_insn "*aarch64_be_movoi"
4854 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4855 (match_operand:OI 1 "general_operand" " w,w,m"))]
4856 "TARGET_SIMD && BYTES_BIG_ENDIAN
4857 && (register_operand (operands[0], OImode)
4858 || register_operand (operands[1], OImode))"
4863 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4864 (set_attr "length" "8,4,4")]
;; Big-endian CImode (3-register) move; memory alternatives use an
;; offsettable address ("o") so the access can be done piecewise.
4867 (define_insn "*aarch64_be_movci"
4868 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4869 (match_operand:CI 1 "general_operand" " w,w,o"))]
4870 "TARGET_SIMD && BYTES_BIG_ENDIAN
4871 && (register_operand (operands[0], CImode)
4872 || register_operand (operands[1], CImode))"
4874 [(set_attr "type" "multiple")
4875 (set_attr "length" "12,4,4")]
;; Big-endian XImode (4-register) move.
4878 (define_insn "*aarch64_be_movxi"
4879 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4880 (match_operand:XI 1 "general_operand" " w,w,o"))]
4881 "TARGET_SIMD && BYTES_BIG_ENDIAN
4882 && (register_operand (operands[0], XImode)
4883 || register_operand (operands[1], XImode))"
4885 [(set_attr "type" "multiple")
4886 (set_attr "length" "16,4,4")]
;; Post-reload splitters for struct-mode moves: decompose OI/CI/XI
;; register-register moves into TImode part moves, and (big-endian)
;; memory moves into OImode/TImode pieces with explicit subregs.
4890 [(set (match_operand:OI 0 "register_operand")
4891 (match_operand:OI 1 "register_operand"))]
4892 "TARGET_SIMD && reload_completed"
4895 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4900 [(set (match_operand:CI 0 "nonimmediate_operand")
4901 (match_operand:CI 1 "general_operand"))]
4902 "TARGET_SIMD && reload_completed"
4905 if (register_operand (operands[0], CImode)
4906 && register_operand (operands[1], CImode))
4908 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4911 else if (BYTES_BIG_ENDIAN)
4913 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4914 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4915 emit_move_insn (gen_lowpart (V16QImode,
4916 simplify_gen_subreg (TImode, operands[0],
4918 gen_lowpart (V16QImode,
4919 simplify_gen_subreg (TImode, operands[1],
4928 [(set (match_operand:XI 0 "nonimmediate_operand")
4929 (match_operand:XI 1 "general_operand"))]
4930 "TARGET_SIMD && reload_completed"
4933 if (register_operand (operands[0], XImode)
4934 && register_operand (operands[1], XImode))
4936 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4939 else if (BYTES_BIG_ENDIAN)
4941 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4942 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4943 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4944 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for LDnR (load-and-replicate): wraps the pointer in
;; a BLKmode MEM sized to one n-element structure and emits the
;; corresponding ldNr insn.
4951 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4952 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4953 (match_operand:DI 1 "register_operand" "w")
4954 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4957 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4958 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4961 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register (64-bit vector) variants of the struct loads.  The loaded
;; registers occupy the low halves of an OI/CI/XI quad-register group.
4966 (define_insn "aarch64_ld2<mode>_dreg"
4967 [(set (match_operand:OI 0 "register_operand" "=w")
4968 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4969 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4972 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4973 [(set_attr "type" "neon_load2_2reg<q>")]
;; For DImode/DFmode elements there is no de-interleave to do, so plain
;; LD1 of two 1-element (.1d) registers is used instead of LD2.
4976 (define_insn "aarch64_ld2<mode>_dreg"
4977 [(set (match_operand:OI 0 "register_operand" "=w")
4978 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4979 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4982 "ld1\\t{%S0.1d - %T0.1d}, %1"
4983 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3, D-register form.
4986 (define_insn "aarch64_ld3<mode>_dreg"
4987 [(set (match_operand:CI 0 "register_operand" "=w")
4988 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4989 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4992 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4993 [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3, DX-element form via LD1 (no interleave for 1-element vectors).
4996 (define_insn "aarch64_ld3<mode>_dreg"
4997 [(set (match_operand:CI 0 "register_operand" "=w")
4998 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4999 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5002 "ld1\\t{%S0.1d - %U0.1d}, %1"
5003 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4, D-register form.
5006 (define_insn "aarch64_ld4<mode>_dreg"
5007 [(set (match_operand:XI 0 "register_operand" "=w")
5008 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5009 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5012 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5013 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4, DX-element form via LD1.
5016 (define_insn "aarch64_ld4<mode>_dreg"
5017 [(set (match_operand:XI 0 "register_operand" "=w")
5018 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5019 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022 "ld1\\t{%S0.1d - %V0.1d}, %1"
5023 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for ldN with 64-bit vectors: size the BLKmode MEM
;; to nregs * 8 bytes and defer to the matching _dreg insn.
5026 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5027 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5028 (match_operand:DI 1 "register_operand" "r")
5029 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5032 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5033 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5035 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Builtin expander for ld1: a plain vector move on little-endian,
;; the element-ordered be_ld1 pattern on big-endian.
5039 (define_expand "aarch64_ld1<VALL_F16:mode>"
5040 [(match_operand:VALL_F16 0 "register_operand")
5041 (match_operand:DI 1 "register_operand")]
5044 machine_mode mode = <VALL_F16:MODE>mode;
5045 rtx mem = gen_rtx_MEM (mode, operands[1]);
5047 if (BYTES_BIG_ENDIAN)
5048 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5050 emit_move_insn (operands[0], mem);
;; Builtin expander for ldN with 128-bit vectors: forwards to the
;; Q-register aarch64_simd_ldN insn.
5054 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5055 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5056 (match_operand:DI 1 "register_operand" "r")
5057 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5060 machine_mode mode = <VSTRUCT:MODE>mode;
5061 rtx mem = gen_rtx_MEM (mode, operands[1]);
5063 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; Builtin expander for ldN_lane: checks the lane index against the
;; vector's lane count, then emits the single-lane load insn.
5067 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5068 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5069 (match_operand:DI 1 "register_operand" "w")
5070 (match_operand:VSTRUCT 2 "register_operand" "0")
5071 (match_operand:SI 3 "immediate_operand" "i")
5072 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5076 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5079 aarch64_simd_lane_bounds (operands[3], 0,
5080 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5082 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5083 operands[0], mem, operands[2], operands[3]));
5087 ;; Expanders for builtins to extract vector registers from large
5088 ;; opaque integer modes.
;; Extract D-register `part` of a struct-mode value: take the 128-bit
;; subreg at byte offset part*16, then its low 64-bit half.
5092 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5093 [(match_operand:VDC 0 "register_operand" "=w")
5094 (match_operand:VSTRUCT 1 "register_operand" "w")
5095 (match_operand:SI 2 "immediate_operand" "i")]
5098 int part = INTVAL (operands[2]);
5099 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5100 int offset = part * 16;
5102 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5103 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register `part` of a struct-mode value as a direct subreg.
5109 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5110 [(match_operand:VQ 0 "register_operand" "=w")
5111 (match_operand:VSTRUCT 1 "register_operand" "w")
5112 (match_operand:SI 2 "immediate_operand" "i")]
5115 int part = INTVAL (operands[2]);
5116 int offset = part * 16;
5118 emit_move_insn (operands[0],
5119 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5123 ;; Permuted-store expanders for neon intrinsics.
5125 ;; Permute instructions
;; Standard-name constant permute: try the target's specialized
;; sequences (TRN/ZIP/UZP/EXT/DUP/TBL) for the given selector.
5129 (define_expand "vec_perm_const<mode>"
5130 [(match_operand:VALL_F16 0 "register_operand")
5131 (match_operand:VALL_F16 1 "register_operand")
5132 (match_operand:VALL_F16 2 "register_operand")
5133 (match_operand:<V_INT_EQUIV> 3)]
5136 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5137 operands[2], operands[3]))
;; Standard-name variable permute for byte vectors (TBL-based).
5143 (define_expand "vec_perm<mode>"
5144 [(match_operand:VB 0 "register_operand")
5145 (match_operand:VB 1 "register_operand")
5146 (match_operand:VB 2 "register_operand")
5147 (match_operand:VB 3 "register_operand")]
5150 aarch64_expand_vec_perm (operands[0], operands[1],
5151 operands[2], operands[3]);
;; TBL with a single 128-bit table register: each result byte is the
;; table byte selected by the corresponding index byte (0 if out of
;; range).
5155 (define_insn "aarch64_tbl1<mode>"
5156 [(set (match_operand:VB 0 "register_operand" "=w")
5157 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5158 (match_operand:VB 2 "register_operand" "w")]
5161 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5162 [(set_attr "type" "neon_tbl1<q>")]
5165 ;; Two source registers.
;; TBL over a two-register (OImode) table.
5167 (define_insn "aarch64_tbl2v16qi"
5168 [(set (match_operand:V16QI 0 "register_operand" "=w")
5169 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5170 (match_operand:V16QI 2 "register_operand" "w")]
5173 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5174 [(set_attr "type" "neon_tbl2_q")]
;; TBL over a two-register table, any byte-vector width result.
5177 (define_insn "aarch64_tbl3<mode>"
5178 [(set (match_operand:VB 0 "register_operand" "=w")
5179 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5180 (match_operand:VB 2 "register_operand" "w")]
5183 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5184 [(set_attr "type" "neon_tbl3")]
;; TBX over a two-register table: like TBL but out-of-range indices
;; leave the destination byte (operand 1, tied to output) unchanged.
5187 (define_insn "aarch64_tbx4<mode>"
5188 [(set (match_operand:VB 0 "register_operand" "=w")
5189 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5190 (match_operand:OI 2 "register_operand" "w")
5191 (match_operand:VB 3 "register_operand" "w")]
5194 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5195 [(set_attr "type" "neon_tbl4")]
5198 ;; Three source registers.
;; TBL over a three-register (CImode) table.
5200 (define_insn "aarch64_qtbl3<mode>"
5201 [(set (match_operand:VB 0 "register_operand" "=w")
5202 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5203 (match_operand:VB 2 "register_operand" "w")]
5206 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5207 [(set_attr "type" "neon_tbl3")]
;; TBX over a three-register table.
5210 (define_insn "aarch64_qtbx3<mode>"
5211 [(set (match_operand:VB 0 "register_operand" "=w")
5212 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5213 (match_operand:CI 2 "register_operand" "w")
5214 (match_operand:VB 3 "register_operand" "w")]
5217 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5218 [(set_attr "type" "neon_tbl3")]
5221 ;; Four source registers.
;; TBL over a four-register (XImode) table.
5223 (define_insn "aarch64_qtbl4<mode>"
5224 [(set (match_operand:VB 0 "register_operand" "=w")
5225 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5226 (match_operand:VB 2 "register_operand" "w")]
5229 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5230 [(set_attr "type" "neon_tbl4")]
;; TBX over a four-register table.
5233 (define_insn "aarch64_qtbx4<mode>"
5234 [(set (match_operand:VB 0 "register_operand" "=w")
5235 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5236 (match_operand:XI 2 "register_operand" "w")
5237 (match_operand:VB 3 "register_operand" "w")]
5240 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5241 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OImode register pair; split
;; after reload into the actual register moves.
5244 (define_insn_and_split "aarch64_combinev16qi"
5245 [(set (match_operand:OI 0 "register_operand" "=w")
5246 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5247 (match_operand:V16QI 2 "register_operand" "w")]
5251 "&& reload_completed"
5254 aarch64_split_combinev16qi (operands);
5257 [(set_attr "type" "multiple")]
;; Two-input permutes: TRN1/TRN2, ZIP1/ZIP2, UZP1/UZP2 selected by the
;; PERMUTE iterator's insn/hilo attributes.
5260 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5261 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5262 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5263 (match_operand:VALL_F16 2 "register_operand" "w")]
5266 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5267 [(set_attr "type" "neon_permute<q>")]
5270 ;; Note immediate (third) operand is lane index not byte index.
;; EXT: extract a vector from the concatenation of operands 1 and 2,
;; starting at lane %3; the lane index is scaled to a byte offset for
;; the assembly immediate.
5271 (define_insn "aarch64_ext<mode>"
5272 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5273 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5274 (match_operand:VALL_F16 2 "register_operand" "w")
5275 (match_operand:SI 3 "immediate_operand" "i")]
5279 operands[3] = GEN_INT (INTVAL (operands[3])
5280 * GET_MODE_UNIT_SIZE (<MODE>mode));
5281 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5283 [(set_attr "type" "neon_ext<q>")]
;; REV16/REV32/REV64: reverse elements within each <rev_op>-bit chunk.
5286 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5287 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5288 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5291 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5292 [(set_attr "type" "neon_rev<q>")]
;; D-register (64-bit vector) struct stores, mirroring the _dreg loads:
;; STn for multi-element vectors, ST1 of .1d registers for the
;; 1-element DImode/DFmode case where no interleaving is needed.
5295 (define_insn "aarch64_st2<mode>_dreg"
5296 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5297 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5298 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5301 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5302 [(set_attr "type" "neon_store2_2reg")]
;; ST2, DX-element form via ST1.
5305 (define_insn "aarch64_st2<mode>_dreg"
5306 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5307 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5308 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311 "st1\\t{%S1.1d - %T1.1d}, %0"
5312 [(set_attr "type" "neon_store1_2reg")]
;; ST3, D-register form.
5315 (define_insn "aarch64_st3<mode>_dreg"
5316 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5317 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5318 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5321 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5322 [(set_attr "type" "neon_store3_3reg")]
;; ST3, DX-element form via ST1.
5325 (define_insn "aarch64_st3<mode>_dreg"
5326 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5327 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5328 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5331 "st1\\t{%S1.1d - %U1.1d}, %0"
5332 [(set_attr "type" "neon_store1_3reg")]
;; ST4, D-register form.
5335 (define_insn "aarch64_st4<mode>_dreg"
5336 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5337 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5338 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5342 [(set_attr "type" "neon_store4_4reg")]
;; ST4, DX-element form via ST1.
5345 (define_insn "aarch64_st4<mode>_dreg"
5346 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5347 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5348 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5351 "st1\\t{%S1.1d - %V1.1d}, %0"
5352 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expander for stN with 64-bit vectors: size the BLKmode MEM
;; to nregs * 8 bytes and defer to the matching _dreg store insn.
5355 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5356 [(match_operand:DI 0 "register_operand" "r")
5357 (match_operand:VSTRUCT 1 "register_operand" "w")
5358 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5361 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5362 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5364 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Builtin expander for stN with 128-bit vectors: forwards to the
;; Q-register aarch64_simd_stN insn.
5368 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5369 [(match_operand:DI 0 "register_operand" "r")
5370 (match_operand:VSTRUCT 1 "register_operand" "w")
5371 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5374 machine_mode mode = <VSTRUCT:MODE>mode;
5375 rtx mem = gen_rtx_MEM (mode, operands[0]);
5377 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Builtin expander for stN_lane: emits the single-lane store insn.
5381 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5382 [(match_operand:DI 0 "register_operand" "r")
5383 (match_operand:VSTRUCT 1 "register_operand" "w")
5384 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5385 (match_operand:SI 2 "immediate_operand")]
5388 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5389 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5392 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5393 mem, operands[1], operands[2]));
;; Builtin expander for st1: plain move on little-endian, the
;; element-ordered be_st1 pattern on big-endian.
5397 (define_expand "aarch64_st1<VALL_F16:mode>"
5398 [(match_operand:DI 0 "register_operand")
5399 (match_operand:VALL_F16 1 "register_operand")]
5402 machine_mode mode = <VALL_F16:MODE>mode;
5403 rtx mem = gen_rtx_MEM (mode, operands[0]);
5405 if (BYTES_BIG_ENDIAN)
5406 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5408 emit_move_insn (mem, operands[1]);
5412 ;; Expander for builtins to insert vector registers into large
5413 ;; opaque integer modes.
5415 ;; Q-register list. We don't need a D-reg inserter as we zero
5416 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Copy the whole struct value, then overwrite Q-register `part`
;; (byte offset part*16) with operand 2 via a subreg store.
5418 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5419 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5420 (match_operand:VSTRUCT 1 "register_operand" "0")
5421 (match_operand:VQ 2 "register_operand" "w")
5422 (match_operand:SI 3 "immediate_operand" "i")]
5425 int part = INTVAL (operands[3]);
5426 int offset = part * 16;
5428 emit_move_insn (operands[0], operands[1]);
5429 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5434 ;; Standard pattern name vec_init<mode><Vel>.
;; Build a vector from scalar elements (delegated entirely to C code).
5436 (define_expand "vec_init<mode><Vel>"
5437 [(match_operand:VALL_F16 0 "register_operand" "")
5438 (match_operand 1 "" "")]
5441 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load one scalar from memory and duplicate it to every lane.
5445 (define_insn "*aarch64_simd_ld1r<mode>"
5446 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5447 (vec_duplicate:VALL_F16
5448 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5450 "ld1r\\t{%0.<Vtype>}, %1"
5451 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: floating-point reciprocal estimate, per lane.
5454 (define_insn "aarch64_frecpe<mode>"
5455 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5456 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5459 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5460 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar FRECPE/FRECPX on a single FP value (suffix from FRECP
;; iterator selects which).
5463 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5464 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5465 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5468 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5469 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; FRECPS: reciprocal Newton-Raphson step.
5472 (define_insn "aarch64_frecps<mode>"
5473 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5475 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5476 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5479 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5480 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; URECPE: unsigned integer reciprocal estimate, per 32-bit lane.
5483 (define_insn "aarch64_urecpe<mode>"
5484 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5485 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5488 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5489 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5491 ;; Standard pattern name vec_extract<mode><Vel>.
;; Extract lane %2 of a vector into a scalar, via the get_lane pattern.
5493 (define_expand "vec_extract<mode><Vel>"
5494 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5495 (match_operand:VALL_F16 1 "register_operand" "")
5496 (match_operand:SI 2 "immediate_operand" "")]
5500 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]))
;; AES single round: "aese" (encrypt) or "aesd" (decrypt) selected by
;; the aes_op iterator.  Operand 1 is tied to operand 0 ("0"
;; constraint), matching the instruction's read-modify-write of the
;; state register.
5506 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5507 [(set (match_operand:V16QI 0 "register_operand" "=w")
5508 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5509 (match_operand:V16QI 2 "register_operand" "w")]
5511 "TARGET_SIMD && TARGET_CRYPTO"
5512 "aes<aes_op>\\t%0.16b, %2.16b"
5513 [(set_attr "type" "crypto_aese")]
5516 ;; When AES/AESMC fusion is enabled we want the register allocation to
5520 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AES MixColumns / InvMixColumns ("aesmc"/"aesimc" via aesmc_op).
;; Two alternatives: the first ties operand 1 to operand 0 and is
;; enabled only when AES/AESMC fusion is active (the
;; set_attr_alternative below tests AARCH64_FUSE_AES_AESMC), so the
;; register allocator keeps the fused pair on the same register; the
;; second alternative is always enabled.
5522 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5523 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5524 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5526 "TARGET_SIMD && TARGET_CRYPTO"
5527 "aes<aesmc_op>\\t%0.16b, %1.16b"
5528 [(set_attr "type" "crypto_aesmc")
5529 (set_attr_alternative "enabled"
5530 [(if_then_else (match_test
5531 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5532 (const_string "yes" )
5533 (const_string "no"))
5534 (const_string "yes")])]
;; SHA1 fixed-rotate ("sha1h") on a 32-bit value held in a SIMD
;; register.
;; NOTE(review): the output-template line (source line 5545) is absent
;; from this listing — confirm against the original file.
5539 (define_insn "aarch64_crypto_sha1hsi"
5540 [(set (match_operand:SI 0 "register_operand" "=w")
5541 (unspec:SI [(match_operand:SI 1
5542 "register_operand" "w")]
5544 "TARGET_SIMD && TARGET_CRYPTO"
5546 [(set_attr "type" "crypto_sha1_fast")]
;; sha1h taking its input from lane 0 of a V4SI register —
;; little-endian variant (note the !BYTES_BIG_ENDIAN condition; lane 0
;; is the low element in LE lane numbering).
;; NOTE(review): the output-template line (source line 5555) is absent
;; from this listing — confirm against the original file.
5549 (define_insn "aarch64_crypto_sha1hv4si"
5550 [(set (match_operand:SI 0 "register_operand" "=w")
5551 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5552 (parallel [(const_int 0)]))]
5554 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5556 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart of aarch64_crypto_sha1hv4si: selects lane 3,
;; which in BE lane numbering corresponds to the same architectural
;; element as lane 0 in LE.
;; NOTE(review): the output-template line (source line 5565) is absent
;; from this listing — confirm against the original file.
5559 (define_insn "aarch64_be_crypto_sha1hv4si"
5560 [(set (match_operand:SI 0 "register_operand" "=w")
5561 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5562 (parallel [(const_int 3)]))]
5564 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5566 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update 1 ("sha1su1").  Operand 1 is tied to operand 0
;; (read-modify-write of the destination).
5569 (define_insn "aarch64_crypto_sha1su1v4si"
5570 [(set (match_operand:V4SI 0 "register_operand" "=w")
5571 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5572 (match_operand:V4SI 2 "register_operand" "w")]
5574 "TARGET_SIMD && TARGET_CRYPTO"
5575 "sha1su1\\t%0.4s, %2.4s"
5576 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash update instruction (mnemonic suffix from the sha1_op
;; iterator, e.g. sha1c/sha1m/sha1p).  Operand 0 is the 128-bit hash
;; state (tied to operand 1), operand 2 a 32-bit value, operand 3 the
;; schedule vector.
5579 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5580 [(set (match_operand:V4SI 0 "register_operand" "=w")
5581 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5582 (match_operand:SI 2 "register_operand" "w")
5583 (match_operand:V4SI 3 "register_operand" "w")]
5585 "TARGET_SIMD && TARGET_CRYPTO"
5586 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5587 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update 0 ("sha1su0"), three-input schedule step with
;; operand 1 tied to the destination.
5590 (define_insn "aarch64_crypto_sha1su0v4si"
5591 [(set (match_operand:V4SI 0 "register_operand" "=w")
5592 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5593 (match_operand:V4SI 2 "register_operand" "w")
5594 (match_operand:V4SI 3 "register_operand" "w")]
5596 "TARGET_SIMD && TARGET_CRYPTO"
5597 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5598 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash update ("sha256h"/"sha256h2" via sha256_op).  Operands 0
;; and 2 are full 128-bit quantities (%q), operand 1 tied to operand 0.
5603 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5604 [(set (match_operand:V4SI 0 "register_operand" "=w")
5605 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5606 (match_operand:V4SI 2 "register_operand" "w")
5607 (match_operand:V4SI 3 "register_operand" "w")]
5609 "TARGET_SIMD && TARGET_CRYPTO"
5610 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5611 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update 0 ("sha256su0").  Operand 1 is tied to the
;; destination (read-modify-write).
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space after &&) — now spaced consistently with the sibling crypto
;; patterns in this file.
5614 (define_insn "aarch64_crypto_sha256su0v4si"
5615 [(set (match_operand:V4SI 0 "register_operand" "=w")
5616 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5617 (match_operand:V4SI 2 "register_operand" "w")]
5619 "TARGET_SIMD && TARGET_CRYPTO"
5620 "sha256su0\\t%0.4s, %2.4s"
5621 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update 1 ("sha256su1"), three-input schedule step
;; with operand 1 tied to the destination.
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space after &&) — now spaced consistently with the sibling crypto
;; patterns in this file.
5624 (define_insn "aarch64_crypto_sha256su1v4si"
5625 [(set (match_operand:V4SI 0 "register_operand" "=w")
5626 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5627 (match_operand:V4SI 2 "register_operand" "w")
5628 (match_operand:V4SI 3 "register_operand" "w")]
5630 "TARGET_SIMD && TARGET_CRYPTO"
5631 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5632 [(set_attr "type" "crypto_sha256_slow")]
;; 64x64 -> 128-bit carry-less polynomial multiply ("pmull") of two DI
;; values, producing a TI result in the low pair ("1q"/"1d" arrangement).
5637 (define_insn "aarch64_crypto_pmulldi"
5638 [(set (match_operand:TI 0 "register_operand" "=w")
5639 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5640 (match_operand:DI 2 "register_operand" "w")]
5642 "TARGET_SIMD && TARGET_CRYPTO"
5643 "pmull\\t%0.1q, %1.1d, %2.1d"
5644 [(set_attr "type" "crypto_pmull")]
;; High-half carry-less polynomial multiply ("pmull2"): multiplies the
;; upper 64-bit elements of the two V2DI operands into a 128-bit TI
;; result.
5647 (define_insn "aarch64_crypto_pmullv2di"
5648 [(set (match_operand:TI 0 "register_operand" "=w")
5649 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5650 (match_operand:V2DI 2 "register_operand" "w")]
5652 "TARGET_SIMD && TARGET_CRYPTO"
5653 "pmull2\\t%0.1q, %1.2d, %2.2d"
5654 [(set_attr "type" "crypto_pmull")]