1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): GCC machine-description (RTL) source, not assembler.  This
;; extract is damaged: each line carries a stale number prefix and the
;; numbering skips, so condition strings / closing parens of several
;; patterns are missing from view.  Comments below describe only what the
;; visible text shows.
;;
;; Standard "mov<mode>" expander for all vector modes (VALL_F16 iterator).
;; If the destination is memory, the source is forced into a register,
;; because a mem := mem or mem := const move has no single-insn pattern.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  As its own comment says, expansion
;; must not fail, so when neither operand is a register (e.g. the
;; auto-vectorizer created mem := const) operand 1 is legitimized first.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Element-duplicate patterns.  (Some original lines are missing from this
;; extract — insn conditions such as "TARGET_SIMD" are not visible.)
;;
;; Integer dup: broadcast a scalar into every lane of a VDQ_I vector.
;; Two alternatives: from a GP register (DUP Vd.T, Rn) or from lane 0 of
;; a SIMD register (DUP Vd.T, Vn.Ts[0]).
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point dup (VDQF_F16): only the SIMD-register-source form.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane across the whole vector.  The lane index in
;; operand 2 is remapped with ENDIAN_LANE_N so that GCC's (always
;; little-endian-numbered) RTL lane matches the architectural lane on
;; big-endian targets.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76 [(set_attr "type" "neon_dup<q>")]
;; Same as above but the source vector has the "swapped width" mode
;; (<VSWAP_WIDTH>, e.g. 64-bit source for a 128-bit destination), so the
;; lane index is remapped relative to the source's mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92 [(set_attr "type" "neon_dup<q>")]
;; General vector-register move insns.  (Original condition lines such as
;; "TARGET_SIMD && ..." are partially missing from this extract.)
;;
;; 64-bit (VD) move.  Alternatives, in order: load (ldr), store (str),
;; reg-reg SIMD copy (orr Vbtype self-move idiom), SIMD->GP (umov),
;; GP->SIMD (ins), GP->GP (mov), and immediate materialization via
;; aarch64_output_simd_mov_immediate.  At least one operand must be a
;; register (mem := mem is excluded by the visible condition fragment).
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
104 switch (which_alternative)
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "ins\t%0.d[0], %1";
111 case 5: return "mov\t%0, %1";
113 return aarch64_output_simd_mov_immediate (operands[1],
115 default: gcc_unreachable ();
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 mov_reg, neon_move<q>")]
;; 128-bit (VQ) move — same alternative layout; the GP-register
;; alternatives need two instructions (length 8, type "multiple") since a
;; Q register spans two X registers.  Several output cases are missing
;; from this extract.
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
132 switch (which_alternative)
135 return "ldr\\t%q0, %1";
137 return "str\\t%q1, %0";
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
153 (set_attr "length" "4,4,4,8,8,8,4")]
;; Paired 64-bit load: two consecutive VD loads fused when the second
;; address equals the first plus the mode size (checked with
;; rtx_equal_p/plus_constant) — emitted as a single LDP.
156 (define_insn "load_pair<mode>"
157 [(set (match_operand:VD 0 "register_operand" "=w")
158 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159 (set (match_operand:VD 2 "register_operand" "=w")
160 (match_operand:VD 3 "memory_operand" "m"))]
162 && rtx_equal_p (XEXP (operands[3], 0),
163 plus_constant (Pmode,
164 XEXP (operands[1], 0),
165 GET_MODE_SIZE (<MODE>mode)))"
167 [(set_attr "type" "neon_ldp")]
;; Paired 64-bit store: mirror of load_pair, emitted as STP.
170 (define_insn "store_pair<mode>"
171 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 (match_operand:VD 1 "register_operand" "w"))
173 (set (match_operand:VD 2 "memory_operand" "=m")
174 (match_operand:VD 3 "register_operand" "w"))]
176 && rtx_equal_p (XEXP (operands[2], 0),
177 plus_constant (Pmode,
178 XEXP (operands[0], 0),
179 GET_MODE_SIZE (<MODE>mode)))"
181 [(set_attr "type" "neon_stp")]
;; Post-reload splitters for 128-bit moves that ended up (wholly or
;; partly) in general registers.  NOTE(review): the "(define_split"
;; header lines themselves are missing from this extract; only the
;; pattern bodies are visible.
;;
;; GP := GP 128-bit move: split into two DImode reg-reg moves.
185 [(set (match_operand:VQ 0 "register_operand" "")
186 (match_operand:VQ 1 "register_operand" ""))]
187 "TARGET_SIMD && reload_completed
188 && GP_REGNUM_P (REGNO (operands[0]))
189 && GP_REGNUM_P (REGNO (operands[1]))"
192 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; FP<->GP 128-bit move: delegated to aarch64_split_simd_move.
197 [(set (match_operand:VQ 0 "register_operand" "")
198 (match_operand:VQ 1 "register_operand" ""))]
199 "TARGET_SIMD && reload_completed
200 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204 aarch64_split_simd_move (operands[0], operands[1]);
;; Helper expander: split a VQ move into two half-width (VHALF) moves.
;; GP source: write low then high half via move_lo/hi_quad.  SIMD
;; source: extract halves via mov_from_<mode>low/high using parallel
;; lane-selector constants from aarch64_simd_vect_par_cnst_half.
208 (define_expand "aarch64_split_simd_mov<mode>"
209 [(set (match_operand:VQ 0)
210 (match_operand:VQ 1))]
213 rtx dst = operands[0];
214 rtx src = operands[1];
216 if (GP_REGNUM_P (REGNO (src)))
218 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 (gen_move_lo_quad_<mode> (dst, src_low_part));
224 (gen_move_hi_quad_<mode> (dst, src_high_part));
229 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Extract the low half of a VQ vector into a GP register pair
;; (post-reload only; output template line not visible in this extract).
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 (match_operand:VQ 1 "register_operand" "w")
247 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248 "TARGET_SIMD && reload_completed"
250 [(set_attr "type" "neon_to_gp<q>")
251 (set_attr "length" "4")
;; Same for the high half.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 (match_operand:VQ 1 "register_operand" "w")
258 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259 "TARGET_SIMD && reload_completed"
261 [(set_attr "type" "neon_to_gp<q>")
262 (set_attr "length" "4")
;; Basic vector logic/arithmetic insns.  (Condition lines like
;; "TARGET_SIMD" are missing from this extract.)
;;
;; ORN: ior with complemented first operand.  Note the output swaps the
;; operands (%2 before %1) because the ORN instruction complements its
;; SECOND source, while the RTL complements operand 1.
265 (define_insn "orn<mode>3"
266 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 (match_operand:VDQ_I 2 "register_operand" "w")))]
270 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271 [(set_attr "type" "neon_logic<q>")]
;; BIC: and with complemented first operand; same operand swap as ORN.
274 (define_insn "bic<mode>3"
275 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 (match_operand:VDQ_I 2 "register_operand" "w")))]
279 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280 [(set_attr "type" "neon_logic<q>")]
;; Element-wise integer add.
283 (define_insn "add<mode>3"
284 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 (match_operand:VDQ_I 2 "register_operand" "w")))]
288 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289 [(set_attr "type" "neon_add<q>")]
;; Element-wise integer subtract.
292 (define_insn "sub<mode>3"
293 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 (match_operand:VDQ_I 2 "register_operand" "w")))]
297 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298 [(set_attr "type" "neon_sub<q>")]
;; Element-wise multiply — VDQ_BHSI only (no 64-bit-lane vector MUL
;; exists in AdvSIMD).
301 (define_insn "mul<mode>3"
302 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte/bit reversal and trailing-zero count.
;;
;; Byte swap within each element, via the REV family (REV16/REV32/REV64
;; chosen by <Vrevsuff>).
310 (define_insn "bswap<mode>2"
311 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315 [(set_attr "type" "neon_rev<q>")]
;; Bit reversal within each byte (RBIT), byte-vector modes only.
318 (define_insn "aarch64_rbit<mode>"
319 [(set (match_operand:VB 0 "register_operand" "=w")
320 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324 [(set_attr "type" "neon_rbit")]
;; ctz for 32-bit-lane vectors, synthesized as bswap + byte-wise rbit
;; (via a QI-vector subreg cast) + clz — there is no vector CTZ insn.
327 (define_expand "ctz<mode>2"
328 [(set (match_operand:VS 0 "register_operand")
329 (ctz:VS (match_operand:VS 1 "register_operand")))]
332 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply-by-element combiner patterns and reciprocal-sqrt support.
;; The lane index is remapped with ENDIAN_LANE_N before printing so the
;; assembler lane matches the RTL lane on big-endian.
;;
;; vector * duplicated lane of a same-width vector (integer or FP, <f>
;; selects the "f" prefix).
341 (define_insn "*aarch64_mul3_elt<mode>"
342 [(set (match_operand:VMUL 0 "register_operand" "=w")
346 (match_operand:VMUL 1 "register_operand" "<h_con>")
347 (parallel [(match_operand:SI 2 "immediate_operand")])))
348 (match_operand:VMUL 3 "register_operand" "w")))]
351 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Same, but the lane comes from the swapped-width mode, so the index is
;; remapped relative to <VSWAP_WIDTH>mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359 (mult:VMUL_CHANGE_NLANES
360 (vec_duplicate:VMUL_CHANGE_NLANES
362 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 (parallel [(match_operand:SI 2 "immediate_operand")])))
364 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 INTVAL (operands[2])));
369 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; vector * broadcast scalar: always uses lane 0 of the scalar register.
374 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
375 [(set (match_operand:VMUL 0 "register_operand" "=w")
378 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
379 (match_operand:VMUL 2 "register_operand" "w")))]
381 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
382 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; FRSQRTE: reciprocal square-root estimate.
385 (define_insn "aarch64_rsqrte<mode>"
386 [(set (match_operand:VALLF 0 "register_operand" "=w")
387 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
390 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391 [(set_attr "type" "neon_fp_rsqrte_<Vetype><q>")])
;; FRSQRTS: reciprocal square-root Newton-Raphson step.
393 (define_insn "aarch64_rsqrts<mode>"
394 [(set (match_operand:VALLF 0 "register_operand" "=w")
395 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
396 (match_operand:VALLF 2 "register_operand" "w")]
399 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400 [(set_attr "type" "neon_fp_rsqrts_<Vetype><q>")])
;; rsqrt expander: emits the estimate+step approximation sequence via
;; aarch64_emit_approx_sqrt (recip=true).
402 (define_expand "rsqrt<mode>2"
403 [(set (match_operand:VALLF 0 "register_operand" "=w")
404 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF := V2DF-lane * DF special case (scalar result, by-element FMUL).
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413 [(set (match_operand:DF 0 "register_operand" "=w")
416 (match_operand:V2DF 1 "register_operand" "w")
417 (parallel [(match_operand:SI 2 "immediate_operand")]))
418 (match_operand:DF 3 "register_operand" "w")))]
421 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Negate / absolute-value / absolute-difference family.
;;
;; Element-wise integer negate.
427 (define_insn "neg<mode>2"
428 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431 "neg\t%0.<Vtype>, %1.<Vtype>"
432 [(set_attr "type" "neon_neg<q>")]
;; Element-wise integer absolute value (RTL abs form, combinable).
435 (define_insn "abs<mode>2"
436 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439 "abs\t%0.<Vtype>, %1.<Vtype>"
440 [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; ... hence it is expressed as an unspec rather than (abs ...), which
;; keeps combine from merging it (continuation of the comment above;
;; intervening original lines are missing from this extract).
446 (define_insn "aarch64_abs<mode>"
447 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: abs (a - b) => SABD.
456 (define_insn "abd<mode>_3"
457 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 (abs:VDQ_BHSI (minus:VDQ_BHSI
459 (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463 [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate: acc + abs (a - b) => SABA.
;; Operand 3 is tied to the output ("0" constraint).
466 (define_insn "aba<mode>_3"
467 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD).
477 (define_insn "fabd<mode>3"
478 [(set (match_operand:VALLF 0 "register_operand" "=w")
481 (match_operand:VALLF 1 "register_operand" "w")
482 (match_operand:VALLF 2 "register_operand" "w"))))]
484 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
485 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Plain bitwise vector operations (byte-granular, so they use the
;; <Vbtype> byte view of the registers).
;;
;; Bitwise AND.
488 (define_insn "and<mode>3"
489 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
490 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
491 (match_operand:VDQ_I 2 "register_operand" "w")))]
493 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
494 [(set_attr "type" "neon_logic<q>")]
;; Bitwise inclusive OR.
497 (define_insn "ior<mode>3"
498 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 (match_operand:VDQ_I 2 "register_operand" "w")))]
502 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503 [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive OR.
506 (define_insn "xor<mode>3"
507 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 (match_operand:VDQ_I 2 "register_operand" "w")))]
511 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512 [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT.
515 (define_insn "one_cmpl<mode>2"
516 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
519 "not\t%0.<Vbtype>, %1.<Vbtype>"
520 [(set_attr "type" "neon_logic<q>")]
;; Lane-insert patterns.  Operand 2 arrives as a one-hot mask selecting
;; the lane; the output code takes exact_log2 of it, endian-corrects it
;; with ENDIAN_LANE_N, and re-encodes it one-hot so %p2 prints the lane.
;;
;; Integer vec_set: insert from a GP reg (INS ...,Rn), from lane 0 of a
;; SIMD reg (INS ...,Vn[0]), or straight from memory (LD1 single lane).
523 (define_insn "aarch64_simd_vec_set<mode>"
524 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
526 (vec_duplicate:VDQ_BHSI
527 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
528 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
529 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
532 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
533 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
534 switch (which_alternative)
537 return "ins\\t%0.<Vetype>[%p2], %w1";
539 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
541 return "ld1\\t{%0.<Vetype>}[%p2], %1";
546 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Copy one lane of vector 3 (index op 4) into one lane of vector 1
;; (one-hot mask op 2) — element-to-element INS.  Both indices are
;; endian-corrected.
549 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
550 [(set (match_operand:VALL 0 "register_operand" "=w")
554 (match_operand:VALL 3 "register_operand" "w")
556 [(match_operand:SI 4 "immediate_operand" "i")])))
557 (match_operand:VALL 1 "register_operand" "0")
558 (match_operand:SI 2 "immediate_operand" "i")))]
561 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
562 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
563 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
565 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
567 [(set_attr "type" "neon_ins<q>")]
;; Same, but the source vector is the swapped-width mode, so the source
;; lane index is corrected relative to <VSWAP_WIDTH>mode.
570 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
571 [(set (match_operand:VALL 0 "register_operand" "=w")
575 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
577 [(match_operand:SI 4 "immediate_operand" "i")])))
578 (match_operand:VALL 1 "register_operand" "0")
579 (match_operand:SI 2 "immediate_operand" "i")))]
582 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
583 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
584 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
585 INTVAL (operands[4])));
587 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
589 [(set_attr "type" "neon_ins<q>")]
;; Vector shift insns.  Immediate forms take a vector-of-equal-immediates
;; operand (Dr/Dl constraints); register forms use the SSHL/USHL
;; variable-shift instructions (which shift left by positive amounts and
;; right by negative amounts — exploited by the expanders further down).
;;
;; Logical (unsigned) shift right by immediate.
592 (define_insn "aarch64_simd_lshr<mode>"
593 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
594 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
595 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
597 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
598 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic (signed) shift right by immediate.
601 (define_insn "aarch64_simd_ashr<mode>"
602 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
603 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
604 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
606 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
607 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by immediate.
610 (define_insn "aarch64_simd_imm_shl<mode>"
611 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
615 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
616 [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by per-lane register amounts (SSHL, RTL ashift form).
619 (define_insn "aarch64_simd_reg_sshl<mode>"
620 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 (match_operand:VDQ_I 2 "register_operand" "w")))]
624 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
625 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL with possibly-negative amounts: modelled as an unspec because a
;; single RTL shift code cannot express "left or right by sign".
628 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
629 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
631 (match_operand:VDQ_I 2 "register_operand" "w")]
632 UNSPEC_ASHIFT_UNSIGNED))]
634 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
635 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL counterpart of the above for signed element shifts.
638 (define_insn "aarch64_simd_reg_shl<mode>_signed"
639 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
640 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
641 (match_operand:VDQ_I 2 "register_operand" "w")]
642 UNSPEC_ASHIFT_SIGNED))]
644 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
645 [(set_attr "type" "neon_shift_reg<q>")]
;; Standard-name shift expanders taking a scalar SI shift count.
;; Constant in-range counts map to the immediate-shift insns (count
;; broadcast via aarch64_simd_gen_const_vector_dup); otherwise the count
;; is forced to a register, duplicated across a vector and fed to the
;; SSHL/USHL register-shift patterns.  For right shifts the count is
;; negated first, because SSHL/USHL shift right for negative amounts.
;; NOTE(review): several lines (error paths, closing braces) are missing
;; from this extract.
;;
;; Left shift.
648 (define_expand "ashl<mode>3"
649 [(match_operand:VDQ_I 0 "register_operand" "")
650 (match_operand:VDQ_I 1 "register_operand" "")
651 (match_operand:SI 2 "general_operand" "")]
654 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
657 if (CONST_INT_P (operands[2]))
659 shift_amount = INTVAL (operands[2]);
660 if (shift_amount >= 0 && shift_amount < bit_width)
662 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
664 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
671 operands[2] = force_reg (SImode, operands[2]);
674 else if (MEM_P (operands[2]))
676 operands[2] = force_reg (SImode, operands[2]);
679 if (REG_P (operands[2]))
681 rtx tmp = gen_reg_rtx (<MODE>mode);
682 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
683 convert_to_mode (<VEL>mode,
686 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical right shift.  Note the constant range test is
;; 0 < amount <= bit_width (USHR allows shifting by the element width).
695 (define_expand "lshr<mode>3"
696 [(match_operand:VDQ_I 0 "register_operand" "")
697 (match_operand:VDQ_I 1 "register_operand" "")
698 (match_operand:SI 2 "general_operand" "")]
701 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
704 if (CONST_INT_P (operands[2]))
706 shift_amount = INTVAL (operands[2]);
707 if (shift_amount > 0 && shift_amount <= bit_width)
709 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
711 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
717 operands[2] = force_reg (SImode, operands[2]);
719 else if (MEM_P (operands[2]))
721 operands[2] = force_reg (SImode, operands[2]);
724 if (REG_P (operands[2]))
726 rtx tmp = gen_reg_rtx (SImode);
727 rtx tmp1 = gen_reg_rtx (<MODE>mode);
728 emit_insn (gen_negsi2 (tmp, operands[2]))
729 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
730 convert_to_mode (<VEL>mode,
732 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic right shift — identical shape, using SSHR / signed USHL.
742 (define_expand "ashr<mode>3"
743 [(match_operand:VDQ_I 0 "register_operand" "")
744 (match_operand:VDQ_I 1 "register_operand" "")
745 (match_operand:SI 2 "general_operand" "")]
748 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
751 if (CONST_INT_P (operands[2]))
753 shift_amount = INTVAL (operands[2]);
754 if (shift_amount > 0 && shift_amount <= bit_width)
756 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
758 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
764 operands[2] = force_reg (SImode, operands[2]);
766 else if (MEM_P (operands[2]))
768 operands[2] = force_reg (SImode, operands[2]);
771 if (REG_P (operands[2]))
773 rtx tmp = gen_reg_rtx (SImode);
774 rtx tmp1 = gen_reg_rtx (<MODE>mode);
775 emit_insn (gen_negsi2 (tmp, operands[2]));
776 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
777 convert_to_mode (<VEL>mode,
779 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-count shift expanders and DI scalar shift helpers.
;;
;; Per-lane left shift: maps directly onto SSHL.
789 (define_expand "vashl<mode>3"
790 [(match_operand:VDQ_I 0 "register_operand" "")
791 (match_operand:VDQ_I 1 "register_operand" "")
792 (match_operand:VDQ_I 2 "register_operand" "")]
795 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
800 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
801 ;; Negating individual lanes most certainly offsets the
802 ;; gain from vectorization.
;; Per-lane arithmetic right shift: negate the counts, then SSHL (which
;; shifts right for negative amounts).
803 (define_expand "vashr<mode>3"
804 [(match_operand:VDQ_BHSI 0 "register_operand" "")
805 (match_operand:VDQ_BHSI 1 "register_operand" "")
806 (match_operand:VDQ_BHSI 2 "register_operand" "")]
809 rtx neg = gen_reg_rtx (<MODE>mode);
810 emit (gen_neg<mode>2 (neg, operands[2]));
811 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; Intrinsic DI arithmetic shift right: an ASR by 64 is clamped to 63
;; (same result — all sign bits) because the standard ashrdi3 pattern
;; does not accept a count of 64.
817 (define_expand "aarch64_ashr_simddi"
818 [(match_operand:DI 0 "register_operand" "=w")
819 (match_operand:DI 1 "register_operand" "w")
820 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
823 /* An arithmetic shift right by 64 fills the result with copies of the sign
824 bit, just like asr by 63 - however the standard pattern does not handle
826 if (INTVAL (operands[2]) == 64)
827 operands[2] = GEN_INT (63);
828 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Per-lane logical right shift: negate counts, then USHL.
833 (define_expand "vlshr<mode>3"
834 [(match_operand:VDQ_BHSI 0 "register_operand" "")
835 (match_operand:VDQ_BHSI 1 "register_operand" "")
836 (match_operand:VDQ_BHSI 2 "register_operand" "")]
839 rtx neg = gen_reg_rtx (<MODE>mode);
840 emit (gen_neg<mode>2 (neg, operands[2]));
841 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; Intrinsic DI logical shift right: a shift by 64 yields zero, so emit
;; a plain zero move; otherwise defer to lshrdi3.
846 (define_expand "aarch64_lshr_simddi"
847 [(match_operand:DI 0 "register_operand" "=w")
848 (match_operand:DI 1 "register_operand" "w")
849 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
852 if (INTVAL (operands[2]) == 64)
853 emit_move_insn (operands[0], const0_rtx);
855 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; Integer vec_set expander: converts the plain lane index into the
;; one-hot mask the aarch64_simd_vec_set insn expects.
860 (define_expand "vec_set<mode>"
861 [(match_operand:VDQ_BHSI 0 "register_operand")
862 (match_operand:<VEL> 1 "register_operand")
863 (match_operand:SI 2 "immediate_operand")]
866 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
867 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
868 GEN_INT (elem), operands[0]));
873 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift for 64-bit vectors.  Direction flips on big-endian
;; (SHL instead of USHR) because the element order in the register is
;; reversed.
874 (define_insn "vec_shr_<mode>"
875 [(set (match_operand:VD 0 "register_operand" "=w")
876 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
877 (match_operand:SI 2 "immediate_operand" "i")]
881 if (BYTES_BIG_ENDIAN)
882 return "shl %d0, %d1, %2";
884 return "ushr %d0, %d1, %2";
886 [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: from a GP register or from lane 0 of a SIMD
;; register.  Operand 2 is a one-hot lane mask, endian-corrected as in
;; the other vec_set insns.
889 (define_insn "aarch64_simd_vec_setv2di"
890 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
893 (match_operand:DI 1 "register_operand" "r,w"))
894 (match_operand:V2DI 3 "register_operand" "0,0")
895 (match_operand:SI 2 "immediate_operand" "i,i")))]
898 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
899 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
900 switch (which_alternative)
903 return "ins\\t%0.d[%p2], %1";
905 return "ins\\t%0.d[%p2], %1.d[0]";
910 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; V2DI vec_set expander: plain index -> one-hot mask, as above.
913 (define_expand "vec_setv2di"
914 [(match_operand:V2DI 0 "register_operand")
915 (match_operand:DI 1 "register_operand")
916 (match_operand:SI 2 "immediate_operand")]
919 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
920 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
921 GEN_INT (elem), operands[0]));
;; Floating-point lane insert (VDQF_F16): SIMD-register source only,
;; element-to-element INS from lane 0.
926 (define_insn "aarch64_simd_vec_set<mode>"
927 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
929 (vec_duplicate:VDQF_F16
930 (match_operand:<VEL> 1 "register_operand" "w"))
931 (match_operand:VDQF_F16 3 "register_operand" "0")
932 (match_operand:SI 2 "immediate_operand" "i")))]
935 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
937 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
938 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
940 [(set_attr "type" "neon_ins<q>")]
;; FP vec_set expander: plain index -> one-hot mask.
943 (define_expand "vec_set<mode>"
944 [(match_operand:VDQF_F16 0 "register_operand" "+w")
945 (match_operand:<VEL> 1 "register_operand" "w")
946 (match_operand:SI 2 "immediate_operand" "")]
949 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
950 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
951 GEN_INT (elem), operands[0]));
;; Multiply-accumulate (MLA) and multiply-subtract (MLS) patterns.  The
;; accumulator operand is tied to the output ("0" constraint).  Lane
;; variants endian-correct the index with ENDIAN_LANE_N before printing.
;;
;; acc + a * b  =>  MLA.
957 (define_insn "aarch64_mla<mode>"
958 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
959 (plus:VDQ_BHSI (mult:VDQ_BHSI
960 (match_operand:VDQ_BHSI 2 "register_operand" "w")
961 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
962 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
964 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one factor a duplicated lane of a same-width vector.
968 (define_insn "*aarch64_mla_elt<mode>"
969 [(set (match_operand:VDQHS 0 "register_operand" "=w")
974 (match_operand:VDQHS 1 "register_operand" "<h_con>")
975 (parallel [(match_operand:SI 2 "immediate_operand")])))
976 (match_operand:VDQHS 3 "register_operand" "w"))
977 (match_operand:VDQHS 4 "register_operand" "0")))]
980 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
981 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
983 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by lane of a swapped-width vector (index corrected relative to
;; <VSWAP_WIDTH>mode).
986 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
987 [(set (match_operand:VDQHS 0 "register_operand" "=w")
992 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
993 (parallel [(match_operand:SI 2 "immediate_operand")])))
994 (match_operand:VDQHS 3 "register_operand" "w"))
995 (match_operand:VDQHS 4 "register_operand" "0")))]
998 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
999 INTVAL (operands[2])));
1000 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1002 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; acc - a * b  =>  MLS.
1005 (define_insn "aarch64_mls<mode>"
1006 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1007 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1008 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1009 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1011 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1012 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by duplicated lane.
1015 (define_insn "*aarch64_mls_elt<mode>"
1016 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1018 (match_operand:VDQHS 4 "register_operand" "0")
1020 (vec_duplicate:VDQHS
1022 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1023 (parallel [(match_operand:SI 2 "immediate_operand")])))
1024 (match_operand:VDQHS 3 "register_operand" "w"))))]
1027 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1028 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1030 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by lane of a swapped-width vector.
1033 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1034 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 (match_operand:VDQHS 4 "register_operand" "0")
1038 (vec_duplicate:VDQHS
1040 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))))]
1045 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[2])));
1047 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1052 ;; Max/Min operations.
;; Signed/unsigned element-wise max/min via the MAXMIN code iterator
;; (expands to smax/smin/umax/umin patterns).
1053 (define_insn "<su><maxmin><mode>3"
1054 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1055 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1056 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1058 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1059 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: AdvSIMD has no 64-bit-lane MAX/MIN, so it is
;; synthesized as a compare + conditional select via the internal
;; vcond pattern.  NOTE(review): the lines choosing cmp_operator per
;; max/min/sign are missing from this extract.
1062 (define_expand "<su><maxmin>v2di3"
1063 [(set (match_operand:V2DI 0 "register_operand" "")
1064 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1065 (match_operand:V2DI 2 "register_operand" "")))]
1068 enum rtx_code cmp_operator;
1089 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1090 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1091 operands[2], cmp_fmt, operands[1], operands[2]));
1095 ;; Pairwise Integer Max/Min operations.
;; SMAXP/SMINP/UMAXP/UMINP — pairwise reduction step, unspec-based.
1096 (define_insn "aarch64_<maxmin_uns>p<mode>"
1097 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1098 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1099 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1102 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1103 [(set_attr "type" "neon_minmax<q>")]
1106 ;; Pairwise FP Max/Min operations.
;; FMAXP/FMINP (and NM variants via the iterator) on VDQF.
1107 (define_insn "aarch64_<maxmin_uns>p<mode>"
1108 [(set (match_operand:VDQF 0 "register_operand" "=w")
1109 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1110 (match_operand:VDQF 2 "register_operand" "w")]
1113 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1114 [(set_attr "type" "neon_minmax<q>")]
1117 ;; vec_concat gives a new vector with the low elements from operand 1, and
1118 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1119 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1120 ;; What that means, is that the RTL descriptions of the below patterns
1121 ;; need to change depending on endianness.
1123 ;; Move to the low architectural bits of the register.
1124 ;; On little-endian this is { operand, zeroes }
1125 ;; On big-endian this is { zeroes, operand }
;; Little-endian form for modes with more than two elements (VQ_NO2E).
;; NOTE(review): the output templates for the three alternatives
;; (w / r-via-fmov / r-via-dup) are missing from this extract.
1127 (define_insn "move_lo_quad_internal_<mode>"
1128 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1130 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1131 (vec_duplicate:<VHALF> (const_int 0))))]
1132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1137 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1138 (set_attr "simd" "yes,*,yes")
1139 (set_attr "fp" "*,yes,*")
1140 (set_attr "length" "4")]
;; Little-endian form for two-element modes (VQ_2E) — the zero high half
;; is a plain (const_int 0) rather than a vec_duplicate.
1143 (define_insn "move_lo_quad_internal_<mode>"
1144 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1146 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1148 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1153 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1154 (set_attr "simd" "yes,*,yes")
1155 (set_attr "fp" "*,yes,*")
1156 (set_attr "length" "4")]
;; Big-endian forms: same instructions, but the vec_concat operand order
;; is swapped (zeroes first) to match the reversed lane numbering.
1159 (define_insn "move_lo_quad_internal_be_<mode>"
1160 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1162 (vec_duplicate:<VHALF> (const_int 0))
1163 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1164 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1169 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1170 (set_attr "simd" "yes,*,yes")
1171 (set_attr "fp" "*,yes,*")
1172 (set_attr "length" "4")]
1175 (define_insn "move_lo_quad_internal_be_<mode>"
1176 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1179 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1180 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1185 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1186 (set_attr "simd" "yes,*,yes")
1187 (set_attr "fp" "*,yes,*")
1188 (set_attr "length" "4")]
;; Dispatcher: pick the _be_ or plain internal pattern by endianness.
1191 (define_expand "move_lo_quad_<mode>"
1192 [(match_operand:VQ 0 "register_operand")
1193 (match_operand:VQ 1 "register_operand")]
1196 if (BYTES_BIG_ENDIAN)
1197 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1199 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1204 ;; Move operand1 to the high architectural bits of the register, keeping
1205 ;; the low architectural bits of operand2.
1206 ;; For little-endian this is { operand2, operand1 }
1207 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: keep the low half of op 0 (selected via the
;; vect_par_cnst_lo_half parallel) and insert op 1 as the high half,
;; emitted as INS Vd.d[1], ...
1209 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1210 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1214 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1215 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1216 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1218 ins\\t%0.d[1], %1.d[0]
1220 [(set_attr "type" "neon_ins")]
1223 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1224 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1226 (match_operand:<VHALF> 1 "register_operand" "w,r")
1229 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1232 ins\\t%0.d[1], %1.d[0]
1234 [(set_attr "type" "neon_ins")]
1237 (define_expand "move_hi_quad_<mode>"
1238 [(match_operand:VQ 0 "register_operand" "")
1239 (match_operand:<VHALF> 1 "register_operand" "")]
1242 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1243 if (BYTES_BIG_ENDIAN)
1244 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1247 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1252 ;; Narrowing operations.
1255 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1256 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1257 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1259 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1260 [(set_attr "type" "neon_shift_imm_narrow_q")]
1263 (define_expand "vec_pack_trunc_<mode>"
1264 [(match_operand:<VNARROWD> 0 "register_operand" "")
1265 (match_operand:VDN 1 "register_operand" "")
1266 (match_operand:VDN 2 "register_operand" "")]
1269 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1270 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1271 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1273 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1274 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1275 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1281 (define_insn "vec_pack_trunc_<mode>"
1282 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1283 (vec_concat:<VNARROWQ2>
1284 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1285 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1288 if (BYTES_BIG_ENDIAN)
1289 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1291 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1293 [(set_attr "type" "multiple")
1294 (set_attr "length" "8")]
1297 ;; Widening operations.
1299 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1302 (match_operand:VQW 1 "register_operand" "w")
1303 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1306 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1307 [(set_attr "type" "neon_shift_imm_long")]
1310 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1312 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1313 (match_operand:VQW 1 "register_operand" "w")
1314 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1317 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1318 [(set_attr "type" "neon_shift_imm_long")]
1321 (define_expand "vec_unpack<su>_hi_<mode>"
1322 [(match_operand:<VWIDE> 0 "register_operand" "")
1323 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1326 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1327 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1333 (define_expand "vec_unpack<su>_lo_<mode>"
1334 [(match_operand:<VWIDE> 0 "register_operand" "")
1335 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1339 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1345 ;; Widening arithmetic.
1347 (define_insn "*aarch64_<su>mlal_lo<mode>"
1348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1351 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 2 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 4 "register_operand" "w")
1357 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1359 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1360 [(set_attr "type" "neon_mla_<Vetype>_long")]
1363 (define_insn "*aarch64_<su>mlal_hi<mode>"
1364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1367 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1368 (match_operand:VQW 2 "register_operand" "w")
1369 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1370 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1371 (match_operand:VQW 4 "register_operand" "w")
1373 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1375 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1376 [(set_attr "type" "neon_mla_<Vetype>_long")]
1379 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:<VWIDE> 1 "register_operand" "0")
1384 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1385 (match_operand:VQW 2 "register_operand" "w")
1386 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1387 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1388 (match_operand:VQW 4 "register_operand" "w")
1391 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1392 [(set_attr "type" "neon_mla_<Vetype>_long")]
1395 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1398 (match_operand:<VWIDE> 1 "register_operand" "0")
1400 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1401 (match_operand:VQW 2 "register_operand" "w")
1402 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1403 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1404 (match_operand:VQW 4 "register_operand" "w")
1407 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1408 [(set_attr "type" "neon_mla_<Vetype>_long")]
1411 (define_insn "*aarch64_<su>mlal<mode>"
1412 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1416 (match_operand:VD_BHSI 1 "register_operand" "w"))
1418 (match_operand:VD_BHSI 2 "register_operand" "w")))
1419 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1421 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1422 [(set_attr "type" "neon_mla_<Vetype>_long")]
1425 (define_insn "*aarch64_<su>mlsl<mode>"
1426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1428 (match_operand:<VWIDE> 1 "register_operand" "0")
1431 (match_operand:VD_BHSI 2 "register_operand" "w"))
1433 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1435 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1436 [(set_attr "type" "neon_mla_<Vetype>_long")]
1439 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 1 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 2 "register_operand" "w")
1448 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1449 [(set_attr "type" "neon_mul_<Vetype>_long")]
1452 (define_expand "vec_widen_<su>mult_lo_<mode>"
1453 [(match_operand:<VWIDE> 0 "register_operand" "")
1454 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1455 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1458 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1459 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1466 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1468 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1469 (match_operand:VQW 1 "register_operand" "w")
1470 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1471 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1472 (match_operand:VQW 2 "register_operand" "w")
1475 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1476 [(set_attr "type" "neon_mul_<Vetype>_long")]
1479 (define_expand "vec_widen_<su>mult_hi_<mode>"
1480 [(match_operand:<VWIDE> 0 "register_operand" "")
1481 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1482 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1486 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1494 ;; FP vector operations.
1495 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1496 ;; double-precision (64-bit) floating-point data types and arithmetic as
1497 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1498 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1500 ;; Floating-point operations can raise an exception. Vectorizing such
1501 ;; operations are safe because of reasons explained below.
1503 ;; ARMv8 permits an extension to enable trapped floating-point
1504 ;; exception handling, however this is an optional feature. In the
1505 ;; event of a floating-point exception being raised by vectorised
1507 ;; 1. If trapped floating-point exceptions are available, then a trap
1508 ;; will be taken when any lane raises an enabled exception. A trap
1509 ;; handler may determine which lane raised the exception.
1510 ;; 2. Alternatively a sticky exception flag is set in the
1511 ;; floating-point status register (FPSR). Software may explicitly
1512 ;; test the exception flags, in which case the tests will either
1513 ;; prevent vectorisation, allowing precise identification of the
1514 ;; failing operation, or if tested outside of vectorisable regions
1515 ;; then the specific operation and lane are not of interest.
1517 ;; FP arithmetic operations.
1519 (define_insn "add<mode>3"
1520 [(set (match_operand:VDQF 0 "register_operand" "=w")
1521 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1522 (match_operand:VDQF 2 "register_operand" "w")))]
1524 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1525 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1528 (define_insn "sub<mode>3"
1529 [(set (match_operand:VDQF 0 "register_operand" "=w")
1530 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1531 (match_operand:VDQF 2 "register_operand" "w")))]
1533 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1534 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1537 (define_insn "mul<mode>3"
1538 [(set (match_operand:VDQF 0 "register_operand" "=w")
1539 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1540 (match_operand:VDQF 2 "register_operand" "w")))]
1542 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1546 (define_expand "div<mode>3"
1547 [(set (match_operand:VDQF 0 "register_operand")
1548 (div:VDQF (match_operand:VDQF 1 "general_operand")
1549 (match_operand:VDQF 2 "register_operand")))]
1552 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1555 operands[1] = force_reg (<MODE>mode, operands[1]);
1558 (define_insn "*div<mode>3"
1559 [(set (match_operand:VDQF 0 "register_operand" "=w")
1560 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1561 (match_operand:VDQF 2 "register_operand" "w")))]
1563 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1564 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1567 (define_insn "neg<mode>2"
1568 [(set (match_operand:VDQF 0 "register_operand" "=w")
1569 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1571 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1572 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1575 (define_insn "abs<mode>2"
1576 [(set (match_operand:VDQF 0 "register_operand" "=w")
1577 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1579 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1580 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
1583 (define_insn "fma<mode>4"
1584 [(set (match_operand:VDQF 0 "register_operand" "=w")
1585 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1586 (match_operand:VDQF 2 "register_operand" "w")
1587 (match_operand:VDQF 3 "register_operand" "0")))]
1589 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1590 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1593 (define_insn "*aarch64_fma4_elt<mode>"
1594 [(set (match_operand:VDQF 0 "register_operand" "=w")
1598 (match_operand:VDQF 1 "register_operand" "<h_con>")
1599 (parallel [(match_operand:SI 2 "immediate_operand")])))
1600 (match_operand:VDQF 3 "register_operand" "w")
1601 (match_operand:VDQF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1605 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1607 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1610 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1611 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1613 (vec_duplicate:VDQSF
1615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1616 (parallel [(match_operand:SI 2 "immediate_operand")])))
1617 (match_operand:VDQSF 3 "register_operand" "w")
1618 (match_operand:VDQSF 4 "register_operand" "0")))]
1621 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1622 INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1628 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1629 [(set (match_operand:VMUL 0 "register_operand" "=w")
1632 (match_operand:<VEL> 1 "register_operand" "w"))
1633 (match_operand:VMUL 2 "register_operand" "w")
1634 (match_operand:VMUL 3 "register_operand" "0")))]
1636 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1637 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
1640 (define_insn "*aarch64_fma4_elt_to_64v2df"
1641 [(set (match_operand:DF 0 "register_operand" "=w")
1644 (match_operand:V2DF 1 "register_operand" "w")
1645 (parallel [(match_operand:SI 2 "immediate_operand")]))
1646 (match_operand:DF 3 "register_operand" "w")
1647 (match_operand:DF 4 "register_operand" "0")))]
1650 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1651 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1653 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1656 (define_insn "fnma<mode>4"
1657 [(set (match_operand:VDQF 0 "register_operand" "=w")
1659 (match_operand:VDQF 1 "register_operand" "w")
1661 (match_operand:VDQF 2 "register_operand" "w"))
1662 (match_operand:VDQF 3 "register_operand" "0")))]
1664 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1668 (define_insn "*aarch64_fnma4_elt<mode>"
1669 [(set (match_operand:VDQF 0 "register_operand" "=w")
1672 (match_operand:VDQF 3 "register_operand" "w"))
1675 (match_operand:VDQF 1 "register_operand" "<h_con>")
1676 (parallel [(match_operand:SI 2 "immediate_operand")])))
1677 (match_operand:VDQF 4 "register_operand" "0")))]
1680 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1681 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1683 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1686 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1687 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1690 (match_operand:VDQSF 3 "register_operand" "w"))
1691 (vec_duplicate:VDQSF
1693 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQSF 4 "register_operand" "0")))]
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1699 INTVAL (operands[2])));
1700 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1702 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1705 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1706 [(set (match_operand:VMUL 0 "register_operand" "=w")
1709 (match_operand:VMUL 2 "register_operand" "w"))
1711 (match_operand:<VEL> 1 "register_operand" "w"))
1712 (match_operand:VMUL 3 "register_operand" "0")))]
1714 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1715 [(set_attr "type" "neon<fp>_mla_<Vetype>_scalar<q>")]
1718 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1719 [(set (match_operand:DF 0 "register_operand" "=w")
1722 (match_operand:V2DF 1 "register_operand" "w")
1723 (parallel [(match_operand:SI 2 "immediate_operand")]))
1725 (match_operand:DF 3 "register_operand" "w"))
1726 (match_operand:DF 4 "register_operand" "0")))]
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1730 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1732 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1735 ;; Vector versions of the floating-point frint patterns.
1736 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1737 (define_insn "<frint_pattern><mode>2"
1738 [(set (match_operand:VDQF 0 "register_operand" "=w")
1739 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1742 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1743 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1746 ;; Vector versions of the fcvt standard patterns.
1747 ;; Expands to lbtrunc, lround, lceil, lfloor
1748 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1749 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1750 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1751 [(match_operand:VDQF 1 "register_operand" "w")]
1754 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1755 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1758 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1759 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1760 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1762 (match_operand:VDQF 1 "register_operand" "w")
1763 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1766 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1767 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1769 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1771 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1772 output_asm_insn (buf, operands);
1775 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1778 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1779 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1780 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1781 [(match_operand:VDQF 1 "register_operand")]
1786 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1787 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1788 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1789 [(match_operand:VDQF 1 "register_operand")]
1794 (define_expand "ftrunc<VDQF:mode>2"
1795 [(set (match_operand:VDQF 0 "register_operand")
1796 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1801 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1802 [(set (match_operand:VDQF 0 "register_operand" "=w")
1804 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1806 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1807 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1810 ;; Conversions between vectors of floats and doubles.
1811 ;; Contains a mix of patterns to match standard pattern names
1812 ;; and those for intrinsics.
1814 ;; Float widening operations.
1816 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1818 (float_extend:<VWIDE> (vec_select:<VHALF>
1819 (match_operand:VQ_HSF 1 "register_operand" "w")
1820 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1823 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1824 [(set_attr "type" "neon_fp_cvt_widen_s")]
1827 ;; Convert between fixed-point and floating-point (vector modes)
1829 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VDQF:mode>3"
1830 [(set (match_operand:<VDQF:FCVT_TARGET> 0 "register_operand" "=w")
1831 (unspec:<VDQF:FCVT_TARGET> [(match_operand:VDQF 1 "register_operand" "w")
1832 (match_operand:SI 2 "immediate_operand" "i")]
1835 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1836 [(set_attr "type" "neon_fp_to_int_<VDQF:Vetype><q>")]
1839 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_SDI:mode>3"
1840 [(set (match_operand:<VDQ_SDI:FCVT_TARGET> 0 "register_operand" "=w")
1841 (unspec:<VDQ_SDI:FCVT_TARGET> [(match_operand:VDQ_SDI 1 "register_operand" "w")
1842 (match_operand:SI 2 "immediate_operand" "i")]
1845 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1846 [(set_attr "type" "neon_int_to_fp_<VDQ_SDI:Vetype><q>")]
1849 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1850 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1851 ;; the meaning of HI and LO changes depending on the target endianness.
1852 ;; While elsewhere we map the higher numbered elements of a vector to
1853 ;; the lower architectural lanes of the vector, for these patterns we want
1854 ;; to always treat "hi" as referring to the higher architectural lanes.
1855 ;; Consequently, while the patterns below look inconsistent with our
1856 ;; other big-endian patterns their behavior is as required.
1858 (define_expand "vec_unpacks_lo_<mode>"
1859 [(match_operand:<VWIDE> 0 "register_operand" "")
1860 (match_operand:VQ_HSF 1 "register_operand" "")]
1863 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1864 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1870 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1871 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1872 (float_extend:<VWIDE> (vec_select:<VHALF>
1873 (match_operand:VQ_HSF 1 "register_operand" "w")
1874 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1877 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1878 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen the HIGH architectural half of a float vector (standard pattern
;; name used by the vectorizer).  Build the hi-half lane-select parallel
;; and hand it to the *_hi_ insn, which emits fcvtl2.  Note: must emit the
;; _hi_ variant here — emitting _lo_ (fcvtl) would widen the low half
;; twice and drop the high lanes entirely.
1881 (define_expand "vec_unpacks_hi_<mode>"
1882 [(match_operand:<VWIDE> 0 "register_operand" "")
1883 (match_operand:VQ_HSF 1 "register_operand" "")]
1886 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1887 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
1892 (define_insn "aarch64_float_extend_lo_<Vwide>"
1893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1894 (float_extend:<VWIDE>
1895 (match_operand:VDF 1 "register_operand" "w")))]
1897 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1898 [(set_attr "type" "neon_fp_cvt_widen_s")]
1901 ;; Float narrowing operations.
1903 (define_insn "aarch64_float_truncate_lo_<mode>"
1904 [(set (match_operand:VDF 0 "register_operand" "=w")
1906 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1908 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1909 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1912 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1913 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1915 (match_operand:VDF 1 "register_operand" "0")
1917 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1918 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1919 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1920 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1923 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1924 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1927 (match_operand:<VWIDE> 2 "register_operand" "w"))
1928 (match_operand:VDF 1 "register_operand" "0")))]
1929 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1930 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1931 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1934 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1935 [(match_operand:<VDBL> 0 "register_operand" "=w")
1936 (match_operand:VDF 1 "register_operand" "0")
1937 (match_operand:<VWIDE> 2 "register_operand" "w")]
1940 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1941 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1942 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1943 emit_insn (gen (operands[0], operands[1], operands[2]));
1948 (define_expand "vec_pack_trunc_v2df"
1949 [(set (match_operand:V4SF 0 "register_operand")
1951 (float_truncate:V2SF
1952 (match_operand:V2DF 1 "register_operand"))
1953 (float_truncate:V2SF
1954 (match_operand:V2DF 2 "register_operand"))
1958 rtx tmp = gen_reg_rtx (V2SFmode);
1959 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1960 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1962 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1963 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1964 tmp, operands[hi]));
1969 (define_expand "vec_pack_trunc_df"
1970 [(set (match_operand:V2SF 0 "register_operand")
1973 (match_operand:DF 1 "register_operand"))
1975 (match_operand:DF 2 "register_operand"))
1979 rtx tmp = gen_reg_rtx (V2SFmode);
1980 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1981 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1983 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1984 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1985 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1991 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1993 ;; a = (b < c) ? b : c;
1994 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1995 ;; either explicitly or indirectly via -ffast-math.
1997 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1998 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1999 ;; operand will be returned when both operands are zero (i.e. they may not
2000 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2001 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2004 (define_insn "<su><maxmin><mode>3"
2005 [(set (match_operand:VDQF 0 "register_operand" "=w")
2006 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
2007 (match_operand:VDQF 2 "register_operand" "w")))]
2009 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2010 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2013 (define_insn "<maxmin_uns><mode>3"
2014 [(set (match_operand:VDQF 0 "register_operand" "=w")
2015 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2016 (match_operand:VDQF 2 "register_operand" "w")]
2019 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2020 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2023 ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
2024 (define_insn "<fmaxmin><mode>3"
2025 [(set (match_operand:VDQF 0 "register_operand" "=w")
2026 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2027 (match_operand:VDQF 2 "register_operand" "w")]
2030 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2031 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
2034 ;; 'across lanes' add.
2036 (define_expand "reduc_plus_scal_<mode>"
2037 [(match_operand:<VEL> 0 "register_operand" "=w")
2038 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2042 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2043 rtx scratch = gen_reg_rtx (<MODE>mode);
2044 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2045 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2050 (define_insn "aarch64_faddp<mode>"
2051 [(set (match_operand:VDQF 0 "register_operand" "=w")
2052 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
2053 (match_operand:VDQF 2 "register_operand" "w")]
2056 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2057 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2060 (define_insn "aarch64_reduc_plus_internal<mode>"
2061 [(set (match_operand:VDQV 0 "register_operand" "=w")
2062 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2065 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2066 [(set_attr "type" "neon_reduc_add<q>")]
2069 (define_insn "aarch64_reduc_plus_internalv2si"
2070 [(set (match_operand:V2SI 0 "register_operand" "=w")
2071 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2074 "addp\\t%0.2s, %1.2s, %1.2s"
2075 [(set_attr "type" "neon_reduc_add")]
2078 (define_insn "reduc_plus_scal_<mode>"
2079 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2080 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2083 "faddp\\t%<Vetype>0, %1.<Vtype>"
2084 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2087 (define_expand "reduc_plus_scal_v4sf"
2088 [(set (match_operand:SF 0 "register_operand")
2089 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2093 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2094 rtx scratch = gen_reg_rtx (V4SFmode);
2095 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2096 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2097 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2101 (define_insn "clrsb<mode>2"
2102 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2103 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2105 "cls\\t%0.<Vtype>, %1.<Vtype>"
2106 [(set_attr "type" "neon_cls<q>")]
2109 (define_insn "clz<mode>2"
2110 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2111 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2113 "clz\\t%0.<Vtype>, %1.<Vtype>"
2114 [(set_attr "type" "neon_cls<q>")]
2117 (define_insn "popcount<mode>2"
2118 [(set (match_operand:VB 0 "register_operand" "=w")
2119 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2121 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2122 [(set_attr "type" "neon_cnt<q>")]
2125 ;; 'across lanes' max and min ops.
2127 ;; Template for outputting a scalar, so we can create __builtins which can be
2128 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP across-lanes max/min reduction producing a scalar in operand 0
;; (element mode <VEL>): emit the *_internal reduction into a scratch
;; vector, then extract lane 0 (endian-corrected via ENDIAN_LANE_N).
2129 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2130 [(match_operand:<VEL> 0 "register_operand")
2131 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
2135 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2136 rtx scratch = gen_reg_rtx (<MODE>mode);
2137 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2139 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2144 ;; Likewise for integer cases, signed and unsigned.
2145 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2146 [(match_operand:<VEL> 0 "register_operand")
2147 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2151 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2152 rtx scratch = gen_reg_rtx (<MODE>mode);
2153 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2155 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes reduction: single <maxmin_uns_op>v instruction.
2160 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2161 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2162 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2165 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2166 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI special case: uses the pairwise <maxmin_uns_op>p with the input
;; repeated in both source slots (no across-lanes form for 2x32).
2169 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2170 [(set (match_operand:V2SI 0 "register_operand" "=w")
2171 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2174 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2175 [(set_attr "type" "neon_reduc_minmax")]
;; Floating-point across-lanes reduction (<vp> selects the v/p form).
2178 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2179 [(set (match_operand:VDQF 0 "register_operand" "=w")
2180 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
2183 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2184 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
2187 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2189 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2192 ;; Thus our BSL is of the form:
2193 ;; op0 = bsl (mask, op2, op3)
2194 ;; We can use any of:
2196 ;; if (op0 = mask)
2197 ;; bsl mask, op1, op2
2198 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2199 ;; bit op0, op2, mask
2200 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2201 ;; bif op0, op1, mask
2203 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2204 ;; Some forms of straight-line code may generate the equivalent form
2205 ;; in *aarch64_simd_bsl<mode>_alt.
;; Bitwise select.  Three constraint alternatives tie operand 0 to the
;; mask (op1), op2 or op3 respectively, selecting BSL, BIT or BIF.
;; NOTE(review): interior lines are missing here (the xor/and RTL body
;; and "TARGET_SIMD" condition) -- verify against upstream.
2207 (define_insn "aarch64_simd_bsl<mode>_internal"
2208 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2212 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2213 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2214 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2215 (match_dup:<V_cmp_result> 3)
2219 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2220 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2221 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2222 [(set_attr "type" "neon_bsl<q>")]
2225 ;; We need this form in addition to the above pattern to match the case
2226 ;; when combine tries merging three insns such that the second operand of
2227 ;; the outer XOR matches the second operand of the inner XOR rather than
2228 ;; the first.  The two are equivalent but since recog doesn't try all
2229 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted-XOR twin of the pattern above; note op2/op3 swap in the
;; output templates relative to _internal.
2231 (define_insn "*aarch64_simd_bsl<mode>_alt"
2232 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2236 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2237 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2238 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2239 (match_dup:VSDQ_I_DI 2)))]
2242 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2243 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2244 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2245 [(set_attr "type" "neon_bsl<q>")]
;; Expander: for FP modes, reinterpret all operands in the integer
;; <V_cmp_result> mode (gen_lowpart), run the integer bsl pattern into a
;; temporary, then move the result back as the FP mode.
2248 (define_expand "aarch64_simd_bsl<mode>"
2249 [(match_operand:VALLDIF 0 "register_operand")
2250 (match_operand:<V_cmp_result> 1 "register_operand")
2251 (match_operand:VALLDIF 2 "register_operand")
2252 (match_operand:VALLDIF 3 "register_operand")]
2255 /* We can't alias operands together if they have different modes.  */
2256 rtx tmp = operands[0];
2257 if (FLOAT_MODE_P (<MODE>mode))
2259 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2260 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2261 tmp = gen_reg_rtx (<V_cmp_result>mode);
2263 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2264 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2268 if (tmp != operands[0])
2269 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Integer vector conditional select: compute a -1/0 mask with a cmXX
;; comparison of operands 4/5, then BSL it over operands 1/2.  The
;; comparison code is first normalized (inverted, swapping op1/op2) so
;; NE becomes a CMEQ, and so that a "FOO ? -1 : 0" select degenerates to
;; a plain move of the mask.
;; NOTE(review): switch heads, braces and several case labels are
;; missing from this extraction -- verify against upstream before use.
2274 (define_expand "aarch64_vcond_internal<mode><mode>"
2275 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2276 (if_then_else:VSDQ_I_DI
2277 (match_operator 3 "comparison_operator"
2278 [(match_operand:VSDQ_I_DI 4 "register_operand")
2279 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2280 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2281 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2284 rtx op1 = operands[1];
2285 rtx op2 = operands[2];
2286 rtx mask = gen_reg_rtx (<MODE>mode);
2287 enum rtx_code code = GET_CODE (operands[3]);
2289 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2290 and desirable for other comparisons if it results in FOO ? -1 : 0
2291 (this allows direct use of the comparison result without a bsl).  */
2294 && op1 == CONST0_RTX (<V_cmp_result>mode)
2295 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
/* Invert the comparison code; op1/op2 are swapped correspondingly.  */
2301 case LE: code = GT; break;
2302 case LT: code = GE; break;
2303 case GE: code = LT; break;
2304 case GT: code = LE; break;
2306 case NE: code = EQ; break;
2307 case LTU: code = GEU; break;
2308 case LEU: code = GTU; break;
2309 case GTU: code = LEU; break;
2310 case GEU: code = LTU; break;
2311 default: gcc_unreachable ();
2315 /* Make sure we can handle the last operand.  */
2319 /* Normalized to EQ above.  */
2327 /* These instructions have a form taking an immediate zero.  */
2328 if (operands[5] == CONST0_RTX (<MODE>mode))
2330 /* Fall through, as may need to load into register.  */
2332 if (!REG_P (operands[5]))
2333 operands[5] = force_reg (<MODE>mode, operands[5]);
/* Emit the comparison for the (normalized) code; GEU/GTU are realized
   by swapping the operands of CMGTU/CMGEU.  */
2340 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2344 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2348 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2352 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2356 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2360 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2364 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2368 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2371 /* NE has been normalized to EQ above.  */
2373 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2380 /* If we have (a = (b CMP c) ? -1 : 0);
2381 Then we can simply move the generated mask.  */
2383 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2384 && op2 == CONST0_RTX (<V_cmp_result>mode))
2385 emit_move_insn (operands[0], mask);
/* General case: force the select values into registers and BSL.  */
2389 op1 = force_reg (<MODE>mode, op1);
2391 op2 = force_reg (<MODE>mode, op2);
2392 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point vector conditional select.  Picks a base and a
;; complementary FCM comparison per code; unordered-aware codes are
;; implemented by emitting the inverse comparison and swapping the BSL
;; select operands (FCM yields false for unordered lanes).
;; NOTE(review): "complimentary_comparison" is a typo for
;; "complementary" in the upstream identifier -- kept as-is since
;; renaming would need the (unseen) uses fixed too.
;; NOTE(review): switch heads, case labels and braces are missing from
;; this extraction -- verify against upstream before use.
2399 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2400 [(set (match_operand:VDQF_COND 0 "register_operand")
2402 (match_operator 3 "comparison_operator"
2403 [(match_operand:VDQF 4 "register_operand")
2404 (match_operand:VDQF 5 "nonmemory_operand")])
2405 (match_operand:VDQF_COND 1 "nonmemory_operand")
2406 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2410 int use_zero_form = 0;
2411 int swap_bsl_operands = 0;
2412 rtx op1 = operands[1];
2413 rtx op2 = operands[2];
2414 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2415 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2417 rtx (*base_comparison) (rtx, rtx, rtx);
2418 rtx (*complimentary_comparison) (rtx, rtx, rtx);
/* Decide whether the compare-against-zero instruction forms apply.  */
2420 switch (GET_CODE (operands[3]))
2427 if (operands[5] == CONST0_RTX (<MODE>mode))
2434 if (!REG_P (operands[5]))
2435 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
/* Select the base/complementary comparison generators per code.  */
2438 switch (GET_CODE (operands[3]))
2448 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2449 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2457 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2458 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2463 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2464 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2470 switch (GET_CODE (operands[3]))
2477 /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2478 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2484 Note that there also exist direct comparison against 0 forms,
2485 so catch those as a special case.  */
2489 switch (GET_CODE (operands[3]))
2492 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2495 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2498 /* Do nothing, other zero form cases already have the correct
2505 emit_insn (base_comparison (mask, operands[4], operands[5]));
2507 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2514 /* FCM returns false for lanes which are unordered, so if we use
2515 the inverse of the comparison we actually want to emit, then
2516 swap the operands to BSL, we will end up with the correct result.
2517 Note that a NE NaN and NaN NE b are true for all a, b.
2519 Our transformations are:
2524 a NE b -> !(a EQ b) */
2527 emit_insn (base_comparison (mask, operands[4], operands[5]));
2529 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2531 swap_bsl_operands = 1;
2534 /* We check (a > b || b > a).  combining these comparisons give us
2535 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2536 will then give us (a == b || a UNORDERED b) as intended.  */
2538 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2539 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2540 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2541 swap_bsl_operands = 1;
2544 /* Operands are ORDERED iff (a > b || b >= a).
2545 Swapping the operands to BSL will give the UNORDERED case.  */
2546 swap_bsl_operands = 1;
2549 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2550 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2551 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2557 if (swap_bsl_operands)
2563 /* If we have (a = (b CMP c) ? -1 : 0);
2564 Then we can simply move the generated mask.  */
2566 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2567 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2568 emit_move_insn (operands[0], mask);
/* General case: force the select values into registers and BSL.  */
2572 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2574 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2575 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-named vcond expanders: each simply forwards all six operands
;; to the matching aarch64_vcond_internal expander above.
2582 (define_expand "vcond<mode><mode>"
2583 [(set (match_operand:VALLDI 0 "register_operand")
2584 (if_then_else:VALLDI
2585 (match_operator 3 "comparison_operator"
2586 [(match_operand:VALLDI 4 "register_operand")
2587 (match_operand:VALLDI 5 "nonmemory_operand")])
2588 (match_operand:VALLDI 1 "nonmemory_operand")
2589 (match_operand:VALLDI 2 "nonmemory_operand")))]
2592 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2593 operands[2], operands[3],
2594 operands[4], operands[5]));
;; Mixed-mode variant: FP comparison (VDQF) selecting between integer
;; <V_cmp_result> values.
2598 (define_expand "vcond<v_cmp_result><mode>"
2599 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2600 (if_then_else:<V_cmp_result>
2601 (match_operator 3 "comparison_operator"
2602 [(match_operand:VDQF 4 "register_operand")
2603 (match_operand:VDQF 5 "nonmemory_operand")])
2604 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2605 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2608 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2609 operands[0], operands[1],
2610 operands[2], operands[3],
2611 operands[4], operands[5]));
;; Unsigned variant; the internal expander handles the unsigned codes
;; (LTU/GTU/...) itself, so this is the same forwarding call.
2615 (define_expand "vcondu<mode><mode>"
2616 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2617 (if_then_else:VSDQ_I_DI
2618 (match_operator 3 "comparison_operator"
2619 [(match_operand:VSDQ_I_DI 4 "register_operand")
2620 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2621 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2622 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2625 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2626 operands[2], operands[3],
2627 operands[4], operands[5]));
2631 ;; Patterns for AArch64 SIMD Intrinsics.
2633 ;; Lane extraction with sign extension to general purpose register.
;; Sign-extending lane extract to a general-purpose register (SMOV).
;; The lane index is flipped for big-endian via ENDIAN_LANE_N at output
;; time; RTL keeps GCC vector-extension lane numbering.
2634 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2635 [(set (match_operand:GPI 0 "register_operand" "=r")
2638 (match_operand:VDQQH 1 "register_operand" "w")
2639 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2642 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2643 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2645 [(set_attr "type" "neon_to_gp<q>")]
;; Zero-extending lane extract to a 32-bit GP register (UMOV).
2648 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2649 [(set (match_operand:SI 0 "register_operand" "=r")
2652 (match_operand:VDQQH 1 "register_operand" "w")
2653 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2656 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2657 return "umov\\t%w0, %1.<Vetype>[%2]";
2659 [(set_attr "type" "neon_to_gp<q>")]
2662 ;; Lane extraction of a value, neither sign nor zero extension
2663 ;; is guaranteed so upper bits should be considered undefined.
2664 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: to GP reg (umov), to SIMD reg (dup), to memory (st1).
2665 (define_insn "aarch64_get_lane<mode>"
2666 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2668 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2669 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2672 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2673 switch (which_alternative)
2676 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2678 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2680 return "st1\\t{%1.<Vetype>}[%2], %0";
2685 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2688 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concatenate a 64-bit vector with zero into a 128-bit vector;
;; little-endian ordering (value in low half, zero in high half).
2691 (define_insn "*aarch64_combinez<mode>"
2692 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2694 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2695 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2696 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2701 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2702 (set_attr "simd" "yes,*,yes")
2703 (set_attr "fp" "*,yes,*")]
;; Big-endian twin: the zero half comes first in the vec_concat.
2706 (define_insn "*aarch64_combinez_be<mode>"
2707 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2709 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2710 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2711 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2716 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2717 (set_attr "simd" "yes,*,yes")
2718 (set_attr "fp" "*,yes,*")]
;; User-level combine: delegates to combine_internal; on big-endian the
;; operand order is adjusted first (the swap itself is in lines missing
;; from this extraction -- verify upstream).
2721 (define_expand "aarch64_combine<mode>"
2722 [(match_operand:<VDBL> 0 "register_operand")
2723 (match_operand:VDC 1 "register_operand")
2724 (match_operand:VDC 2 "register_operand")]
2728 if (BYTES_BIG_ENDIAN)
2738 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until after reload, then split into the two half
;; moves via aarch64_split_simd_combine (halves swapped on big-endian).
2743 (define_insn_and_split "aarch64_combine_internal<mode>"
2744 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2745 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2746 (match_operand:VDC 2 "register_operand" "w")))]
2749 "&& reload_completed"
2752 if (BYTES_BIG_ENDIAN)
2753 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2755 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2758 [(set_attr "type" "multiple")]
;; Alternative combine used by the split: move low then high quad.
2761 (define_expand "aarch64_simd_combine<mode>"
2762 [(match_operand:<VDBL> 0 "register_operand")
2763 (match_operand:VDC 1 "register_operand")
2764 (match_operand:VDC 2 "register_operand")]
2767 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2768 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2771 [(set_attr "type" "multiple")]
2774 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q-register vectors:
;; {s,u}{add,sub}l2, extending each selected half to <VWIDE>.
2776 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2777 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2778 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2779 (match_operand:VQW 1 "register_operand" "w")
2780 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2781 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2782 (match_operand:VQW 2 "register_operand" "w")
2785 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2786 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same on the LOW halves: {s,u}{add,sub}l.
2789 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2790 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2791 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2792 (match_operand:VQW 1 "register_operand" "w")
2793 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2794 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2795 (match_operand:VQW 2 "register_operand" "w")
2798 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2799 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; The four *2 builtin expanders below each build the hi-half selector
;; (aarch64_simd_vect_par_cnst_half with true = high) and delegate to
;; the matching _hi_internal pattern.
2803 (define_expand "aarch64_saddl2<mode>"
2804 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2805 (match_operand:VQW 1 "register_operand" "w")
2806 (match_operand:VQW 2 "register_operand" "w")]
2809 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2810 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2815 (define_expand "aarch64_uaddl2<mode>"
2816 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2817 (match_operand:VQW 1 "register_operand" "w")
2818 (match_operand:VQW 2 "register_operand" "w")]
2821 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2822 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2827 (define_expand "aarch64_ssubl2<mode>"
2828 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2829 (match_operand:VQW 1 "register_operand" "w")
2830 (match_operand:VQW 2 "register_operand" "w")]
2833 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2834 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2839 (define_expand "aarch64_usubl2<mode>"
2840 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2841 (match_operand:VQW 1 "register_operand" "w")
2842 (match_operand:VQW 2 "register_operand" "w")]
2845 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2846 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-D-register widening add/sub (no half selection needed).
2851 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2852 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2853 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2854 (match_operand:VD_BHSI 1 "register_operand" "w"))
2856 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2858 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2859 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2862 ;; <su><addsub>w<q>.
;; widen_ssum for Q registers: widen-and-accumulate via saddw on the low
;; half followed by saddw2 on the high half.
2864 (define_expand "widen_ssum<mode>3"
2865 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2866 (plus:<VDBLW> (sign_extend:<VDBLW>
2867 (match_operand:VQW 1 "register_operand" ""))
2868 (match_operand:<VDBLW> 2 "register_operand" "")))]
2871 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2872 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2874 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2876 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; widen_ssum for D registers: a single saddw suffices.
2881 (define_expand "widen_ssum<mode>3"
2882 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2883 (plus:<VWIDE> (sign_extend:<VWIDE>
2884 (match_operand:VD_BHSI 1 "register_operand" ""))
2885 (match_operand:<VWIDE> 2 "register_operand" "")))]
2888 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two expanders above (uaddw/uaddw2).
2892 (define_expand "widen_usum<mode>3"
2893 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2894 (plus:<VDBLW> (zero_extend:<VDBLW>
2895 (match_operand:VQW 1 "register_operand" ""))
2896 (match_operand:<VDBLW> 2 "register_operand" "")))]
2899 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2900 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2902 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2904 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2909 (define_expand "widen_usum<mode>3"
2910 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2911 (plus:<VWIDE> (zero_extend:<VWIDE>
2912 (match_operand:VD_BHSI 1 "register_operand" ""))
2913 (match_operand:<VWIDE> 2 "register_operand" "")))]
2916 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide op1 +/- extended narrow op2: {s,u}{add,sub}w.
2920 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2922 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2924 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2926 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2927 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same but op2 is the LOW half of a Q register.
2930 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2931 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2932 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2935 (match_operand:VQW 2 "register_operand" "w")
2936 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2938 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2939 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same but op2 is the HIGH half: the *w2 instruction forms.
2942 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2943 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2944 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2947 (match_operand:VQW 2 "register_operand" "w")
2948 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2950 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2951 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builtin expanders for the *w2 forms: build the hi-half selector and
;; delegate to the matching *w2_internal pattern.
2954 (define_expand "aarch64_saddw2<mode>"
2955 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2956 (match_operand:<VWIDE> 1 "register_operand" "w")
2957 (match_operand:VQW 2 "register_operand" "w")]
2960 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2961 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2966 (define_expand "aarch64_uaddw2<mode>"
2967 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2968 (match_operand:<VWIDE> 1 "register_operand" "w")
2969 (match_operand:VQW 2 "register_operand" "w")]
2972 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2973 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2979 (define_expand "aarch64_ssubw2<mode>"
2980 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2981 (match_operand:<VWIDE> 1 "register_operand" "w")
2982 (match_operand:VQW 2 "register_operand" "w")]
2985 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2986 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2991 (define_expand "aarch64_usubw2<mode>"
2992 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2993 (match_operand:<VWIDE> 1 "register_operand" "w")
2994 (match_operand:VQW 2 "register_operand" "w")]
2997 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2998 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3003 ;; <su><r>h<addsub>.
;; Halving add/sub (optionally rounding): <sur>h<addsub>, modeled as an
;; unspec over the two register inputs.
3005 (define_insn "aarch64_<sur>h<addsub><mode>"
3006 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3007 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3008 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3011 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3012 [(set_attr "type" "neon_<addsub>_halve<q>")]
3015 ;; <r><addsub>hn<q>.
;; Narrowing-high add/sub: result is the narrow high-half type <Vntype>.
3017 (define_insn "aarch64_<sur><addsub>hn<mode>"
3018 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3019 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3020 (match_operand:VQN 2 "register_operand" "w")]
3023 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3024 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; *hn2 writes the upper half of the destination; operand 1 (tied "0")
;; supplies the preserved lower half.
3027 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3028 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3029 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3030 (match_operand:VQN 2 "register_operand" "w")
3031 (match_operand:VQN 3 "register_operand" "w")]
3034 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3035 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply on byte vectors (PMUL), modeled as an unspec.
3040 (define_insn "aarch64_pmul<mode>"
3041 [(set (match_operand:VB 0 "register_operand" "=w")
3042 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3043 (match_operand:VB 2 "register_operand" "w")]
3046 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3047 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX (vector and scalar FP modes via VALLF), modeled as an unspec.
3052 (define_insn "aarch64_fmulx<mode>"
3053 [(set (match_operand:VALLF 0 "register_operand" "=w")
3054 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
3055 (match_operand:VALLF 2 "register_operand" "w")]
3058 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3059 [(set_attr "type" "neon_fp_mul_<Vetype>")]
3062 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a duplicated lane taken from a vector of the other width
;; (<VSWAP_WIDTH>); lane index endian-corrected at output time.
3064 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3065 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3067 [(match_operand:VDQSF 1 "register_operand" "w")
3068 (vec_duplicate:VDQSF
3070 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3071 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3075 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3076 INTVAL (operands[3])));
3077 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3079 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3082 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; Same-width lane variant of the pattern above.
3084 (define_insn "*aarch64_mulx_elt<mode>"
3085 [(set (match_operand:VDQF 0 "register_operand" "=w")
3087 [(match_operand:VDQF 1 "register_operand" "w")
3090 (match_operand:VDQF 2 "register_operand" "w")
3091 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3095 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3096 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3098 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; V2DF by a broadcast scalar DF: always lane 0, no endian fixup needed.
3103 (define_insn "*aarch64_mulx_elt_to_64v2df"
3104 [(set (match_operand:V2DF 0 "register_operand" "=w")
3106 [(match_operand:V2DF 1 "register_operand" "w")
3108 (match_operand:DF 2 "register_operand" "w"))]
3112 return "fmulx\t%0.2d, %1.2d, %2.d[0]";
3114 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
3117 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3118 ;; vmulxd_lane_f64 == vmulx_lane_f64
3119 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of element 1 with a selected lane of vector operand 2.
3121 (define_insn "*aarch64_vgetfmulx<mode>"
3122 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3124 [(match_operand:<VEL> 1 "register_operand" "w")
3126 (match_operand:VDQF_DF 2 "register_operand" "w")
3127 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3131 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3132 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3134 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: BINQOPS covers the signed/unsigned q-op variants,
;; named via <su_optab><optab>.
3138 (define_insn "aarch64_<su_optab><optab><mode>"
3139 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3140 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3141 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3143 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3144 [(set_attr "type" "neon_<optab><q>")]
3147 ;; suqadd and usqadd
;; Accumulating forms: operand 1 is tied to the destination ("0") and
;; only operand 2 appears in the assembly.
3149 (define_insn "aarch64_<sur>qadd<mode>"
3150 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3151 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3152 (match_operand:VSDQ_I 2 "register_operand" "w")]
3155 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3156 [(set_attr "type" "neon_qadd<q>")]
;; Saturating narrow of signed to unsigned (SQXTUN).
3161 (define_insn "aarch64_sqmovun<mode>"
3162 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3163 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3166 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3167 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3170 ;; sqmovn and uqmovn
3172 (define_insn "aarch64_<sur>qmovn<mode>"
3173 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3174 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3177 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3178 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (s<optab>: e.g. the sqabs/sqneg family --
;; the selecting iterator is on a line missing from this extraction).
3183 (define_insn "aarch64_s<optab><mode>"
3184 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3186 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3188 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3189 [(set_attr "type" "neon_<optab><q>")]
;; Saturating doubling multiply high half, plain and rounding (<r>).
3194 (define_insn "aarch64_sq<r>dmulh<mode>"
3195 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3197 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3198 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3201 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3202 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Vector-by-lane variant; lane taken from a <VCOND> vector, index
;; endian-corrected via ENDIAN_LANE_N before printing.
3207 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3208 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3210 [(match_operand:VDQHS 1 "register_operand" "w")
3212 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3213 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3217 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3218 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3219 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; As above but the lane source is the full-width <VCONQ> vector.
3222 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3223 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3225 [(match_operand:VDQHS 1 "register_operand" "w")
3227 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3228 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3232 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3233 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3234 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by-lane variant.
3237 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3238 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3240 [(match_operand:SD_HSI 1 "register_operand" "w")
3242 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3243 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3247 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3248 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3249 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar by-lane variant with full-width lane source.
3252 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3253 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3255 [(match_operand:SD_HSI 1 "register_operand" "w")
3257 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3258 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3262 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3263 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3264 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3269 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3270 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3272 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3273 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3274 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3277 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3278 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3281 ;; sqrdml[as]h_lane.
3283 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3284 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3286 [(match_operand:VDQHS 1 "register_operand" "0")
3287 (match_operand:VDQHS 2 "register_operand" "w")
3289 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3290 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3294 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3296 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3298 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3301 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3302 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3304 [(match_operand:SD_HSI 1 "register_operand" "0")
3305 (match_operand:SD_HSI 2 "register_operand" "w")
3307 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3308 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3312 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3314 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3316 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3319 ;; sqrdml[as]h_laneq.

;; As sqrdml[as]h_lane, but the lane is taken from a 128-bit ("q")
;; vector, hence the <VCONQ> mode on operand 3.
3321 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3322 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3324 [(match_operand:VDQHS 1 "register_operand" "0")
3325 (match_operand:VDQHS 2 "register_operand" "w")
3327 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3328 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
;; Endianness fixup of the lane index.
3332 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3334 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3336 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar form of sqrdml[as]h_laneq for H/S scalar modes (SD_HSI).
3339 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3340 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3342 [(match_operand:SD_HSI 1 "register_operand" "0")
3343 (match_operand:SD_HSI 2 "register_operand" "w")
3345 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3346 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
;; Endianness fixup of the lane index.
3350 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
;; Use <Vetype> for the lane element suffix, matching the three
;; sibling lane/laneq patterns (for HI/SI both <v> and <Vetype>
;; print "h"/"s", so assembly output is unchanged).
3352 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3354 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; vqdml[sa]l: signed saturating doubling multiply-add/-subtract long.
;; Both narrow multiplicands are sign-extended to the wide mode; the
;; wide accumulator (operand 1) is tied to the output register.
3359 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3360 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3362 (match_operand:<VWIDE> 1 "register_operand" "0")
3365 (sign_extend:<VWIDE>
3366 (match_operand:VSD_HSI 2 "register_operand" "w"))
3367 (sign_extend:<VWIDE>
3368 (match_operand:VSD_HSI 3 "register_operand" "w")))
3371 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3372 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; Lane form: second multiplicand is one lane of a 64-bit vector
;; (<VCOND>), duplicated across the vector before the widening multiply.
3377 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3378 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3380 (match_operand:<VWIDE> 1 "register_operand" "0")
3383 (sign_extend:<VWIDE>
3384 (match_operand:VD_HSI 2 "register_operand" "w"))
3385 (sign_extend:<VWIDE>
3386 (vec_duplicate:VD_HSI
3388 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3389 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
;; Endianness fixup of the lane index before printing.
3394 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3396 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3398 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Laneq form: lane taken from a 128-bit vector (<VCONQ>).
3401 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3402 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3404 (match_operand:<VWIDE> 1 "register_operand" "0")
3407 (sign_extend:<VWIDE>
3408 (match_operand:VD_HSI 2 "register_operand" "w"))
3409 (sign_extend:<VWIDE>
3410 (vec_duplicate:VD_HSI
3412 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3413 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3418 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3420 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3422 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) lane form: no vec_duplicate needed, a single
;; element is selected directly.
3425 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3428 (match_operand:<VWIDE> 1 "register_operand" "0")
3431 (sign_extend:<VWIDE>
3432 (match_operand:SD_HSI 2 "register_operand" "w"))
3433 (sign_extend:<VWIDE>
3435 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3436 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3441 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3443 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3445 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) laneq form.
3448 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3449 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3451 (match_operand:<VWIDE> 1 "register_operand" "0")
3454 (sign_extend:<VWIDE>
3455 (match_operand:SD_HSI 2 "register_operand" "w"))
3456 (sign_extend:<VWIDE>
3458 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3459 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3464 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3466 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3468 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; _n form: second multiplicand is a scalar register broadcast to all
;; lanes; emitted as lane 0 of that register in the assembly.
3473 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3474 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3476 (match_operand:<VWIDE> 1 "register_operand" "0")
3479 (sign_extend:<VWIDE>
3480 (match_operand:VD_HSI 2 "register_operand" "w"))
3481 (sign_extend:<VWIDE>
3482 (vec_duplicate:VD_HSI
3483 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3486 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3487 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l2: as sqdml[as]l but operating on the HIGH halves of the
;; 128-bit sources, selected via a vect_par_cnst_hi_half parallel.
;; The *_internal insns take that parallel as an extra operand; the
;; expanders below build it with aarch64_simd_vect_par_cnst_half.
3492 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3493 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3495 (match_operand:<VWIDE> 1 "register_operand" "0")
3498 (sign_extend:<VWIDE>
3500 (match_operand:VQ_HSI 2 "register_operand" "w")
3501 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3502 (sign_extend:<VWIDE>
3504 (match_operand:VQ_HSI 3 "register_operand" "w")
3508 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3509 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expander for the add ("al") variant.
3512 (define_expand "aarch64_sqdmlal2<mode>"
3513 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3514 (match_operand:<VWIDE> 1 "register_operand" "w")
3515 (match_operand:VQ_HSI 2 "register_operand" "w")
3516 (match_operand:VQ_HSI 3 "register_operand" "w")]
3519 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3520 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3521 operands[2], operands[3], p));

;; Expander for the subtract ("sl") variant.
3525 (define_expand "aarch64_sqdmlsl2<mode>"
3526 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3527 (match_operand:<VWIDE> 1 "register_operand" "w")
3528 (match_operand:VQ_HSI 2 "register_operand" "w")
3529 (match_operand:VQ_HSI 3 "register_operand" "w")]
3532 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3533 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3534 operands[2], operands[3], p));

;; High-half lane form: multiplicand lane from a 64-bit vector.
3540 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3541 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3543 (match_operand:<VWIDE> 1 "register_operand" "0")
3546 (sign_extend:<VWIDE>
3548 (match_operand:VQ_HSI 2 "register_operand" "w")
3549 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3550 (sign_extend:<VWIDE>
3551 (vec_duplicate:<VHALF>
3553 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3554 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
;; Endianness fixup of the lane index before printing.
3559 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3561 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3563 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; High-half laneq form: multiplicand lane from a 128-bit vector.
3566 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3567 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3569 (match_operand:<VWIDE> 1 "register_operand" "0")
3572 (sign_extend:<VWIDE>
3574 (match_operand:VQ_HSI 2 "register_operand" "w")
3575 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3576 (sign_extend:<VWIDE>
3577 (vec_duplicate:<VHALF>
3579 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3580 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3585 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3587 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3589 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expanders for the four lane/laneq x al/sl combinations; each builds
;; the hi-half selector and forwards to the matching *_internal insn.
3592 (define_expand "aarch64_sqdmlal2_lane<mode>"
3593 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3594 (match_operand:<VWIDE> 1 "register_operand" "w")
3595 (match_operand:VQ_HSI 2 "register_operand" "w")
3596 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3597 (match_operand:SI 4 "immediate_operand" "i")]
3600 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3601 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3602 operands[2], operands[3],

3607 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3608 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3609 (match_operand:<VWIDE> 1 "register_operand" "w")
3610 (match_operand:VQ_HSI 2 "register_operand" "w")
3611 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3612 (match_operand:SI 4 "immediate_operand" "i")]
3615 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3616 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3617 operands[2], operands[3],

3622 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3623 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3624 (match_operand:<VWIDE> 1 "register_operand" "w")
3625 (match_operand:VQ_HSI 2 "register_operand" "w")
3626 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3627 (match_operand:SI 4 "immediate_operand" "i")]
3630 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3631 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3632 operands[2], operands[3],

3637 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3638 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3639 (match_operand:<VWIDE> 1 "register_operand" "w")
3640 (match_operand:VQ_HSI 2 "register_operand" "w")
3641 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3642 (match_operand:SI 4 "immediate_operand" "i")]
3645 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3646 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3647 operands[2], operands[3],

;; High-half _n form: scalar register broadcast, printed as lane 0.
3652 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3653 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3655 (match_operand:<VWIDE> 1 "register_operand" "0")
3658 (sign_extend:<VWIDE>
3660 (match_operand:VQ_HSI 2 "register_operand" "w")
3661 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3662 (sign_extend:<VWIDE>
3663 (vec_duplicate:<VHALF>
3664 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3667 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3668 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

3671 (define_expand "aarch64_sqdmlal2_n<mode>"
3672 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3673 (match_operand:<VWIDE> 1 "register_operand" "w")
3674 (match_operand:VQ_HSI 2 "register_operand" "w")
3675 (match_operand:<VEL> 3 "register_operand" "w")]
3678 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3679 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3680 operands[2], operands[3],

3685 (define_expand "aarch64_sqdmlsl2_n<mode>"
3686 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3687 (match_operand:<VWIDE> 1 "register_operand" "w")
3688 (match_operand:VQ_HSI 2 "register_operand" "w")
3689 (match_operand:<VEL> 3 "register_operand" "w")]
3692 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3693 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3694 operands[2], operands[3],
;; sqdmull: signed saturating doubling multiply long (no accumulate);
;; same lane/laneq/_n variant structure as sqdml[as]l above.
3701 (define_insn "aarch64_sqdmull<mode>"
3702 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3705 (sign_extend:<VWIDE>
3706 (match_operand:VSD_HSI 1 "register_operand" "w"))
3707 (sign_extend:<VWIDE>
3708 (match_operand:VSD_HSI 2 "register_operand" "w")))
3711 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3712 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; Lane form (64-bit lane source, <VCOND>).
3717 (define_insn "aarch64_sqdmull_lane<mode>"
3718 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3721 (sign_extend:<VWIDE>
3722 (match_operand:VD_HSI 1 "register_operand" "w"))
3723 (sign_extend:<VWIDE>
3724 (vec_duplicate:VD_HSI
3726 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3727 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
;; Endianness fixup of the lane index before printing.
3732 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3733 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3735 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Laneq form (128-bit lane source, <VCONQ>).
3738 (define_insn "aarch64_sqdmull_laneq<mode>"
3739 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3742 (sign_extend:<VWIDE>
3743 (match_operand:VD_HSI 1 "register_operand" "w"))
3744 (sign_extend:<VWIDE>
3745 (vec_duplicate:VD_HSI
3747 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3748 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3753 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3754 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3756 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) lane form.
3759 (define_insn "aarch64_sqdmull_lane<mode>"
3760 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3763 (sign_extend:<VWIDE>
3764 (match_operand:SD_HSI 1 "register_operand" "w"))
3765 (sign_extend:<VWIDE>
3767 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3768 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3773 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3774 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3776 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) laneq form.
3779 (define_insn "aarch64_sqdmull_laneq<mode>"
3780 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3783 (sign_extend:<VWIDE>
3784 (match_operand:SD_HSI 1 "register_operand" "w"))
3785 (sign_extend:<VWIDE>
3787 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3788 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3793 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3794 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3796 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; _n form: scalar register broadcast, printed as lane 0.
3801 (define_insn "aarch64_sqdmull_n<mode>"
3802 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805 (sign_extend:<VWIDE>
3806 (match_operand:VD_HSI 1 "register_operand" "w"))
3807 (sign_extend:<VWIDE>
3808 (vec_duplicate:VD_HSI
3809 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3813 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3814 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: sqdmull on the HIGH halves of 128-bit sources, selected
;; by a vect_par_cnst_hi_half parallel; expanders build that selector
;; with aarch64_simd_vect_par_cnst_half and call the *_internal insns.
3821 (define_insn "aarch64_sqdmull2<mode>_internal"
3822 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3825 (sign_extend:<VWIDE>
3827 (match_operand:VQ_HSI 1 "register_operand" "w")
3828 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3829 (sign_extend:<VWIDE>
3831 (match_operand:VQ_HSI 2 "register_operand" "w")
3836 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3837 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3840 (define_expand "aarch64_sqdmull2<mode>"
3841 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3842 (match_operand:VQ_HSI 1 "register_operand" "w")
3843 (match_operand:VQ_HSI 2 "register_operand" "w")]
3846 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3847 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; High-half lane form (64-bit lane source).
3854 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3855 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3858 (sign_extend:<VWIDE>
3860 (match_operand:VQ_HSI 1 "register_operand" "w")
3861 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3862 (sign_extend:<VWIDE>
3863 (vec_duplicate:<VHALF>
3865 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3866 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
;; Endianness fixup of the lane index before printing.
3871 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3872 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3874 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; High-half laneq form (128-bit lane source).
3877 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3878 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3881 (sign_extend:<VWIDE>
3883 (match_operand:VQ_HSI 1 "register_operand" "w")
3884 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3885 (sign_extend:<VWIDE>
3886 (vec_duplicate:<VHALF>
3888 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3889 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3894 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3895 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3897 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3900 (define_expand "aarch64_sqdmull2_lane<mode>"
3901 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3902 (match_operand:VQ_HSI 1 "register_operand" "w")
3903 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3904 (match_operand:SI 3 "immediate_operand" "i")]
3907 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3908 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3909 operands[2], operands[3],

3914 (define_expand "aarch64_sqdmull2_laneq<mode>"
3915 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3916 (match_operand:VQ_HSI 1 "register_operand" "w")
3917 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3918 (match_operand:SI 3 "immediate_operand" "i")]
3921 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3922 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3923 operands[2], operands[3],

;; High-half _n form: scalar register broadcast, printed as lane 0.
3930 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3931 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3934 (sign_extend:<VWIDE>
3936 (match_operand:VQ_HSI 1 "register_operand" "w")
3937 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3938 (sign_extend:<VWIDE>
3939 (vec_duplicate:<VHALF>
3940 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3944 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3945 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3948 (define_expand "aarch64_sqdmull2_n<mode>"
3949 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3950 (match_operand:VQ_HSI 1 "register_operand" "w")
3951 (match_operand:<VEL> 2 "register_operand" "w")]
3954 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3955 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shifts.  <sur> selects the signed/unsigned (and rounding)
;; variants via the iterator's string attribute.

;; Register-controlled shift.
3962 (define_insn "aarch64_<sur>shl<mode>"
3963 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3965 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3966 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3969 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3970 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) register-controlled shift.
3976 (define_insn "aarch64_<sur>q<r>shl<mode>"
3977 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3979 [(match_operand:VSDQ_I 1 "register_operand" "w")
3980 (match_operand:VSDQ_I 2 "register_operand" "w")]
3983 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3984 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift left by immediate; when the shift equals the element
;; width the plain SHLL encoding must be used instead of [US]SHLL.
3989 (define_insn "aarch64_<sur>shll_n<mode>"
3990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3991 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3993 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3997 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
3998 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4000 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4002 [(set_attr "type" "neon_shift_imm_long")]

;; High-half widening shift left by immediate; same SHLL2 special case.
4007 (define_insn "aarch64_<sur>shll2_n<mode>"
4008 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4010 (match_operand:SI 2 "immediate_operand" "i")]
4014 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4015 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4017 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4019 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate.
4024 (define_insn "aarch64_<sur>shr_n<mode>"
4025 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4026 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4028 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4031 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4032 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift right and accumulate; operand 1 is the accumulator ("0").
4037 (define_insn "aarch64_<sur>sra_n<mode>"
4038 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4039 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4040 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4042 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4045 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4046 [(set_attr "type" "neon_shift_acc<q>")]

;; Shift-and-insert (SLI/SRI); destination bits outside the shifted
;; field come from operand 1, hence the "0" tie.
4051 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4052 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4053 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4054 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4056 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4059 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4060 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift left by immediate.
4065 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4066 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4067 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4069 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4072 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4073 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift right narrow by immediate.
4079 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4080 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4081 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4083 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4086 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4087 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4091 ;; cm(eq|ge|gt|lt|le)
4092 ;; Note, we have constraints for Dz and Z as different expanders
4093 ;; have different ideas of what should be passed to this pattern.

;; Signed vector compare; second alternative handles compare against
;; immediate zero (ZDz constraint) with the #0 form of the insn.
4095 (define_insn "aarch64_cm<optab><mode>"
4096 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4098 (COMPARISONS:<V_cmp_result>
4099 (match_operand:VDQ_I 1 "register_operand" "w,w")
4100 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4104 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4105 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4106 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DImode compare which may end up in either register file; split
;; after reload: GP registers get a compare + conditional-store-neg
;; sequence (clobbering CC), FP registers keep the SIMD compare.
4109 (define_insn_and_split "aarch64_cm<optab>di"
4110 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4113 (match_operand:DI 1 "register_operand" "w,w,r")
4114 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4116 (clobber (reg:CC CC_REGNUM))]
4120 [(set (match_operand:DI 0 "register_operand")
4123 (match_operand:DI 1 "register_operand")
4124 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4127 /* If we are in the general purpose register file,
4128 we split to a sequence of comparison and store. */
4129 if (GP_REGNUM_P (REGNO (operands[0]))
4130 && GP_REGNUM_P (REGNO (operands[1])))
4132 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4133 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4134 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4135 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4138 /* Otherwise, we expand to a similar pattern which does not
4139 clobber CC_REGNUM. */
4141 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; SIMD-register-only DImode compare, the no-CC-clobber target of the
;; split above (hence the reload_completed condition).
4144 (define_insn "*aarch64_cm<optab>di"
4145 [(set (match_operand:DI 0 "register_operand" "=w,w")
4148 (match_operand:DI 1 "register_operand" "w,w")
4149 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4151 "TARGET_SIMD && reload_completed"
4153 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4154 cm<optab>\t%d0, %d1, #0"
4155 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned vector compares (cmhi/cmhs via UCOMPARISONS); no
;; compare-against-zero alternative here.
4160 (define_insn "aarch64_cm<optab><mode>"
4161 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4163 (UCOMPARISONS:<V_cmp_result>
4164 (match_operand:VDQ_I 1 "register_operand" "w")
4165 (match_operand:VDQ_I 2 "register_operand" "w")
4168 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4169 [(set_attr "type" "neon_compare<q>")]

;; Unsigned DImode compare; split after reload as in the signed case:
;; GP registers become compare + cstoredi_neg, SIMD registers keep the
;; vector compare without the CC clobber.
4172 (define_insn_and_split "aarch64_cm<optab>di"
4173 [(set (match_operand:DI 0 "register_operand" "=w,r")
4176 (match_operand:DI 1 "register_operand" "w,r")
4177 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4179 (clobber (reg:CC CC_REGNUM))]
4183 [(set (match_operand:DI 0 "register_operand")
4186 (match_operand:DI 1 "register_operand")
4187 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4190 /* If we are in the general purpose register file,
4191 we split to a sequence of comparison and store. */
4192 if (GP_REGNUM_P (REGNO (operands[0]))
4193 && GP_REGNUM_P (REGNO (operands[1])))
;; Note: uses plain CCmode here, unlike the signed variant which asks
;; SELECT_CC_MODE.
4195 machine_mode mode = CCmode;
4196 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4197 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4198 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4201 /* Otherwise, we expand to a similar pattern which does not
4202 clobber CC_REGNUM. */
4204 [(set_attr "type" "neon_compare,multiple")]

;; SIMD-register-only unsigned DImode compare (post-reload target).
4207 (define_insn "*aarch64_cm<optab>di"
4208 [(set (match_operand:DI 0 "register_operand" "=w")
4211 (match_operand:DI 1 "register_operand" "w")
4212 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4214 "TARGET_SIMD && reload_completed"
4215 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4216 [(set_attr "type" "neon_compare")]
4221 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4222 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4223 ;; not (neg (eq (and x y) 0))
4224 ;; which is rewritten by simplify_rtx as
4225 ;; plus (eq (and x y) 0) -1.

;; cmtst: test-bits compare, matched in the canonical
;; (plus (eq (and x y) 0) -1) form described above.
4227 (define_insn "aarch64_cmtst<mode>"
4228 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4229 (plus:<V_cmp_result>
4232 (match_operand:VDQ_I 1 "register_operand" "w")
4233 (match_operand:VDQ_I 2 "register_operand" "w"))
4234 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero")
4235 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4238 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4239 [(set_attr "type" "neon_tst<q>")]

;; DImode cmtst; split after reload: GP registers become an AND, a
;; compare against zero and cstoredi_neg; SIMD registers keep CMTST.
4242 (define_insn_and_split "aarch64_cmtstdi"
4243 [(set (match_operand:DI 0 "register_operand" "=w,r")
4247 (match_operand:DI 1 "register_operand" "w,r")
4248 (match_operand:DI 2 "register_operand" "w,r"))
4250 (clobber (reg:CC CC_REGNUM))]
4254 [(set (match_operand:DI 0 "register_operand")
4258 (match_operand:DI 1 "register_operand")
4259 (match_operand:DI 2 "register_operand"))
4262 /* If we are in the general purpose register file,
4263 we split to a sequence of comparison and store. */
4264 if (GP_REGNUM_P (REGNO (operands[0]))
4265 && GP_REGNUM_P (REGNO (operands[1])))
4267 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4268 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4269 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4270 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4271 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4274 /* Otherwise, we expand to a similar pattern which does not
4275 clobber CC_REGNUM. */
4277 [(set_attr "type" "neon_tst,multiple")]

;; SIMD-register-only DImode cmtst (post-reload target).
4280 (define_insn "*aarch64_cmtstdi"
4281 [(set (match_operand:DI 0 "register_operand" "=w")
4285 (match_operand:DI 1 "register_operand" "w")
4286 (match_operand:DI 2 "register_operand" "w"))
4289 "cmtst\t%d0, %d1, %d2"
4290 [(set_attr "type" "neon_tst")]
4293 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare; second alternative is compare
;; against zero (YDz constraint).
4295 (define_insn "aarch64_cm<optab><mode>"
4296 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4298 (COMPARISONS:<V_cmp_result>
4299 (match_operand:VALLF 1 "register_operand" "w,w")
4300 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4304 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4305 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4306 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]

4310 ;; Note we can also handle what would be fac(le|lt) by
4311 ;; generating fac(ge|gt).

;; Absolute compare: |op1| <cmp> |op2|, matched via abs on both inputs.
4313 (define_insn "*aarch64_fac<optab><mode>"
4314 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4316 (FAC_COMPARISONS:<V_cmp_result>
4317 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
4318 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
4321 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4322 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; addp: pairwise add of adjacent elements (64-bit vector forms).
4327 (define_insn "aarch64_addp<mode>"
4328 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4330 [(match_operand:VD_BHSI 1 "register_operand" "w")
4331 (match_operand:VD_BHSI 2 "register_operand" "w")]
4334 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4335 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar ADDP: pairwise add of the two DI elements of a V2DI source.
4338 (define_insn "aarch64_addpdi"
4339 [(set (match_operand:DI 0 "register_operand" "=w")
4341 [(match_operand:V2DI 1 "register_operand" "w")]
4345 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.  The expander first offers the estimate-based
;; approximate sequence (aarch64_emit_approx_sqrt); if that declines,
;; the plain FSQRT insn below is used.
4350 (define_expand "sqrt<mode>2"
4351 [(set (match_operand:VDQF 0 "register_operand")
4352 (sqrt:VDQF (match_operand:VDQF 1 "register_operand")))]
4355 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

4359 (define_insn "*sqrt<mode>2"
4360 [(set (match_operand:VDQF 0 "register_operand" "=w")
4361 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
4363 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4364 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
4367 ;; Patterns for vector struct loads and stores.

;; LD2: load two registers of structure data (OImode holds the pair).
4369 (define_insn "aarch64_simd_ld2<mode>"
4370 [(set (match_operand:OI 0 "register_operand" "=w")
4371 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4372 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4375 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4376 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD2R: load one 2-element structure and replicate to all lanes.
4379 (define_insn "aarch64_simd_ld2r<mode>"
4380 [(set (match_operand:OI 0 "register_operand" "=w")
4381 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4382 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4385 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4386 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; LD2 to a single lane; the rest of the pair comes from operand 2
;; (tied to the output).  Lane index is endian-adjusted when printed.
4389 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4390 [(set (match_operand:OI 0 "register_operand" "=w")
4391 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4392 (match_operand:OI 2 "register_operand" "0")
4393 (match_operand:SI 3 "immediate_operand" "i")
4394 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4398 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4399 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4401 [(set_attr "type" "neon_load2_one_lane")]

;; Expander: on big-endian, load into a temp and permute the register
;; list so lane numbering matches GCC's element order.
4404 (define_expand "vec_load_lanesoi<mode>"
4405 [(set (match_operand:OI 0 "register_operand" "=w")
4406 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4407 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4411 if (BYTES_BIG_ENDIAN)
4413 rtx tmp = gen_reg_rtx (OImode);
4414 rtx mask = aarch64_reverse_mask (<MODE>mode);
4415 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4416 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4419 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; ST2: store two registers of structure data.
4423 (define_insn "aarch64_simd_st2<mode>"
4424 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4425 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4426 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4429 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4430 [(set_attr "type" "neon_store2_2reg<q>")]

4433 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4434 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4435 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4436 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4437 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4438 (match_operand:SI 2 "immediate_operand" "i")]
4442 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4443 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4445 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Expander: big-endian permutes the register list before the store,
;; mirroring vec_load_lanesoi.
4448 (define_expand "vec_store_lanesoi<mode>"
4449 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4450 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4451 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4455 if (BYTES_BIG_ENDIAN)
4457 rtx tmp = gen_reg_rtx (OImode);
4458 rtx mask = aarch64_reverse_mask (<MODE>mode);
4459 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4460 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4463 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3: three-register structure variants, mirroring the LD2/ST2
;; patterns above with CImode holding the register triple.
4467 (define_insn "aarch64_simd_ld3<mode>"
4468 [(set (match_operand:CI 0 "register_operand" "=w")
4469 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4470 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4473 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4474 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD3R: load one 3-element structure and replicate to all lanes.
4477 (define_insn "aarch64_simd_ld3r<mode>"
4478 [(set (match_operand:CI 0 "register_operand" "=w")
4479 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4480 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4483 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4484 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; LD3 to a single lane; remaining lanes come from operand 2 ("0").
4487 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4488 [(set (match_operand:CI 0 "register_operand" "=w")
4489 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4490 (match_operand:CI 2 "register_operand" "0")
4491 (match_operand:SI 3 "immediate_operand" "i")
4492 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4496 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4497 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4499 [(set_attr "type" "neon_load3_one_lane")]

;; Expander: big-endian loads via a temp and permutes the reg list.
4502 (define_expand "vec_load_lanesci<mode>"
4503 [(set (match_operand:CI 0 "register_operand" "=w")
4504 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4505 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4509 if (BYTES_BIG_ENDIAN)
4511 rtx tmp = gen_reg_rtx (CImode);
4512 rtx mask = aarch64_reverse_mask (<MODE>mode);
4513 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4514 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4517 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; ST3: store three registers of structure data.
4521 (define_insn "aarch64_simd_st3<mode>"
4522 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4523 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4527 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4528 [(set_attr "type" "neon_store3_3reg<q>")]
4531 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4532 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4533 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4534 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4535 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4536 (match_operand:SI 2 "immediate_operand" "i")]
4540 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4541 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4543 [(set_attr "type" "neon_store3_one_lane<q>")]
4546 (define_expand "vec_store_lanesci<mode>"
4547 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4548 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4549 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4553 if (BYTES_BIG_ENDIAN)
4555 rtx tmp = gen_reg_rtx (CImode);
4556 rtx mask = aarch64_reverse_mask (<MODE>mode);
4557 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4558 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4561 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 family: four-register structure loads/stores (XImode lists).
;; Full-vector LD4 into an XI register quad.
4565 (define_insn "aarch64_simd_ld4<mode>"
4566 [(set (match_operand:XI 0 "register_operand" "=w")
4567 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4571 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4572 [(set_attr "type" "neon_load4_4reg<q>")]
;; LD4R: load one 4-element structure, replicate to all lanes.
4575 (define_insn "aarch64_simd_ld4r<mode>"
4576 [(set (match_operand:XI 0 "register_operand" "=w")
4577 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4578 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4581 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4582 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Single-lane LD4; operand 2 ("0") keeps the other lanes live,
;; operand 3 is the lane index remapped via ENDIAN_LANE_N for assembly.
4585 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4586 [(set (match_operand:XI 0 "register_operand" "=w")
4587 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4588 (match_operand:XI 2 "register_operand" "0")
4589 (match_operand:SI 3 "immediate_operand" "i")
4590 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4594 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4595 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4597 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander: big-endian loads into a temporary then
;; permutes the register list to middle-end lane numbering.
4600 (define_expand "vec_load_lanesxi<mode>"
4601 [(set (match_operand:XI 0 "register_operand" "=w")
4602 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4603 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4607 if (BYTES_BIG_ENDIAN)
4609 rtx tmp = gen_reg_rtx (XImode);
4610 rtx mask = aarch64_reverse_mask (<MODE>mode);
4611 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4612 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4615 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; Full-vector ST4 from an XI register quad.
4619 (define_insn "aarch64_simd_st4<mode>"
4620 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4621 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4625 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4626 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane ST4, lane index remapped for assembly only.
4629 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4630 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4631 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4632 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4633 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4634 (match_operand:SI 2 "immediate_operand" "i")]
4638 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4639 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4641 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander: big-endian permutes the list before ST4.
4644 (define_expand "vec_store_lanesxi<mode>"
4645 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4646 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4647 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4651 if (BYTES_BIG_ENDIAN)
4653 rtx tmp = gen_reg_rtx (XImode);
4654 rtx mask = aarch64_reverse_mask (<MODE>mode);
4655 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4656 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4659 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Lane-reversal of a whole structure register list (UNSPEC_REV_REGLIST):
;; after reload, splits into one TBL (V16QI) per vector register in the
;; list, all using the same byte-permute mask (operand 2).  The "=&w"
;; earlyclobber keeps the output list disjoint from the inputs since the
;; per-register TBLs are emitted sequentially.
4663 (define_insn_and_split "aarch64_rev_reglist<mode>"
4664 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4666 [(match_operand:VSTRUCT 1 "register_operand" "w")
4667 (match_operand:V16QI 2 "register_operand" "w")]
4668 UNSPEC_REV_REGLIST))]
4671 "&& reload_completed"
4675 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4676 for (i = 0; i < nregs; i++)
4678 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4679 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4680 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4684 [(set_attr "type" "neon_tbl1_q")
4685 (set_attr "length" "<insn_count>")]
4688 ;; Reload patterns for AdvSIMD register list operands.
;; Generic move expander for structure modes: before reload, force the
;; source into a register when the destination is not one.
4690 (define_expand "mov<mode>"
4691 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4692 (match_operand:VSTRUCT 1 "general_operand" ""))]
4695 if (can_create_pseudo_p ())
4697 if (GET_CODE (operands[0]) != REG)
4698 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian structure move: reg-reg (split elsewhere, hence
;; "multiple"), or single LD1/ST1 of the whole register list.
4702 (define_insn "*aarch64_mov<mode>"
4703 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4704 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4705 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4706 && (register_operand (operands[0], <MODE>mode)
4707 || register_operand (operands[1], <MODE>mode))"
4710 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4711 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4712 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4713 neon_load<nregs>_<nregs>reg_q")
4714 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian single-vector LD1: element-ordered load, keeps lane
;; numbering independent of memory byte order.
4717 (define_insn "aarch64_be_ld1<mode>"
4718 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4719 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4720 "aarch64_simd_struct_operand" "Utv")]
4723 "ld1\\t{%0<Vmtype>}, %1"
4724 [(set_attr "type" "neon_load1_1reg<q>")]
;; Big-endian single-vector ST1, counterpart of the above.
4727 (define_insn "aarch64_be_st1<mode>"
4728 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4729 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4732 "st1\\t{%1<Vmtype>}, %0"
4733 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian OI (2-reg list) move: memory forms use LDP/STP-class
;; accesses rather than LD1/ST1; reg-reg is split later ("multiple").
4736 (define_insn "*aarch64_be_movoi"
4737 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4738 (match_operand:OI 1 "general_operand" " w,w,m"))]
4739 "TARGET_SIMD && BYTES_BIG_ENDIAN
4740 && (register_operand (operands[0], OImode)
4741 || register_operand (operands[1], OImode))"
4746 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4747 (set_attr "length" "8,4,4")]
;; Big-endian CI (3-reg list) move; "o" = offsettable memory since the
;; access is split into pieces.
4750 (define_insn "*aarch64_be_movci"
4751 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4752 (match_operand:CI 1 "general_operand" " w,w,o"))]
4753 "TARGET_SIMD && BYTES_BIG_ENDIAN
4754 && (register_operand (operands[0], CImode)
4755 || register_operand (operands[1], CImode))"
4757 [(set_attr "type" "multiple")
4758 (set_attr "length" "12,4,4")]
;; Big-endian XI (4-reg list) move, same scheme as CI.
4761 (define_insn "*aarch64_be_movxi"
4762 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4763 (match_operand:XI 1 "general_operand" " w,w,o"))]
4764 "TARGET_SIMD && BYTES_BIG_ENDIAN
4765 && (register_operand (operands[0], XImode)
4766 || register_operand (operands[1], XImode))"
4768 [(set_attr "type" "multiple")
4769 (set_attr "length" "16,4,4")]
;; Post-reload splitters for structure-mode moves.
;; OI reg-reg: split into 2 TImode register moves.
4773 [(set (match_operand:OI 0 "register_operand")
4774 (match_operand:OI 1 "register_operand"))]
4775 "TARGET_SIMD && reload_completed"
4778 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CI: reg-reg becomes 3 TImode moves; otherwise (big-endian memory
;; case) split into an OImode move for the first two registers plus a
;; V16QI move for the trailing third register.
4783 [(set (match_operand:CI 0 "nonimmediate_operand")
4784 (match_operand:CI 1 "general_operand"))]
4785 "TARGET_SIMD && reload_completed"
4788 if (register_operand (operands[0], CImode)
4789 && register_operand (operands[1], CImode))
4791 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4794 else if (BYTES_BIG_ENDIAN)
4796 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4797 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4798 emit_move_insn (gen_lowpart (V16QImode,
4799 simplify_gen_subreg (TImode, operands[0],
4801 gen_lowpart (V16QImode,
4802 simplify_gen_subreg (TImode, operands[1],
;; XI: reg-reg becomes 4 TImode moves; big-endian memory case splits
;; into two OImode halves at byte offsets 0 and 32.
4811 [(set (match_operand:XI 0 "nonimmediate_operand")
4812 (match_operand:XI 1 "general_operand"))]
4813 "TARGET_SIMD && reload_completed"
4816 if (register_operand (operands[0], XImode)
4817 && register_operand (operands[1], XImode))
4819 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4822 else if (BYTES_BIG_ENDIAN)
4824 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4825 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4826 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4827 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for LDnR (load-and-replicate): wraps the pointer in
;; a BLKmode MEM sized to one structure element group.
4834 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4835 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4836 (match_operand:DI 1 "register_operand" "w")
4837 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4840 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4841 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4844 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register LD2: loads two 64-bit vectors into the low halves of an
;; OI pair; the RTL zero-pads each to 128 bits (vec_concat with 0).
4849 (define_insn "aarch64_ld2<mode>_dreg"
4850 [(set (match_operand:OI 0 "register_operand" "=w")
4855 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4857 (vec_duplicate:VD (const_int 0)))
4859 (unspec:VD [(match_dup 1)]
4861 (vec_duplicate:VD (const_int 0)))) 0))]
4863 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4864 [(set_attr "type" "neon_load2_2reg<q>")]
;; DX (64-bit scalar element) variant: no de-interleave needed, so a
;; plain two-register LD1 is emitted instead of LD2.
4867 (define_insn "aarch64_ld2<mode>_dreg"
4868 [(set (match_operand:OI 0 "register_operand" "=w")
4873 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4877 (unspec:DX [(match_dup 1)]
4879 (const_int 0))) 0))]
4881 "ld1\\t{%S0.1d - %T0.1d}, %1"
4882 [(set_attr "type" "neon_load1_2reg<q>")]
;; D-register LD3, same zero-padding scheme over a CI triple.
4885 (define_insn "aarch64_ld3<mode>_dreg"
4886 [(set (match_operand:CI 0 "register_operand" "=w")
4892 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4894 (vec_duplicate:VD (const_int 0)))
4896 (unspec:VD [(match_dup 1)]
4898 (vec_duplicate:VD (const_int 0))))
4900 (unspec:VD [(match_dup 1)]
4902 (vec_duplicate:VD (const_int 0)))) 0))]
4904 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4905 [(set_attr "type" "neon_load3_3reg<q>")]
;; DX LD3 variant emitted as a three-register LD1.
4908 (define_insn "aarch64_ld3<mode>_dreg"
4909 [(set (match_operand:CI 0 "register_operand" "=w")
4915 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4919 (unspec:DX [(match_dup 1)]
4923 (unspec:DX [(match_dup 1)]
4925 (const_int 0))) 0))]
4927 "ld1\\t{%S0.1d - %U0.1d}, %1"
4928 [(set_attr "type" "neon_load1_3reg<q>")]
;; D-register LD4 over an XI quad.
4931 (define_insn "aarch64_ld4<mode>_dreg"
4932 [(set (match_operand:XI 0 "register_operand" "=w")
4938 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4940 (vec_duplicate:VD (const_int 0)))
4942 (unspec:VD [(match_dup 1)]
4944 (vec_duplicate:VD (const_int 0))))
4947 (unspec:VD [(match_dup 1)]
4949 (vec_duplicate:VD (const_int 0)))
4951 (unspec:VD [(match_dup 1)]
4953 (vec_duplicate:VD (const_int 0))))) 0))]
4955 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4956 [(set_attr "type" "neon_load4_4reg<q>")]
;; DX LD4 variant emitted as a four-register LD1.
4959 (define_insn "aarch64_ld4<mode>_dreg"
4960 [(set (match_operand:XI 0 "register_operand" "=w")
4966 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4970 (unspec:DX [(match_dup 1)]
4975 (unspec:DX [(match_dup 1)]
4979 (unspec:DX [(match_dup 1)]
4981 (const_int 0)))) 0))]
4983 "ld1\\t{%S0.1d - %V0.1d}, %1"
4984 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for the D-register LDn forms above: builds a
;; BLKmode MEM of nregs * 8 bytes around the pointer argument.
4987 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4988 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4989 (match_operand:DI 1 "register_operand" "r")
4990 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4993 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4994 set_mem_size (mem, <VSTRUCT:nregs> * 8);
4996 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Builtin LD1: big-endian goes through aarch64_be_ld1 to keep lane
;; numbering consistent; little-endian is a plain vector move from MEM.
5000 (define_expand "aarch64_ld1<VALL_F16:mode>"
5001 [(match_operand:VALL_F16 0 "register_operand")
5002 (match_operand:DI 1 "register_operand")]
5005 machine_mode mode = <VALL_F16:MODE>mode;
5006 rtx mem = gen_rtx_MEM (mode, operands[1]);
5008 if (BYTES_BIG_ENDIAN)
5009 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5011 emit_move_insn (operands[0], mem);
;; Builtin LDn for Q-register element modes: MEM takes the full
;; structure mode, then defers to the aarch64_simd_ldN insn.
5015 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5016 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5017 (match_operand:DI 1 "register_operand" "r")
5018 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5021 machine_mode mode = <VSTRUCT:MODE>mode;
5022 rtx mem = gen_rtx_MEM (mode, operands[1]);
5024 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; Builtin LDn-lane: bounds-checks the lane (operand 3) against the
;; element count, then emits the single-lane load pattern.
5028 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5029 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5030 (match_operand:DI 1 "register_operand" "w")
5031 (match_operand:VSTRUCT 2 "register_operand" "0")
5032 (match_operand:SI 3 "immediate_operand" "i")
5033 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5036 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5037 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5040 aarch64_simd_lane_bounds (operands[3], 0,
5041 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5043 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5044 operands[0], mem, operands[2], operands[3]));
5048 ;; Expanders for builtins to extract vector registers from large
5049 ;; opaque integer modes.
;; Extract D-register `part' of a structure: take a 128-bit subreg at
;; offset part * 16 and return its low 64-bit half.
5053 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5054 [(match_operand:VDC 0 "register_operand" "=w")
5055 (match_operand:VSTRUCT 1 "register_operand" "w")
5056 (match_operand:SI 2 "immediate_operand" "i")]
5059 int part = INTVAL (operands[2]);
5060 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5061 int offset = part * 16;
5063 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5064 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extract Q-register `part' of a structure via a 16-byte subreg.
5070 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5071 [(match_operand:VQ 0 "register_operand" "=w")
5072 (match_operand:VSTRUCT 1 "register_operand" "w")
5073 (match_operand:SI 2 "immediate_operand" "i")]
5076 int part = INTVAL (operands[2]);
5077 int offset = part * 16;
5079 emit_move_insn (operands[0],
5080 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5084 ;; Permuted-store expanders for neon intrinsics.
5086 ;; Permute instructions
;; Standard-name constant permute: delegate to the target hook, which
;; FAILs (falls back to the generic path) when it cannot match.
5090 (define_expand "vec_perm_const<mode>"
5091 [(match_operand:VALL_F16 0 "register_operand")
5092 (match_operand:VALL_F16 1 "register_operand")
5093 (match_operand:VALL_F16 2 "register_operand")
5094 (match_operand:<V_cmp_result> 3)]
5097 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5098 operands[2], operands[3]))
;; Standard-name variable permute (byte vectors only, per VB iterator).
5104 (define_expand "vec_perm<mode>"
5105 [(match_operand:VB 0 "register_operand")
5106 (match_operand:VB 1 "register_operand")
5107 (match_operand:VB 2 "register_operand")
5108 (match_operand:VB 3 "register_operand")]
5111 aarch64_expand_vec_perm (operands[0], operands[1],
5112 operands[2], operands[3]);
;; Table lookups.  TBL writes zero for out-of-range indices; TBX leaves
;; the destination lane unchanged (hence the "0" tie on operand 1).
;; One-register table.
5116 (define_insn "aarch64_tbl1<mode>"
5117 [(set (match_operand:VB 0 "register_operand" "=w")
5118 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5119 (match_operand:VB 2 "register_operand" "w")]
5122 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5123 [(set_attr "type" "neon_tbl1<q>")]
5126 ;; Two source registers.
;; Two-register table (OI list), 128-bit result.
5128 (define_insn "aarch64_tbl2v16qi"
5129 [(set (match_operand:V16QI 0 "register_operand" "=w")
5130 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5131 (match_operand:V16QI 2 "register_operand" "w")]
5134 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5135 [(set_attr "type" "neon_tbl2_q")]
;; Two-register table for either width of byte vector.
5138 (define_insn "aarch64_tbl3<mode>"
5139 [(set (match_operand:VB 0 "register_operand" "=w")
5140 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5141 (match_operand:VB 2 "register_operand" "w")]
5144 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5145 [(set_attr "type" "neon_tbl3")]
;; Two-register TBX: out-of-range lanes keep operand 1's contents.
5148 (define_insn "aarch64_tbx4<mode>"
5149 [(set (match_operand:VB 0 "register_operand" "=w")
5150 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5151 (match_operand:OI 2 "register_operand" "w")
5152 (match_operand:VB 3 "register_operand" "w")]
5155 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5156 [(set_attr "type" "neon_tbl4")]
5159 ;; Three source registers.
;; Three-register table (CI list).
5161 (define_insn "aarch64_qtbl3<mode>"
5162 [(set (match_operand:VB 0 "register_operand" "=w")
5163 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5164 (match_operand:VB 2 "register_operand" "w")]
5167 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5168 [(set_attr "type" "neon_tbl3")]
;; Three-register TBX.
5171 (define_insn "aarch64_qtbx3<mode>"
5172 [(set (match_operand:VB 0 "register_operand" "=w")
5173 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5174 (match_operand:CI 2 "register_operand" "w")
5175 (match_operand:VB 3 "register_operand" "w")]
5178 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5179 [(set_attr "type" "neon_tbl3")]
5182 ;; Four source registers.
;; Four-register table (XI list).
5184 (define_insn "aarch64_qtbl4<mode>"
5185 [(set (match_operand:VB 0 "register_operand" "=w")
5186 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5187 (match_operand:VB 2 "register_operand" "w")]
5190 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5191 [(set_attr "type" "neon_tbl4")]
;; Four-register TBX.
5194 (define_insn "aarch64_qtbx4<mode>"
5195 [(set (match_operand:VB 0 "register_operand" "=w")
5196 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5197 (match_operand:XI 2 "register_operand" "w")
5198 (match_operand:VB 3 "register_operand" "w")]
5201 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5202 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI values into an OI register pair; kept as one insn
;; until reload so the allocator can assign consecutive registers, then
;; split by aarch64_split_combinev16qi.
5205 (define_insn_and_split "aarch64_combinev16qi"
5206 [(set (match_operand:OI 0 "register_operand" "=w")
5207 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5208 (match_operand:V16QI 2 "register_operand" "w")]
5212 "&& reload_completed"
5215 aarch64_split_combinev16qi (operands);
5218 [(set_attr "type" "multiple")]
;; ZIP/UZP/TRN-style permutes, parameterized by the PERMUTE iterator.
5221 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5222 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5223 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5224 (match_operand:VALL_F16 2 "register_operand" "w")]
5227 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5228 [(set_attr "type" "neon_permute<q>")]
;; EXT: operand 3 arrives as a lane index and is scaled to a byte
;; offset (lane * element size) before printing.
5231 ;; Note immediate (third) operand is lane index not byte index.
5232 (define_insn "aarch64_ext<mode>"
5233 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5234 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5235 (match_operand:VALL_F16 2 "register_operand" "w")
5236 (match_operand:SI 3 "immediate_operand" "i")]
5240 operands[3] = GEN_INT (INTVAL (operands[3])
5241 * GET_MODE_UNIT_SIZE (<MODE>mode));
5242 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5244 [(set_attr "type" "neon_ext<q>")]
;; REV16/REV32/REV64, parameterized by the REVERSE iterator.
5247 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5248 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5249 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5252 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5253 [(set_attr "type" "neon_rev<q>")]
;; D-register STn patterns: the register list mode (OI/CI/XI) is wider
;; than what is stored; the VD variants interleave with STn, the DX
;; (64-bit element) variants use a plain multi-register ST1.
5256 (define_insn "aarch64_st2<mode>_dreg"
5257 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5258 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5259 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5262 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5263 [(set_attr "type" "neon_store2_2reg")]
5266 (define_insn "aarch64_st2<mode>_dreg"
5267 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5268 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5269 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5272 "st1\\t{%S1.1d - %T1.1d}, %0"
5273 [(set_attr "type" "neon_store1_2reg")]
5276 (define_insn "aarch64_st3<mode>_dreg"
5277 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5278 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5279 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5282 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5283 [(set_attr "type" "neon_store3_3reg")]
5286 (define_insn "aarch64_st3<mode>_dreg"
5287 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5288 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5289 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5292 "st1\\t{%S1.1d - %U1.1d}, %0"
5293 [(set_attr "type" "neon_store1_3reg")]
5296 (define_insn "aarch64_st4<mode>_dreg"
5297 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5298 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5299 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5302 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5303 [(set_attr "type" "neon_store4_4reg")]
5306 (define_insn "aarch64_st4<mode>_dreg"
5307 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5308 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5309 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5312 "st1\\t{%S1.1d - %V1.1d}, %0"
5313 [(set_attr "type" "neon_store1_4reg")]
;; Builtin STn for D-register element modes: BLKmode MEM of
;; nregs * 8 bytes, then defer to the _dreg store above.
5316 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5317 [(match_operand:DI 0 "register_operand" "r")
5318 (match_operand:VSTRUCT 1 "register_operand" "w")
5319 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5322 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5323 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5325 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Builtin STn for Q-register element modes.
5329 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5330 [(match_operand:DI 0 "register_operand" "r")
5331 (match_operand:VSTRUCT 1 "register_operand" "w")
5332 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5335 machine_mode mode = <VSTRUCT:MODE>mode;
5336 rtx mem = gen_rtx_MEM (mode, operands[0]);
5338 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Builtin STn-lane: single-structure store of lane operand 2.
5342 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5343 [(match_operand:DI 0 "register_operand" "r")
5344 (match_operand:VSTRUCT 1 "register_operand" "w")
5345 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5346 (match_operand:SI 2 "immediate_operand")]
5349 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5350 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5353 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5354 mem, operands[1], operands[2]));
;; Builtin ST1: big-endian goes through aarch64_be_st1; little-endian
;; is a plain vector store via emit_move_insn.
5358 (define_expand "aarch64_st1<VALL_F16:mode>"
5359 [(match_operand:DI 0 "register_operand")
5360 (match_operand:VALL_F16 1 "register_operand")]
5363 machine_mode mode = <VALL_F16:MODE>mode;
5364 rtx mem = gen_rtx_MEM (mode, operands[0]);
5366 if (BYTES_BIG_ENDIAN)
5367 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5369 emit_move_insn (mem, operands[1]);
5373 ;; Expander for builtins to insert vector registers into large
5374 ;; opaque integer modes.
5376 ;; Q-register list. We don't need a D-reg inserter as we zero
5377 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert Q-register operand 2 at position operand 3 of a structure:
;; copy the whole list first, then overwrite the 16-byte subreg slot.
5379 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5380 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5381 (match_operand:VSTRUCT 1 "register_operand" "0")
5382 (match_operand:VQ 2 "register_operand" "w")
5383 (match_operand:SI 3 "immediate_operand" "i")]
5386 int part = INTVAL (operands[3]);
5387 int offset = part * 16;
5389 emit_move_insn (operands[0], operands[1]);
5390 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5395 ;; Standard pattern name vec_init<mode>.
;; Delegate vector construction entirely to the backend helper.
5397 (define_expand "vec_init<mode>"
5398 [(match_operand:VALL_F16 0 "register_operand" "")
5399 (match_operand 1 "" "")]
5402 aarch64_expand_vector_init (operands[0], operands[1]);
;; LD1R: load one element from memory and broadcast to every lane.
5406 (define_insn "*aarch64_simd_ld1r<mode>"
5407 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5408 (vec_duplicate:VALL_F16
5409 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5411 "ld1r\\t{%0.<Vtype>}, %1"
5412 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE: vector floating-point reciprocal estimate.
5415 (define_insn "aarch64_frecpe<mode>"
5416 [(set (match_operand:VDQF 0 "register_operand" "=w")
5417 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
5420 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5421 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
;; Scalar FRECPE/FRECPX, suffix selected by the FRECP iterator.
5424 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5425 [(set (match_operand:GPF 0 "register_operand" "=w")
5426 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
5429 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5430 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
;; FRECPS: reciprocal step (Newton-Raphson refinement input).
5433 (define_insn "aarch64_frecps<mode>"
5434 [(set (match_operand:VALLF 0 "register_operand" "=w")
5435 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
5436 (match_operand:VALLF 2 "register_operand" "w")]
5439 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5440 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
;; URECPE: unsigned integer reciprocal estimate.
5443 (define_insn "aarch64_urecpe<mode>"
5444 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5445 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5448 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5449 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5451 ;; Standard pattern name vec_extract<mode>.
;; Element extraction defers to the get_lane pattern.
5453 (define_expand "vec_extract<mode>"
5454 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5455 (match_operand:VALL_F16 1 "register_operand" "")
5456 (match_operand:SI 2 "immediate_operand" "")]
5460 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AESE/AESD: round key in operand 1 (tied to the destination, as the
;; instruction overwrites it), data/state in operand 2.
5466 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5467 [(set (match_operand:V16QI 0 "register_operand" "=w")
5468 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5469 (match_operand:V16QI 2 "register_operand" "w")]
5471 "TARGET_SIMD && TARGET_CRYPTO"
5472 "aes<aes_op>\\t%0.16b, %2.16b"
5473 [(set_attr "type" "crypto_aese")]
;; AESMC/AESIMC: the "0,w" alternatives plus the conditional "enabled"
;; attribute make the tied form preferred only when AES/AESMC fusion is
;; enabled for the target, so fused pairs share a register.
5476 ;; When AES/AESMC fusion is enabled we want the register allocation to
5480 ;; So prefer to tie operand 1 to operand 0 when fusing.
5482 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5483 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5484 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5486 "TARGET_SIMD && TARGET_CRYPTO"
5487 "aes<aesmc_op>\\t%0.16b, %1.16b"
5488 [(set_attr "type" "crypto_aesmc")
5489 (set_attr_alternative "enabled"
5490 [(if_then_else (match_test
5491 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5492 (const_string "yes" )
5493 (const_string "no"))
5494 (const_string "yes")])]
;; SHA1 instruction patterns.
;; SHA1H: fixed rotate of a single 32-bit value.
5499 (define_insn "aarch64_crypto_sha1hsi"
5500 [(set (match_operand:SI 0 "register_operand" "=w")
5501 (unspec:SI [(match_operand:SI 1
5502 "register_operand" "w")]
5504 "TARGET_SIMD && TARGET_CRYPTO"
5506 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1: schedule update 1; operand 1 tied to the destination.
5509 (define_insn "aarch64_crypto_sha1su1v4si"
5510 [(set (match_operand:V4SI 0 "register_operand" "=w")
5511 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5512 (match_operand:V4SI 2 "register_operand" "w")]
5514 "TARGET_SIMD && TARGET_CRYPTO"
5515 "sha1su1\\t%0.4s, %2.4s"
5516 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1C/SHA1P/SHA1M hash updates, selected by the sha1_op iterator.
5519 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5520 [(set (match_operand:V4SI 0 "register_operand" "=w")
5521 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5522 (match_operand:SI 2 "register_operand" "w")
5523 (match_operand:V4SI 3 "register_operand" "w")]
5525 "TARGET_SIMD && TARGET_CRYPTO"
5526 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5527 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0: schedule update 0, three-operand form.
5530 (define_insn "aarch64_crypto_sha1su0v4si"
5531 [(set (match_operand:V4SI 0 "register_operand" "=w")
5532 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5533 (match_operand:V4SI 2 "register_operand" "w")
5534 (match_operand:V4SI 3 "register_operand" "w")]
5536 "TARGET_SIMD && TARGET_CRYPTO"
5537 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5538 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash updates, selected by the sha256_op iterator.
5543 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5544 [(set (match_operand:V4SI 0 "register_operand" "=w")
5545 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5546 (match_operand:V4SI 2 "register_operand" "w")
5547 (match_operand:V4SI 3 "register_operand" "w")]
5549 "TARGET_SIMD && TARGET_CRYPTO"
5550 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5551 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0: message schedule update 0; operand 1 is tied to the
;; destination ("0").  Condition spacing normalized to match the other
;; crypto patterns ("TARGET_SIMD && TARGET_CRYPTO").
5554 (define_insn "aarch64_crypto_sha256su0v4si"
5555 [(set (match_operand:V4SI 0 "register_operand" "=w")
5556 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5557 (match_operand:V4SI 2 "register_operand" "w")]
5559 "TARGET_SIMD && TARGET_CRYPTO"
5560 "sha256su0\\t%0.4s, %2.4s"
5561 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1: message schedule update 1, three-operand form with the
;; accumulator tied to the destination.  Condition spacing normalized
;; to match the other crypto patterns.
5564 (define_insn "aarch64_crypto_sha256su1v4si"
5565 [(set (match_operand:V4SI 0 "register_operand" "=w")
5566 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5567 (match_operand:V4SI 2 "register_operand" "w")
5568 (match_operand:V4SI 3 "register_operand" "w")]
5570 "TARGET_SIMD && TARGET_CRYPTO"
5571 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5572 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL: 64x64 -> 128-bit polynomial multiply (low halves).
5577 (define_insn "aarch64_crypto_pmulldi"
5578 [(set (match_operand:TI 0 "register_operand" "=w")
5579 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5580 (match_operand:DI 2 "register_operand" "w")]
5582 "TARGET_SIMD && TARGET_CRYPTO"
5583 "pmull\\t%0.1q, %1.1d, %2.1d"
5584 [(set_attr "type" "neon_mul_d_long")]
5587 (define_insn "aarch64_crypto_pmullv2di"
5588 [(set (match_operand:TI 0 "register_operand" "=w")
5589 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5590 (match_operand:V2DI 2 "register_operand" "w")]
5592 "TARGET_SIMD && TARGET_CRYPTO"
5593 "pmull2\\t%0.1q, %1.2d, %2.2d"
5594 [(set_attr "type" "neon_mul_d_long")]