1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; NOTE(review): this listing is truncated (embedded source line numbers skip);
;; condition strings and closing parens of these expanders are not visible.
;; Standard vector move expander: a store to memory cannot take a
;; non-register source, so force operand 1 into a register first.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander (modes VALL); see C comment below.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector, either from a
;; general register ("r" -> dup Vd.T, Wn/Xn) or from element 0 of a SIMD
;; register ("w" -> dup Vd.T, Vn.Ts[0]).
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant: duplicate from element 0 of a SIMD register only.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 (vec_duplicate:VDQF_F16
58 (match_operand:<VEL> 1 "register_operand" "w")))]
60 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61 [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane across the vector; the lane number is
;; remapped for big-endian via ENDIAN_LANE_N before printing.
64 (define_insn "aarch64_dup_lane<mode>"
65 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 (vec_duplicate:VALL_F16
68 (match_operand:VALL_F16 1 "register_operand" "w")
69 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76 [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector is in the width-swapped mode
;; (<VSWAP_WIDTH>), so the lane remap uses that mode's lane count.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 (vec_duplicate:VALL_F16
83 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 INTVAL (operands[2])));
90 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92 [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector move: alternatives cover load (ldr), store (str),
;; SIMD-to-SIMD copy (orr), SIMD->GP (umov), GP->SIMD (ins), GP->GP (mov)
;; and building an immediate via aarch64_output_simd_mov_immediate.
;; At least one operand must be a register.
95 (define_insn "*aarch64_simd_mov<mode>"
96 [(set (match_operand:VD 0 "nonimmediate_operand"
97 "=w, m, w, ?r, ?w, ?r, w")
98 (match_operand:VD 1 "general_operand"
99 "m, w, w, w, r, r, Dn"))]
101 && (register_operand (operands[0], <MODE>mode)
102 || register_operand (operands[1], <MODE>mode))"
104 switch (which_alternative)
106 case 0: return "ldr\\t%d0, %1";
107 case 1: return "str\\t%d1, %0";
108 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109 case 3: return "umov\t%0, %1.d[0]";
110 case 4: return "ins\t%0.d[0], %1";
111 case 5: return "mov\t%0, %1";
113 return aarch64_output_simd_mov_immediate (operands[1],
115 default: gcc_unreachable ();
118 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 mov_reg, neon_move<q>")]
;; 128-bit vector move.  GP<->SIMD and GP<->GP alternatives need two
;; instructions (length 8, type "multiple"); SIMD-only forms are length 4.
123 (define_insn "*aarch64_simd_mov<mode>"
124 [(set (match_operand:VQ 0 "nonimmediate_operand"
125 "=w, m, w, ?r, ?w, ?r, w")
126 (match_operand:VQ 1 "general_operand"
127 "m, w, w, w, r, r, Dn"))]
129 && (register_operand (operands[0], <MODE>mode)
130 || register_operand (operands[1], <MODE>mode))"
132 switch (which_alternative)
135 return "ldr\\t%q0, %1";
137 return "str\\t%q1, %0";
139 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 neon_logic<q>, multiple, multiple, multiple,\
153 (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of adjacent 64-bit vectors (ldp).  The condition requires
;; the second address to equal the first plus the mode size, i.e. the two
;; loads are contiguous in memory.
156 (define_insn "load_pair<mode>"
157 [(set (match_operand:VD 0 "register_operand" "=w")
158 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159 (set (match_operand:VD 2 "register_operand" "=w")
160 (match_operand:VD 3 "memory_operand" "m"))]
162 && rtx_equal_p (XEXP (operands[3], 0),
163 plus_constant (Pmode,
164 XEXP (operands[1], 0),
165 GET_MODE_SIZE (<MODE>mode)))"
167 [(set_attr "type" "neon_ldp")]
;; Store a pair of adjacent 64-bit vectors (stp); same contiguity
;; requirement on the two destination addresses.
170 (define_insn "store_pair<mode>"
171 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 (match_operand:VD 1 "register_operand" "w"))
173 (set (match_operand:VD 2 "memory_operand" "=m")
174 (match_operand:VD 3 "register_operand" "w"))]
176 && rtx_equal_p (XEXP (operands[2], 0),
177 plus_constant (Pmode,
178 XEXP (operands[0], 0),
179 GET_MODE_SIZE (<MODE>mode)))"
181 [(set_attr "type" "neon_stp")]
;; NOTE(review): the opening "(define_split" lines for the next two
;; patterns are not visible in this truncated listing.
;; Split a 128-bit vector move between two general registers into two
;; DImode register moves after reload.
185 [(set (match_operand:VQ 0 "register_operand" "")
186 (match_operand:VQ 1 "register_operand" ""))]
187 "TARGET_SIMD && reload_completed
188 && GP_REGNUM_P (REGNO (operands[0]))
189 && GP_REGNUM_P (REGNO (operands[1]))"
192 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Split a 128-bit move between an FP/SIMD register and a general
;; register (either direction) via aarch64_split_simd_move.
197 [(set (match_operand:VQ 0 "register_operand" "")
198 (match_operand:VQ 1 "register_operand" ""))]
199 "TARGET_SIMD && reload_completed
200 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204 aarch64_split_simd_move (operands[0], operands[1]);
;; Helper expander used by the splits above: move a 128-bit value in two
;; 64-bit halves.  GP source -> move_lo_quad + move_hi_quad into the SIMD
;; dest; SIMD source -> extract low/high halves into the GP dest.
208 (define_expand "aarch64_split_simd_mov<mode>"
209 [(set (match_operand:VQ 0)
210 (match_operand:VQ 1))]
213 rtx dst = operands[0];
214 rtx src = operands[1];
216 if (GP_REGNUM_P (REGNO (src)))
218 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219 rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 (gen_move_lo_quad_<mode> (dst, src_low_part));
224 (gen_move_hi_quad_<mode> (dst, src_high_part));
229 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Extract the low half of a 128-bit vector into a general register.
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 (match_operand:VQ 1 "register_operand" "w")
247 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248 "TARGET_SIMD && reload_completed"
250 [(set_attr "type" "neon_to_gp<q>")
251 (set_attr "length" "4")
;; Extract the high half of a 128-bit vector into a general register.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 (match_operand:VQ 1 "register_operand" "w")
258 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259 "TARGET_SIMD && reload_completed"
261 [(set_attr "type" "neon_to_gp<q>")
262 (set_attr "length" "4")
;; OR-NOT: note the template swaps operands — ORN inverts its second
;; source, so the inverted RTL operand 1 is printed last.
265 (define_insn "orn<mode>3"
266 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 (match_operand:VDQ_I 2 "register_operand" "w")))]
270 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271 [(set_attr "type" "neon_logic<q>")]
;; AND-NOT (bit clear); same operand swap as orn above.
274 (define_insn "bic<mode>3"
275 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 (match_operand:VDQ_I 2 "register_operand" "w")))]
279 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280 [(set_attr "type" "neon_logic<q>")]
;; Vector integer add.
283 (define_insn "add<mode>3"
284 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 (match_operand:VDQ_I 2 "register_operand" "w")))]
288 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289 [(set_attr "type" "neon_add<q>")]
;; Vector integer subtract.
292 (define_insn "sub<mode>3"
293 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 (match_operand:VDQ_I 2 "register_operand" "w")))]
297 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298 [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiply (B/H/S elements only — no V2DI mul insn).
301 (define_insn "mul<mode>3"
302 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
310 (define_insn "bswap<mode>2"
311 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315 [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (byte vectors only).
318 (define_insn "aarch64_rbit<mode>"
319 [(set (match_operand:VB 0 "register_operand" "=w")
320 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324 [(set_attr "type" "neon_rbit")]
;; Count trailing zeros: byte-swap then bit-reverse the bytes (net
;; effect: bit-reverse each element), then count leading zeros.
327 (define_expand "ctz<mode>2"
328 [(set (match_operand:VS 0 "register_operand")
329 (ctz:VS (match_operand:VS 1 "register_operand")))]
332 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply by a selected lane of operand 1 (lane index remapped for
;; endianness); matched by combine from vec_duplicate of a vec_select.
341 (define_insn "*aarch64_mul3_elt<mode>"
342 [(set (match_operand:VMUL 0 "register_operand" "=w")
346 (match_operand:VMUL 1 "register_operand" "<h_con>")
347 (parallel [(match_operand:SI 2 "immediate_operand")])))
348 (match_operand:VMUL 3 "register_operand" "w")))]
351 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above but the lane comes from the width-swapped mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359 (mult:VMUL_CHANGE_NLANES
360 (vec_duplicate:VMUL_CHANGE_NLANES
362 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 (parallel [(match_operand:SI 2 "immediate_operand")])))
364 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 INTVAL (operands[2])));
369 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a duplicated scalar — printed as a by-element multiply
;; using lane 0 of the scalar's register.
374 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
375 [(set (match_operand:VMUL 0 "register_operand" "=w")
378 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
379 (match_operand:VMUL 2 "register_operand" "w")))]
381 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
382 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate.
385 (define_insn "aarch64_rsqrte<mode>"
386 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
387 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
390 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (Newton-Raphson refinement).
393 (define_insn "aarch64_rsqrts<mode>"
394 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
395 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
396 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
399 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the software approximation sequence.
402 (define_expand "rsqrt<mode>2"
403 [(set (match_operand:VALLF 0 "register_operand" "=w")
404 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; Scalar DF multiply by a selected lane of a V2DF register.
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413 [(set (match_operand:DF 0 "register_operand" "=w")
416 (match_operand:V2DF 1 "register_operand" "w")
417 (parallel [(match_operand:SI 2 "immediate_operand")]))
418 (match_operand:DF 3 "register_operand" "w")))]
421 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
427 (define_insn "neg<mode>2"
428 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431 "neg\t%0.<Vtype>, %1.<Vtype>"
432 [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value.
435 (define_insn "abs<mode>2"
436 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439 "abs\t%0.<Vtype>, %1.<Vtype>"
440 [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; (unspec form keeps combine from merging it with e.g. a difference).
446 (define_insn "aarch64_abs<mode>"
447 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2|.
456 (define_insn "abd<mode>_3"
457 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 (abs:VDQ_BHSI (minus:VDQ_BHSI
459 (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463 [(set_attr "type" "neon_abd<q>")]
;; Signed absolute-difference-and-accumulate; operand 3 is tied to the
;; destination ("0") since SABA accumulates in place.
466 (define_insn "aba<mode>_3"
467 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference.
477 (define_insn "fabd<mode>3"
478 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
481 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
482 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
484 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
485 [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; Vector bitwise AND.
488 (define_insn "and<mode>3"
489 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
490 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
491 (match_operand:VDQ_I 2 "register_operand" "w")))]
493 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
494 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise inclusive OR.
497 (define_insn "ior<mode>3"
498 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 (match_operand:VDQ_I 2 "register_operand" "w")))]
502 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise exclusive OR.
506 (define_insn "xor<mode>3"
507 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 (match_operand:VDQ_I 2 "register_operand" "w")))]
511 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512 [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT (one's complement).
515 (define_insn "one_cmpl<mode>2"
516 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
519 "not\t%0.<Vbtype>, %1.<Vbtype>"
520 [(set_attr "type" "neon_logic<q>")]
;; Insert one element into a vector.  Operand 2 is a one-hot mask; the C
;; code converts it to a lane number (exact_log2 + endian remap) and back.
;; Alternatives: from a GP register (ins), from lane 0 of a SIMD register
;; (ins), or loaded directly from memory into one lane (ld1).
523 (define_insn "aarch64_simd_vec_set<mode>"
524 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
526 (vec_duplicate:VDQ_BHSI
527 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
528 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
529 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
532 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
533 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
534 switch (which_alternative)
537 return "ins\\t%0.<Vetype>[%p2], %w1";
539 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
541 return "ld1\\t{%0.<Vetype>}[%p2], %1";
546 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Copy lane %4 of operand 3 into lane %p2 of the destination (which is
;; tied to operand 1); both lane numbers are endian-remapped.
549 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
550 [(set (match_operand:VALL 0 "register_operand" "=w")
554 (match_operand:VALL 3 "register_operand" "w")
556 [(match_operand:SI 4 "immediate_operand" "i")])))
557 (match_operand:VALL 1 "register_operand" "0")
558 (match_operand:SI 2 "immediate_operand" "i")))]
561 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
562 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
563 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
565 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
567 [(set_attr "type" "neon_ins<q>")]
;; As above with the source lane taken from the width-swapped mode.
570 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
571 [(set (match_operand:VALL 0 "register_operand" "=w")
575 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
577 [(match_operand:SI 4 "immediate_operand" "i")])))
578 (match_operand:VALL 1 "register_operand" "0")
579 (match_operand:SI 2 "immediate_operand" "i")))]
582 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
583 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
584 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
585 INTVAL (operands[4])));
587 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
589 [(set_attr "type" "neon_ins<q>")]
;; Logical right shift by immediate (each lane by the same amount,
;; encoded as a vector-of-duplicates constant matching "Dr").
592 (define_insn "aarch64_simd_lshr<mode>"
593 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
594 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
595 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
597 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
598 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic right shift by immediate.
601 (define_insn "aarch64_simd_ashr<mode>"
602 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
603 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
604 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
606 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
607 [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by immediate ("Dl" left-shift constant).
610 (define_insn "aarch64_simd_imm_shl<mode>"
611 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
615 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
616 [(set_attr "type" "neon_shift_imm<q>")]
;; Left shift by per-lane register amounts (SSHL with positive counts).
619 (define_insn "aarch64_simd_reg_sshl<mode>"
620 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 (match_operand:VDQ_I 2 "register_operand" "w")))]
624 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
625 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by register: unspec because a negative count shifts right,
;; which plain ashift RTL cannot express.
628 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
629 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
631 (match_operand:VDQ_I 2 "register_operand" "w")]
632 UNSPEC_ASHIFT_UNSIGNED))]
634 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
635 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by register; signed counterpart of the above.
638 (define_insn "aarch64_simd_reg_shl<mode>_signed"
639 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
640 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
641 (match_operand:VDQ_I 2 "register_operand" "w")]
642 UNSPEC_ASHIFT_SIGNED))]
644 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
645 [(set_attr "type" "neon_shift_reg<q>")]
;; Left shift by an SImode amount.  Constant in range -> immediate SHL;
;; otherwise the amount is forced to a register, duplicated across a
;; vector, and SSHL is used.
648 (define_expand "ashl<mode>3"
649 [(match_operand:VDQ_I 0 "register_operand" "")
650 (match_operand:VDQ_I 1 "register_operand" "")
651 (match_operand:SI 2 "general_operand" "")]
654 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
657 if (CONST_INT_P (operands[2]))
659 shift_amount = INTVAL (operands[2]);
660 if (shift_amount >= 0 && shift_amount < bit_width)
662 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
664 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
671 operands[2] = force_reg (SImode, operands[2]);
674 else if (MEM_P (operands[2]))
676 operands[2] = force_reg (SImode, operands[2]);
679 if (REG_P (operands[2]))
681 rtx tmp = gen_reg_rtx (<MODE>mode);
682 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
683 convert_to_mode (<VEL>mode,
686 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical right shift: constant in (0, bit_width] -> immediate USHR;
;; otherwise negate the amount, duplicate it, and use USHL (a negative
;; USHL count shifts right).
695 (define_expand "lshr<mode>3"
696 [(match_operand:VDQ_I 0 "register_operand" "")
697 (match_operand:VDQ_I 1 "register_operand" "")
698 (match_operand:SI 2 "general_operand" "")]
701 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
704 if (CONST_INT_P (operands[2]))
706 shift_amount = INTVAL (operands[2]);
707 if (shift_amount > 0 && shift_amount <= bit_width)
709 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
711 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
717 operands[2] = force_reg (SImode, operands[2]);
719 else if (MEM_P (operands[2]))
721 operands[2] = force_reg (SImode, operands[2]);
724 if (REG_P (operands[2]))
726 rtx tmp = gen_reg_rtx (SImode);
727 rtx tmp1 = gen_reg_rtx (<MODE>mode);
728 emit_insn (gen_negsi2 (tmp, operands[2]));
729 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
730 convert_to_mode (<VEL>mode,
732 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic right shift: same structure as lshr above, using SSHR for
;; immediates and SSHL with a negated count for register amounts.
742 (define_expand "ashr<mode>3"
743 [(match_operand:VDQ_I 0 "register_operand" "")
744 (match_operand:VDQ_I 1 "register_operand" "")
745 (match_operand:SI 2 "general_operand" "")]
748 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
751 if (CONST_INT_P (operands[2]))
753 shift_amount = INTVAL (operands[2]);
754 if (shift_amount > 0 && shift_amount <= bit_width)
756 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
758 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
764 operands[2] = force_reg (SImode, operands[2]);
766 else if (MEM_P (operands[2]))
768 operands[2] = force_reg (SImode, operands[2]);
771 if (REG_P (operands[2]))
773 rtx tmp = gen_reg_rtx (SImode);
774 rtx tmp1 = gen_reg_rtx (<MODE>mode);
775 emit_insn (gen_negsi2 (tmp, operands[2]));
776 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
777 convert_to_mode (<VEL>mode,
779 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift: maps directly onto SSHL.
789 (define_expand "vashl<mode>3"
790 [(match_operand:VDQ_I 0 "register_operand" "")
791 (match_operand:VDQ_I 1 "register_operand" "")
792 (match_operand:VDQ_I 2 "register_operand" "")]
795 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
800 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
801 ;; Negating individual lanes most certainly offsets the
802 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift: negate the counts, then SSHL.
803 (define_expand "vashr<mode>3"
804 [(match_operand:VDQ_BHSI 0 "register_operand" "")
805 (match_operand:VDQ_BHSI 1 "register_operand" "")
806 (match_operand:VDQ_BHSI 2 "register_operand" "")]
809 rtx neg = gen_reg_rtx (<MODE>mode);
810 emit (gen_neg<mode>2 (neg, operands[2]));
811 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode intrinsic arithmetic shift: a shift by 64 fills the result with
;; the sign bit, equivalent to asr #63, so clamp before using ashrdi3.
817 (define_expand "aarch64_ashr_simddi"
818 [(match_operand:DI 0 "register_operand" "=w")
819 (match_operand:DI 1 "register_operand" "w")
820 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
823 /* An arithmetic shift right by 64 fills the result with copies of the sign
824 bit, just like asr by 63 - however the standard pattern does not handle
826 if (INTVAL (operands[2]) == 64)
827 operands[2] = GEN_INT (63);
828 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical right shift: negate the counts, then USHL.
833 (define_expand "vlshr<mode>3"
834 [(match_operand:VDQ_BHSI 0 "register_operand" "")
835 (match_operand:VDQ_BHSI 1 "register_operand" "")
836 (match_operand:VDQ_BHSI 2 "register_operand" "")]
839 rtx neg = gen_reg_rtx (<MODE>mode);
840 emit (gen_neg<mode>2 (neg, operands[2]));
841 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode intrinsic logical shift: a shift by 64 yields zero; otherwise
;; defer to the standard lshrdi3 pattern.
846 (define_expand "aarch64_lshr_simddi"
847 [(match_operand:DI 0 "register_operand" "=w")
848 (match_operand:DI 1 "register_operand" "w")
849 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
852 if (INTVAL (operands[2]) == 64)
853 emit_move_insn (operands[0], const0_rtx);
855 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]))
;; vec_set expander: convert the lane index to the one-hot mask expected
;; by aarch64_simd_vec_set.
860 (define_expand "vec_set<mode>"
861 [(match_operand:VDQ_BHSI 0 "register_operand")
862 (match_operand:<VEL> 1 "register_operand")
863 (match_operand:SI 2 "immediate_operand")]
866 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
867 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
868 GEN_INT (elem), operands[0]));
873 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right by an immediate (element count expressed in
;; bits); direction swaps with endianness since "right" is lane order.
874 (define_insn "vec_shr_<mode>"
875 [(set (match_operand:VD 0 "register_operand" "=w")
876 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
877 (match_operand:SI 2 "immediate_operand" "i")]
881 if (BYTES_BIG_ENDIAN)
882 return "shl %d0, %d1, %2";
884 return "ushr %d0, %d1, %2";
886 [(set_attr "type" "neon_shift_imm")]
;; Insert a DI element into a V2DI vector, from a GP register or from
;; lane 0 of a SIMD register; operand 2 is a one-hot lane mask.
889 (define_insn "aarch64_simd_vec_setv2di"
890 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
893 (match_operand:DI 1 "register_operand" "r,w"))
894 (match_operand:V2DI 3 "register_operand" "0,0")
895 (match_operand:SI 2 "immediate_operand" "i,i")))]
898 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
899 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
900 switch (which_alternative)
903 return "ins\\t%0.d[%p2], %1";
905 return "ins\\t%0.d[%p2], %1.d[0]";
910 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI: lane index -> one-hot mask.
913 (define_expand "vec_setv2di"
914 [(match_operand:V2DI 0 "register_operand")
915 (match_operand:DI 1 "register_operand")
916 (match_operand:SI 2 "immediate_operand")]
919 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
920 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
921 GEN_INT (elem), operands[0]));
;; Insert a floating-point element from lane 0 of a SIMD register.
926 (define_insn "aarch64_simd_vec_set<mode>"
927 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
929 (vec_duplicate:VDQF_F16
930 (match_operand:<VEL> 1 "register_operand" "w"))
931 (match_operand:VDQF_F16 3 "register_operand" "0")
932 (match_operand:SI 2 "immediate_operand" "i")))]
935 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
937 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
938 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
940 [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors: lane index -> one-hot mask.
943 (define_expand "vec_set<mode>"
944 [(match_operand:VDQF_F16 0 "register_operand" "+w")
945 (match_operand:<VEL> 1 "register_operand" "w")
946 (match_operand:SI 2 "immediate_operand" "")]
949 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
950 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
951 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: dest (tied to operand 1) += op2 * op3.
957 (define_insn "aarch64_mla<mode>"
958 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
959 (plus:VDQ_BHSI (mult:VDQ_BHSI
960 (match_operand:VDQ_BHSI 2 "register_operand" "w")
961 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
962 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
964 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA with one multiplicand taken from a selected lane (endian-remapped).
968 (define_insn "*aarch64_mla_elt<mode>"
969 [(set (match_operand:VDQHS 0 "register_operand" "=w")
974 (match_operand:VDQHS 1 "register_operand" "<h_con>")
975 (parallel [(match_operand:SI 2 "immediate_operand")])))
976 (match_operand:VDQHS 3 "register_operand" "w"))
977 (match_operand:VDQHS 4 "register_operand" "0")))]
980 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
981 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
983 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above with the lane coming from the width-swapped mode.
986 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
987 [(set (match_operand:VDQHS 0 "register_operand" "=w")
992 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
993 (parallel [(match_operand:SI 2 "immediate_operand")])))
994 (match_operand:VDQHS 3 "register_operand" "w"))
995 (match_operand:VDQHS 4 "register_operand" "0")))]
998 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
999 INTVAL (operands[2])));
1000 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1002 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: dest (tied to operand 1) -= op2 * op3.
1005 (define_insn "aarch64_mls<mode>"
1006 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1007 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1008 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1009 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1011 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1012 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand taken from a selected lane.
1015 (define_insn "*aarch64_mls_elt<mode>"
1016 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1018 (match_operand:VDQHS 4 "register_operand" "0")
1020 (vec_duplicate:VDQHS
1022 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1023 (parallel [(match_operand:SI 2 "immediate_operand")])))
1024 (match_operand:VDQHS 3 "register_operand" "w"))))]
1027 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1028 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1030 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above with the lane coming from the width-swapped mode.
1033 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1034 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 (match_operand:VDQHS 4 "register_operand" "0")
1038 (vec_duplicate:VDQHS
1040 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))))]
1045 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1046 INTVAL (operands[2])));
1047 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1052 ;; Max/Min operations.
1053 (define_insn "<su><maxmin><mode>3"
1054 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1055 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1056 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1058 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1059 [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: there is no 64-bit-element SMAX/SMIN instruction, so
;; expand via a compare and vcond (vector conditional select).
1062 (define_expand "<su><maxmin>v2di3"
1063 [(set (match_operand:V2DI 0 "register_operand" "")
1064 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1065 (match_operand:V2DI 2 "register_operand" "")))]
1068 enum rtx_code cmp_operator;
1089 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1090 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1091 operands[2], cmp_fmt, operands[1], operands[2]));
1095 ;; Pairwise Integer Max/Min operations.
1096 (define_insn "aarch64_<maxmin_uns>p<mode>"
1097 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1098 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1099 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1102 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1103 [(set_attr "type" "neon_minmax<q>")]
1106 ;; Pairwise FP Max/Min operations.
1107 (define_insn "aarch64_<maxmin_uns>p<mode>"
1108 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1109 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1110 (match_operand:VHSDF 2 "register_operand" "w")]
1113 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1114 [(set_attr "type" "neon_minmax<q>")]
1117 ;; vec_concat gives a new vector with the low elements from operand 1, and
1118 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1119 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1120 ;; What that means, is that the RTL descriptions of the below patterns
1121 ;; need to change depending on endianness.
1123 ;; Move to the low architectural bits of the register.
1124 ;; On little-endian this is { operand, zeroes }
1125 ;; On big-endian this is { zeroes, operand }
1127 (define_insn "move_lo_quad_internal_<mode>"
1128 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1130 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1131 (vec_duplicate:<VHALF> (const_int 0))))]
1132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1137 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1138 (set_attr "simd" "yes,*,yes")
1139 (set_attr "fp" "*,yes,*")
1140 (set_attr "length" "4")]
1143 (define_insn "move_lo_quad_internal_<mode>"
1144 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1146 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1148 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1153 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1154 (set_attr "simd" "yes,*,yes")
1155 (set_attr "fp" "*,yes,*")
1156 (set_attr "length" "4")]
1159 (define_insn "move_lo_quad_internal_be_<mode>"
1160 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1162 (vec_duplicate:<VHALF> (const_int 0))
1163 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1164 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1169 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1170 (set_attr "simd" "yes,*,yes")
1171 (set_attr "fp" "*,yes,*")
1172 (set_attr "length" "4")]
1175 (define_insn "move_lo_quad_internal_be_<mode>"
1176 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1179 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1180 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1185 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1186 (set_attr "simd" "yes,*,yes")
1187 (set_attr "fp" "*,yes,*")
1188 (set_attr "length" "4")]
;; Standard-name-style expander: write operand 1 into the low architectural
;; half of quad register operand 0, zeroing the other half.  Dispatches to
;; the endian-specific internal insn, since the RTL vec_concat order of
;; { value, zeroes } differs between little- and big-endian (see the
;; move_lo_quad_internal_* patterns above).
1191 (define_expand "move_lo_quad_<mode>"
1192 [(match_operand:VQ 0 "register_operand")
1193 (match_operand:VQ 1 "register_operand")]
1196 if (BYTES_BIG_ENDIAN)
;; Big-endian: zeroes occupy the low-numbered RTL lanes.
1197 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
;; Little-endian: the value occupies the low-numbered RTL lanes.
1199 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1204 ;; Move operand1 to the high architectural bits of the register, keeping
1205 ;; the low architectural bits of operand2.
1206 ;; For little-endian this is { operand2, operand1 }
1207 ;; For big-endian this is { operand1, operand2 }
1209 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1210 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1214 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1215 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1216 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1218 ins\\t%0.d[1], %1.d[0]
1220 [(set_attr "type" "neon_ins")]
1223 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1224 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1226 (match_operand:<VHALF> 1 "register_operand" "w,r")
1229 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1232 ins\\t%0.d[1], %1.d[0]
1234 [(set_attr "type" "neon_ins")]
;; Expander: write half-width operand 1 into the high architectural half of
;; quad register operand 0, preserving the low half.  Builds the parallel
;; selecting the low RTL half (second argument false) and hands it to the
;; endian-specific aarch64_simd_move_hi_quad[_be]_<mode> insn.
1237 (define_expand "move_hi_quad_<mode>"
1238 [(match_operand:VQ 0 "register_operand" "")
1239 (match_operand:<VHALF> 1 "register_operand" "")]
;; Parallel of the lane indices making up the low half of <MODE>mode.
1242 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1243 if (BYTES_BIG_ENDIAN)
1244 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1247 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1252 ;; Narrowing operations.
1255 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1256 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1257 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1259 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1260 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two 64-bit (VDN) vectors into one narrowed vector: concatenate both
;; sources into a double-width temporary via the move_lo/hi_quad expanders,
;; then emit a single XTN (aarch64_simd_vec_pack_trunc_<Vdbl>) to truncate.
;; On big-endian the sources are swapped so that operand 1's elements still
;; end up in the lower-numbered lanes of the result.
1263 (define_expand "vec_pack_trunc_<mode>"
1264 [(match_operand:<VNARROWD> 0 "register_operand" "")
1265 (match_operand:VDN 1 "register_operand" "")
1266 (match_operand:VDN 2 "register_operand" "")]
1269 rtx tempreg = gen_reg_rtx (<VDBL>mode);
;; Which source operand feeds the low / high architectural half.
1270 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1271 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1273 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1274 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1275 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack two full-width (VQN) vectors into one narrowed quad vector using an
;; XTN / XTN2 pair.  The destination is earlyclobber ("=&w") because XTN
;; writes it before the second source is consumed by XTN2.  On big-endian
;; the sources swap roles so the vec_concat RTL semantics are honoured
;; (see the endianness note above the move_lo_quad patterns).
1281 (define_insn "vec_pack_trunc_<mode>"
1282 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1283 (vec_concat:<VNARROWQ2>
1284 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1285 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1288 if (BYTES_BIG_ENDIAN)
1289 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1291 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
;; Two machine instructions, hence type "multiple" and length 8.
1293 [(set_attr "type" "multiple")
1294 (set_attr "length" "8")]
1297 ;; Widening operations.
1299 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1302 (match_operand:VQW 1 "register_operand" "w")
1303 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1306 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1307 [(set_attr "type" "neon_shift_imm_long")]
1310 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1312 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1313 (match_operand:VQW 1 "register_operand" "w")
1314 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1317 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1318 [(set_attr "type" "neon_shift_imm_long")]
1321 (define_expand "vec_unpack<su>_hi_<mode>"
1322 [(match_operand:<VWIDE> 0 "register_operand" "")
1323 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1326 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1327 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1333 (define_expand "vec_unpack<su>_lo_<mode>"
1334 [(match_operand:<VWIDE> 0 "register_operand" "")
1335 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1339 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1345 ;; Widening arithmetic.
1347 (define_insn "*aarch64_<su>mlal_lo<mode>"
1348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1351 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 2 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 4 "register_operand" "w")
1357 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1359 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1360 [(set_attr "type" "neon_mla_<Vetype>_long")]
1363 (define_insn "*aarch64_<su>mlal_hi<mode>"
1364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1367 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1368 (match_operand:VQW 2 "register_operand" "w")
1369 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1370 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1371 (match_operand:VQW 4 "register_operand" "w")
1373 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1375 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1376 [(set_attr "type" "neon_mla_<Vetype>_long")]
1379 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:<VWIDE> 1 "register_operand" "0")
1384 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1385 (match_operand:VQW 2 "register_operand" "w")
1386 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1387 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1388 (match_operand:VQW 4 "register_operand" "w")
1391 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1392 [(set_attr "type" "neon_mla_<Vetype>_long")]
1395 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1398 (match_operand:<VWIDE> 1 "register_operand" "0")
1400 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1401 (match_operand:VQW 2 "register_operand" "w")
1402 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1403 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1404 (match_operand:VQW 4 "register_operand" "w")
1407 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1408 [(set_attr "type" "neon_mla_<Vetype>_long")]
1411 (define_insn "*aarch64_<su>mlal<mode>"
1412 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1416 (match_operand:VD_BHSI 1 "register_operand" "w"))
1418 (match_operand:VD_BHSI 2 "register_operand" "w")))
1419 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1421 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1422 [(set_attr "type" "neon_mla_<Vetype>_long")]
1425 (define_insn "*aarch64_<su>mlsl<mode>"
1426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1428 (match_operand:<VWIDE> 1 "register_operand" "0")
1431 (match_operand:VD_BHSI 2 "register_operand" "w"))
1433 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1435 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1436 [(set_attr "type" "neon_mla_<Vetype>_long")]
1439 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 1 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 2 "register_operand" "w")
1448 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1449 [(set_attr "type" "neon_mul_<Vetype>_long")]
1452 (define_expand "vec_widen_<su>mult_lo_<mode>"
1453 [(match_operand:<VWIDE> 0 "register_operand" "")
1454 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1455 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1458 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1459 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1466 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1468 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1469 (match_operand:VQW 1 "register_operand" "w")
1470 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1471 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1472 (match_operand:VQW 2 "register_operand" "w")
1475 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1476 [(set_attr "type" "neon_mul_<Vetype>_long")]
1479 (define_expand "vec_widen_<su>mult_hi_<mode>"
1480 [(match_operand:<VWIDE> 0 "register_operand" "")
1481 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1482 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1486 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1494 ;; FP vector operations.
1495 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1496 ;; double-precision (64-bit) floating-point data types and arithmetic as
1497 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1498 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1500 ;; Floating-point operations can raise an exception. Vectorizing such
1501 ;; operations are safe because of reasons explained below.
1503 ;; ARMv8 permits an extension to enable trapped floating-point
1504 ;; exception handling, however this is an optional feature. In the
1505 ;; event of a floating-point exception being raised by vectorised
1507 ;; 1. If trapped floating-point exceptions are available, then a trap
1508 ;; will be taken when any lane raises an enabled exception. A trap
1509 ;; handler may determine which lane raised the exception.
1510 ;; 2. Alternatively a sticky exception flag is set in the
1511 ;; floating-point status register (FPSR). Software may explicitly
1512 ;; test the exception flags, in which case the tests will either
1513 ;; prevent vectorisation, allowing precise identification of the
1514 ;; failing operation, or if tested outside of vectorisable regions
1515 ;; then the specific operation and lane are not of interest.
1517 ;; FP arithmetic operations.
1519 (define_insn "add<mode>3"
1520 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1521 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1522 (match_operand:VHSDF 2 "register_operand" "w")))]
1524 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1525 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1528 (define_insn "sub<mode>3"
1529 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1530 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1531 (match_operand:VHSDF 2 "register_operand" "w")))]
1533 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1534 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1537 (define_insn "mul<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540 (match_operand:VHSDF 2 "register_operand" "w")))]
1542 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1546 (define_expand "div<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549 (match_operand:VHSDF 2 "register_operand" "w")))]
1552 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1555 operands[1] = force_reg (<MODE>mode, operands[1]);
1558 (define_insn "*div<mode>3"
1559 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1560 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1561 (match_operand:VHSDF 2 "register_operand" "w")))]
1563 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1564 [(set_attr "type" "neon_fp_div_<stype><q>")]
1567 (define_insn "neg<mode>2"
1568 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1571 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1572 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1575 (define_insn "abs<mode>2"
1576 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1577 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1579 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1580 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1583 (define_insn "fma<mode>4"
1584 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1585 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1586 (match_operand:VHSDF 2 "register_operand" "w")
1587 (match_operand:VHSDF 3 "register_operand" "0")))]
1589 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1590 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1593 (define_insn "*aarch64_fma4_elt<mode>"
1594 [(set (match_operand:VDQF 0 "register_operand" "=w")
1598 (match_operand:VDQF 1 "register_operand" "<h_con>")
1599 (parallel [(match_operand:SI 2 "immediate_operand")])))
1600 (match_operand:VDQF 3 "register_operand" "w")
1601 (match_operand:VDQF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1605 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1607 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1610 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1611 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1613 (vec_duplicate:VDQSF
1615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1616 (parallel [(match_operand:SI 2 "immediate_operand")])))
1617 (match_operand:VDQSF 3 "register_operand" "w")
1618 (match_operand:VDQSF 4 "register_operand" "0")))]
1621 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1622 INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1628 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1629 [(set (match_operand:VMUL 0 "register_operand" "=w")
1632 (match_operand:<VEL> 1 "register_operand" "w"))
1633 (match_operand:VMUL 2 "register_operand" "w")
1634 (match_operand:VMUL 3 "register_operand" "0")))]
1636 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1637 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1640 (define_insn "*aarch64_fma4_elt_to_64v2df"
1641 [(set (match_operand:DF 0 "register_operand" "=w")
1644 (match_operand:V2DF 1 "register_operand" "w")
1645 (parallel [(match_operand:SI 2 "immediate_operand")]))
1646 (match_operand:DF 3 "register_operand" "w")
1647 (match_operand:DF 4 "register_operand" "0")))]
1650 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1651 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1653 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1656 (define_insn "fnma<mode>4"
1657 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1659 (match_operand:VHSDF 1 "register_operand" "w")
1661 (match_operand:VHSDF 2 "register_operand" "w"))
1662 (match_operand:VHSDF 3 "register_operand" "0")))]
1664 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1668 (define_insn "*aarch64_fnma4_elt<mode>"
1669 [(set (match_operand:VDQF 0 "register_operand" "=w")
1672 (match_operand:VDQF 3 "register_operand" "w"))
1675 (match_operand:VDQF 1 "register_operand" "<h_con>")
1676 (parallel [(match_operand:SI 2 "immediate_operand")])))
1677 (match_operand:VDQF 4 "register_operand" "0")))]
1680 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1681 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1683 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1686 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1687 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1690 (match_operand:VDQSF 3 "register_operand" "w"))
1691 (vec_duplicate:VDQSF
1693 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQSF 4 "register_operand" "0")))]
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1699 INTVAL (operands[2])));
1700 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1702 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1705 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1706 [(set (match_operand:VMUL 0 "register_operand" "=w")
1709 (match_operand:VMUL 2 "register_operand" "w"))
1711 (match_operand:<VEL> 1 "register_operand" "w"))
1712 (match_operand:VMUL 3 "register_operand" "0")))]
1714 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1715 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1718 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1719 [(set (match_operand:DF 0 "register_operand" "=w")
1722 (match_operand:V2DF 1 "register_operand" "w")
1723 (parallel [(match_operand:SI 2 "immediate_operand")]))
1725 (match_operand:DF 3 "register_operand" "w"))
1726 (match_operand:DF 4 "register_operand" "0")))]
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1730 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1732 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1735 ;; Vector versions of the floating-point frint patterns.
1736 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1737 (define_insn "<frint_pattern><mode>2"
1738 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1739 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1742 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1743 [(set_attr "type" "neon_fp_round_<stype><q>")]
1746 ;; Vector versions of the fcvt standard patterns.
1747 ;; Expands to lbtrunc, lround, lceil, lfloor
1748 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1749 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1750 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1751 [(match_operand:VHSDF 1 "register_operand" "w")]
1754 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1755 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1758 ;; HF Scalar variants of related SIMD instructions.
1759 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1760 [(set (match_operand:HI 0 "register_operand" "=w")
1761 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1763 "TARGET_SIMD_F16INST"
1764 "fcvt<frint_suffix><su>\t%h0, %h1"
1765 [(set_attr "type" "neon_fp_to_int_s")]
1768 (define_insn "<optab>_trunchfhi2"
1769 [(set (match_operand:HI 0 "register_operand" "=w")
1770 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1771 "TARGET_SIMD_F16INST"
1772 "fcvtz<su>\t%h0, %h1"
1773 [(set_attr "type" "neon_fp_to_int_s")]
1776 (define_insn "<optab>hihf2"
1777 [(set (match_operand:HF 0 "register_operand" "=w")
1778 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1779 "TARGET_SIMD_F16INST"
1780 "<su_optab>cvtf\t%h0, %h1"
1781 [(set_attr "type" "neon_int_to_fp_s")]
1784 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1785 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1786 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1788 (match_operand:VDQF 1 "register_operand" "w")
1789 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1792 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1793 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1795 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1797 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1798 output_asm_insn (buf, operands);
1801 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1804 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1805 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1806 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1807 [(match_operand:VHSDF 1 "register_operand")]
1812 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1813 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1814 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1815 [(match_operand:VHSDF 1 "register_operand")]
1820 (define_expand "ftrunc<VHSDF:mode>2"
1821 [(set (match_operand:VHSDF 0 "register_operand")
1822 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
1827 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1828 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1830 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1832 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1833 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1836 ;; Conversions between vectors of floats and doubles.
1837 ;; Contains a mix of patterns to match standard pattern names
1838 ;; and those for intrinsics.
1840 ;; Float widening operations.
1842 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1843 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1844 (float_extend:<VWIDE> (vec_select:<VHALF>
1845 (match_operand:VQ_HSF 1 "register_operand" "w")
1846 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1849 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1850 [(set_attr "type" "neon_fp_cvt_widen_s")]
1853 ;; Convert between fixed-point and floating-point (vector modes)
1855 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1856 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1857 (unspec:<VHSDF:FCVT_TARGET>
1858 [(match_operand:VHSDF 1 "register_operand" "w")
1859 (match_operand:SI 2 "immediate_operand" "i")]
1862 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1863 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
1866 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1867 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1868 (unspec:<VDQ_HSDI:FCVT_TARGET>
1869 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1870 (match_operand:SI 2 "immediate_operand" "i")]
1873 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1874 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1877 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1878 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1879 ;; the meaning of HI and LO changes depending on the target endianness.
1880 ;; While elsewhere we map the higher numbered elements of a vector to
1881 ;; the lower architectural lanes of the vector, for these patterns we want
1882 ;; to always treat "hi" as referring to the higher architectural lanes.
1883 ;; Consequently, while the patterns below look inconsistent with our
1884 ;; other big-endian patterns their behavior is as required.
;; Float-widening unpack of the LOW architectural half (see the ??? comment
;; above: "lo"/"hi" here always refer to architectural lanes, regardless of
;; endianness).  Builds the lo-half lane parallel (second argument false)
;; and emits the FCVTL insn aarch64_simd_vec_unpacks_lo_<mode>.
1886 (define_expand "vec_unpacks_lo_<mode>"
1887 [(match_operand:<VWIDE> 0 "register_operand" "")
1888 (match_operand:VQ_HSF 1 "register_operand" "")]
1891 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1892 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1898 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1899 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1900 (float_extend:<VWIDE> (vec_select:<VHALF>
1901 (match_operand:VQ_HSF 1 "register_operand" "w")
1902 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1905 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1906 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Float-widening unpack of the HIGH architectural half (FCVTL2).  Builds
;; the hi-half lane parallel (second argument true) and must emit the
;; matching _hi_ insn: aarch64_simd_vec_unpacks_hi_<mode> only accepts a
;; vect_par_cnst_hi_half parallel, while the _lo_ insn (previously emitted
;; here by mistake) only accepts vect_par_cnst_lo_half and would fail to
;; match.  This mirrors the integer vec_unpack<su>_hi_<mode> expander.
1909 (define_expand "vec_unpacks_hi_<mode>"
1910 [(match_operand:<VWIDE> 0 "register_operand" "")
1911 (match_operand:VQ_HSF 1 "register_operand" "")]
1914 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1915 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
1920 (define_insn "aarch64_float_extend_lo_<Vwide>"
1921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1922 (float_extend:<VWIDE>
1923 (match_operand:VDF 1 "register_operand" "w")))]
1925 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1926 [(set_attr "type" "neon_fp_cvt_widen_s")]
1929 ;; Float narrowing operations.
1931 (define_insn "aarch64_float_truncate_lo_<mode>"
1932 [(set (match_operand:VDF 0 "register_operand" "=w")
1934 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1936 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1937 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1940 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1941 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1943 (match_operand:VDF 1 "register_operand" "0")
1945 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1947 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1948 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1951 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1952 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1955 (match_operand:<VWIDE> 2 "register_operand" "w"))
1956 (match_operand:VDF 1 "register_operand" "0")))]
1957 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1958 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1959 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Intrinsic expander for FCVTN2: narrow wide operand 2 into the high half
;; of the result while keeping operand 1 in the low half.  Selects between
;; the _be and _le internal insns, whose vec_concat operand order differs
;; with endianness.
1962 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1963 [(match_operand:<VDBL> 0 "register_operand" "=w")
1964 (match_operand:VDF 1 "register_operand" "0")
1965 (match_operand:<VWIDE> 2 "register_operand" "w")]
;; Pick the endian-appropriate generator, then emit it.
1968 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1969 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1970 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1971 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Narrow two V2DF vectors to one V4SF: FCVTN the first pair into a V2SF
;; temporary (the low half), then FCVTN2 the second pair into the high half
;; of the result.  Big-endian swaps which source operand is "lo"/"hi" so
;; operand 1's elements land in the lower-numbered lanes.
1976 (define_expand "vec_pack_trunc_v2df"
1977 [(set (match_operand:V4SF 0 "register_operand")
1979 (float_truncate:V2SF
1980 (match_operand:V2DF 1 "register_operand"))
1981 (float_truncate:V2SF
1982 (match_operand:V2DF 2 "register_operand"))
1986 rtx tmp = gen_reg_rtx (V2SFmode);
;; Endian-dependent choice of which source forms each half.
1987 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1988 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1990 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1991 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1992 tmp, operands[hi]));
;; Narrow two scalar DFs to one V2SF: first concatenate them into a V2DF
;; temporary via move_lo_quad/move_hi_quad, then FCVTN that pair down to
;; V2SF.  As in the vector variant, big-endian swaps the lo/hi sources.
1997 (define_expand "vec_pack_trunc_df"
1998 [(set (match_operand:V2SF 0 "register_operand")
2001 (match_operand:DF 1 "register_operand"))
2003 (match_operand:DF 2 "register_operand"))
2007 rtx tmp = gen_reg_rtx (V2SFmode);
;; NOTE(review): tmp feeds gen_move_*_quad_v2df below, which operate on
;; V2DF — presumably the dropped context gives the V2DF temporary; the
;; visible gen_reg_rtx mode could not be confirmed from this excerpt.
2008 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2009 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2011 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2012 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2013 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2019 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2021 ;; a = (b < c) ? b : c;
2022 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2023 ;; either explicitly or indirectly via -ffast-math.
2025 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2026 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2027 ;; operand will be returned when both operands are zero (i.e. they may not
2028 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2029 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2032 (define_insn "<su><maxmin><mode>3"
2033 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2034 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2035 (match_operand:VHSDF 2 "register_operand" "w")))]
2037 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2038 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2041 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2042 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2043 ;; which implement the IEEE fmax ()/fmin () functions.
2044 (define_insn "<maxmin_uns><mode>3"
2045 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2046 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2047 (match_operand:VHSDF 2 "register_operand" "w")]
2050 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2051 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2054 ;; 'across lanes' add.
;; Across-lanes integer add reduction returning a scalar.  Runs the ADDV
;; reduction into a full-vector scratch, then extracts architectural lane 0
;; (ENDIAN_LANE_N maps it to the correct RTL lane number for the target
;; endianness).
2056 (define_expand "reduc_plus_scal_<mode>"
2057 [(match_operand:<VEL> 0 "register_operand" "=w")
2058 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2062 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2063 rtx scratch = gen_reg_rtx (<MODE>mode);
2064 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2065 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2070 (define_insn "aarch64_faddp<mode>"
2071 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2072 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2073 (match_operand:VHSDF 2 "register_operand" "w")]
2076 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2077 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2080 (define_insn "aarch64_reduc_plus_internal<mode>"
2081 [(set (match_operand:VDQV 0 "register_operand" "=w")
2082 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2085 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2086 [(set_attr "type" "neon_reduc_add<q>")]
2089 (define_insn "aarch64_reduc_plus_internalv2si"
2090 [(set (match_operand:V2SI 0 "register_operand" "=w")
2091 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2094 "addp\\t%0.2s, %1.2s, %1.2s"
2095 [(set_attr "type" "neon_reduc_add")]
2098 (define_insn "reduc_plus_scal_<mode>"
2099 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2100 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2103 "faddp\\t%<Vetype>0, %1.<Vtype>"
2104 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; FP add reduction of V4SF to a scalar SF.  There is no single FADDV
;; instruction, so two pairwise FADDP passes fold the four lanes down to
;; the total sum, after which architectural lane 0 is extracted
;; (endian-adjusted via ENDIAN_LANE_N).
2107 (define_expand "reduc_plus_scal_v4sf"
2108 [(set (match_operand:SF 0 "register_operand")
2109 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2113 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2114 rtx scratch = gen_reg_rtx (V4SFmode);
;; First pass: lane-pair sums; second pass: total in every lane.
2115 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2116 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2117 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2121 (define_insn "clrsb<mode>2"
2122 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2123 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2125 "cls\\t%0.<Vtype>, %1.<Vtype>"
2126 [(set_attr "type" "neon_cls<q>")]
2129 (define_insn "clz<mode>2"
2130 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2131 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2133 "clz\\t%0.<Vtype>, %1.<Vtype>"
2134 [(set_attr "type" "neon_cls<q>")]
2137 (define_insn "popcount<mode>2"
2138 [(set (match_operand:VB 0 "register_operand" "=w")
2139 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2141 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2142 [(set_attr "type" "neon_cnt<q>")]
2145 ;; 'across lanes' max and min ops.
2147 ;; Template for outputting a scalar, so we can create __builtins which can be
2148 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP across-lanes max/min reduction to a scalar: reduce into a vector
;; scratch register, then extract lane 0 (endian-corrected).
2149 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2150 [(match_operand:<VEL> 0 "register_operand")
2151 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2155 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2156 rtx scratch = gen_reg_rtx (<MODE>mode);
2157 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2159 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2164 ;; Likewise for integer cases, signed and unsigned.
2165 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2166 [(match_operand:<VEL> 0 "register_operand")
2167 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2171 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2172 rtx scratch = gen_reg_rtx (<MODE>mode);
2173 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2175 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes reduction via the <maxmin_uns_op>V instructions.
2180 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2181 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2182 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2185 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2186 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI special case: a single pairwise op (<maxmin_uns_op>P) on the two
;; lanes is the whole reduction.
2189 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2190 [(set (match_operand:V2SI 0 "register_operand" "=w")
2191 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2194 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2195 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes reduction; <vp> selects the instruction-suffix form
;; appropriate to the mode.
2198 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2199 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2200 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2203 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2204 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2207 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2209 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2212 ;; Thus our BSL is of the form:
2213 ;; op0 = bsl (mask, op2, op3)
2214 ;; We can use any of:
2217 ;; bsl mask, op1, op2
2218 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2219 ;; bit op0, op2, mask
2220 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2221 ;; bif op0, op1, mask
2223 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2224 ;; Some forms of straight-line code may generate the equivalent form
2225 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical XOR/AND/XOR representation of bit-select.  The three register
;; alternatives tie op0 to operand 1, 3 or 2 respectively, which selects
;; the BSL, BIT or BIF encoding of the same operation.
2227 (define_insn "aarch64_simd_bsl<mode>_internal"
2228 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2232 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2233 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2234 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2235 (match_dup:<V_cmp_result> 3)
2239 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2240 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2241 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2242 [(set_attr "type" "neon_bsl<q>")]
2245 ;; We need this form in addition to the above pattern to match the case
2246 ;; when combine tries merging three insns such that the second operand of
2247 ;; the outer XOR matches the second operand of the inner XOR rather than
2248 ;; the first. The two are equivalent but since recog doesn't try all
2249 ;; permutations of commutative operations, we have to have a separate pattern.
;; Commuted variant of the pattern above: the inner XOR's operands appear
;; in the opposite order, but the assembly emitted is equivalent.
2251 (define_insn "*aarch64_simd_bsl<mode>_alt"
2252 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2256 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2257 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2258 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2259 (match_dup:VSDQ_I_DI 2)))]
2262 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2263 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2264 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2265 [(set_attr "type" "neon_bsl<q>")]
;; Public bit-select expander.  Floating-point modes are handled by
;; punning operands to the equal-sized integer mode (<V_cmp_result>) via
;; gen_lowpart, doing the select there, and moving the result back.
2268 (define_expand "aarch64_simd_bsl<mode>"
2269 [(match_operand:VALLDIF 0 "register_operand")
2270 (match_operand:<V_cmp_result> 1 "register_operand")
2271 (match_operand:VALLDIF 2 "register_operand")
2272 (match_operand:VALLDIF 3 "register_operand")]
2275 /* We can't alias operands together if they have different modes. */
2276 rtx tmp = operands[0];
2277 if (FLOAT_MODE_P (<MODE>mode))
2279 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2280 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2281 tmp = gen_reg_rtx (<V_cmp_result>mode);
2283 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2284 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2288 if (tmp != operands[0])
2289 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Select between operands 1 and 2 under the (integer) mask operand 3.
;; Degenerate masks are short-circuited: (P ? -1 : 0) is just a move of
;; the mask, (P ? 0 : -1) is its bitwise NOT; otherwise the values are
;; forced into registers and a bit-select is emitted.
2294 (define_expand "vcond_mask_<mode><v_cmp_result>"
2295 [(match_operand:VALLDI 0 "register_operand")
2296 (match_operand:VALLDI 1 "nonmemory_operand")
2297 (match_operand:VALLDI 2 "nonmemory_operand")
2298 (match_operand:<V_cmp_result> 3 "register_operand")]
2301 /* If we have (a = (P) ? -1 : 0);
2302 Then we can simply move the generated mask (result must be int). */
2303 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2304 && operands[2] == CONST0_RTX (<MODE>mode))
2305 emit_move_insn (operands[0], operands[3]);
2306 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2307 else if (operands[1] == CONST0_RTX (<MODE>mode)
2308 && operands[2] == CONSTM1_RTX (<MODE>mode))
2309 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
2312 if (!REG_P (operands[1]))
2313 operands[1] = force_reg (<MODE>mode, operands[1]);
2314 if (!REG_P (operands[2]))
2315 operands[2] = force_reg (<MODE>mode, operands[2]);
2316 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2317 operands[1], operands[2]));
2323 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros mask.  The rtx
;; code of operand 1 selects which CM* instruction pattern to emit;
;; unsigned LTU/GTU-style cases are realized by swapping operands of
;; CMHI/CMHS (see the swapped operand order below), and NE is emitted as
;; NOT (EQ).  A zero second operand is detected so the immediate-zero
;; compare forms can be used — TODO confirm against the elided case labels.
2325 (define_expand "vec_cmp<mode><mode>"
2326 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2327 (match_operator 1 "comparison_operator"
2328 [(match_operand:VSDQ_I_DI 2 "register_operand")
2329 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2332 rtx mask = operands[0];
2333 enum rtx_code code = GET_CODE (operands[1]);
2343 if (operands[3] == CONST0_RTX (<MODE>mode))
2348 if (!REG_P (operands[3]))
2349 operands[3] = force_reg (<MODE>mode, operands[3]);
2357 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2361 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2365 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2369 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2373 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2377 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2381 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2385 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2389 /* Handle NE as !EQ. */
2390 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2391 emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
2395 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; Floating-point vector compare producing an integer mask.  Ordered
;; comparisons map directly to FCMGE/FCMGT/FCMEQ (with operand swaps for
;; LT/LE); unordered comparisons are synthesized from the ordered forms
;; plus inversion and OR, as explained by the inline comments below,
;; because FCM* returns false for unordered lanes.
2405 (define_expand "vec_cmp<mode><v_cmp_result>"
2406 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2407 (match_operator 1 "comparison_operator"
2408 [(match_operand:VDQF 2 "register_operand")
2409 (match_operand:VDQF 3 "nonmemory_operand")]))]
2412 int use_zero_form = 0;
2413 enum rtx_code code = GET_CODE (operands[1]);
2414 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
2416 rtx (*comparison) (rtx, rtx, rtx);
2425 if (operands[3] == CONST0_RTX (<MODE>mode))
2432 if (!REG_P (operands[3]))
2433 operands[3] = force_reg (<MODE>mode, operands[3]);
2443 comparison = gen_aarch64_cmlt<mode>;
2446 /* Else, fall through. */
2448 std::swap (operands[2], operands[3]);
2452 comparison = gen_aarch64_cmgt<mode>;
2457 comparison = gen_aarch64_cmle<mode>;
2460 /* Else, fall through. */
2462 std::swap (operands[2], operands[3]);
2466 comparison = gen_aarch64_cmge<mode>;
2470 comparison = gen_aarch64_cmeq<mode>;
2487 /* FCM returns false for lanes which are unordered, so if we use
2488 the inverse of the comparison we actually want to emit, then
2489 invert the result, we will end up with the correct result.
2490 Note that a NE NaN and NaN NE b are true for all a, b.
2492 Our transformations are:
2493 a UNGE b -> !(b GT a)
2494 a UNGT b -> !(b GE a)
2495 a UNLE b -> !(a GT b)
2496 a UNLT b -> !(a GE b)
2497 a NE b -> !(a EQ b) */
2498 emit_insn (comparison (operands[0], operands[2], operands[3]));
2499 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2507 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2508 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2514 emit_insn (comparison (operands[0], operands[2], operands[3]));
2518 /* We first check (a > b || b > a) which is !UNEQ, inverting
2519 this result will then give us (a == b || a UNORDERED b). */
2520 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2521 operands[2], operands[3]));
2522 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2523 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2524 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2528 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2529 UNORDERED as !ORDERED. */
2530 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]))
2531 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2532 operands[3], operands[2]));
2533 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2534 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2538 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2539 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2540 operands[3], operands[2]));
2541 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
;; Unsigned integer compare: delegates to the signed vec_cmp expander,
;; which already dispatches on the rtx code carried in operand 1.
2551 (define_expand "vec_cmpu<mode><mode>"
2552 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2553 (match_operator 1 "comparison_operator"
2554 [(match_operand:VSDQ_I_DI 2 "register_operand")
2555 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2558 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2559 operands[2], operands[3]));
;; vcond: compare operands 4/5 into a mask, then select between
;; operands 1 and 2 with vcond_mask.
2563 (define_expand "vcond<mode><mode>"
2564 [(set (match_operand:VALLDI 0 "register_operand")
2565 (if_then_else:VALLDI
2566 (match_operator 3 "comparison_operator"
2567 [(match_operand:VALLDI 4 "register_operand")
2568 (match_operand:VALLDI 5 "nonmemory_operand")])
2569 (match_operand:VALLDI 1 "nonmemory_operand")
2570 (match_operand:VALLDI 2 "nonmemory_operand")))]
2573 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2574 enum rtx_code code = GET_CODE (operands[3]);
2576 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2577 operands[4], operands[5]));
2578 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2579 operands[2], mask));
;; Mixed-mode vcond: FP comparison (VDQF_COND) selecting between values
;; of the corresponding equal-width mode <V_cmp_mixed>.
2584 (define_expand "vcond<v_cmp_mixed><mode>"
2585 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2586 (if_then_else:<V_cmp_mixed>
2587 (match_operator 3 "comparison_operator"
2588 [(match_operand:VDQF_COND 4 "register_operand")
2589 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2590 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2591 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2594 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2595 enum rtx_code code = GET_CODE (operands[3]);
2597 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2598 operands[4], operands[5]));
2599 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
2600 operands[0], operands[1],
2601 operands[2], mask));
;; vcondu: as vcond but the comparison is unsigned integer.
2606 (define_expand "vcondu<mode><mode>"
2607 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2608 (if_then_else:VSDQ_I_DI
2609 (match_operator 3 "comparison_operator"
2610 [(match_operand:VSDQ_I_DI 4 "register_operand")
2611 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2612 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2613 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2616 rtx mask = gen_reg_rtx (<MODE>mode);
2617 enum rtx_code code = GET_CODE (operands[3]);
2619 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2620 operands[4], operands[5]));
2621 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2622 operands[2], mask));
;; Mixed-mode vcondu: integer comparison selecting between FP values.
2626 (define_expand "vcondu<mode><v_cmp_mixed>"
2627 [(set (match_operand:VDQF 0 "register_operand")
2629 (match_operator 3 "comparison_operator"
2630 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2631 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2632 (match_operand:VDQF 1 "nonmemory_operand")
2633 (match_operand:VDQF 2 "nonmemory_operand")))]
2636 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2637 enum rtx_code code = GET_CODE (operands[3]);
2639 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2641 operands[4], operands[5]));
2642 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2643 operands[2], mask));
2647 ;; Patterns for AArch64 SIMD Intrinsics.
2649 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract a lane and sign-extend it into a GP register; the lane
;; index is flipped for big-endian via ENDIAN_LANE_N before printing.
2650 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2651 [(set (match_operand:GPI 0 "register_operand" "=r")
2654 (match_operand:VDQQH 1 "register_operand" "w")
2655 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2658 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2659 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2661 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: extract a lane zero-extended into a 32-bit GP register.
2664 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2665 [(set (match_operand:SI 0 "register_operand" "=r")
2668 (match_operand:VDQQH 1 "register_operand" "w")
2669 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2672 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2673 return "umov\\t%w0, %1.<Vetype>[%2]";
2675 [(set_attr "type" "neon_to_gp<q>")]
2678 ;; Lane extraction of a value, neither sign nor zero extension
2679 ;; is guaranteed so upper bits should be considered undefined.
2680 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives by destination: GP register (UMOV), SIMD register
;; (DUP), or memory (ST1 of one lane).
2681 (define_insn "aarch64_get_lane<mode>"
2682 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2684 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2685 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2688 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2689 switch (which_alternative)
2692 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2694 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2696 return "st1\\t{%1.<Vetype>}[%2], %0";
2701 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2704 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concatenate a 64-bit value with zero into a 128-bit register
;; (little-endian lane order); alternatives cover SIMD reg, GP reg and
;; memory sources.
2707 (define_insn "*aarch64_combinez<mode>"
2708 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2710 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2711 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2712 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2717 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2718 (set_attr "simd" "yes,*,yes")
2719 (set_attr "fp" "*,yes,*")]
;; Big-endian counterpart: the vec_concat operand order is reversed.
2722 (define_insn "*aarch64_combinez_be<mode>"
2723 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2725 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2726 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2727 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2732 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2733 (set_attr "simd" "yes,*,yes")
2734 (set_attr "fp" "*,yes,*")]
;; Combine two 64-bit halves into a 128-bit vector; endianness handling
;; is delegated to the internal pattern below.
2737 (define_expand "aarch64_combine<mode>"
2738 [(match_operand:<VDBL> 0 "register_operand")
2739 (match_operand:VDC 1 "register_operand")
2740 (match_operand:VDC 2 "register_operand")]
2744 if (BYTES_BIG_ENDIAN)
2754 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2))
;; Kept as a single insn until reload, then split; the split swaps the
;; halves for big-endian.  The earlyclobber ("=&w") keeps the
;; destination distinct from the sources across the two-move split.
2759 (define_insn_and_split "aarch64_combine_internal<mode>"
2760 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2761 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2762 (match_operand:VDC 2 "register_operand" "w")))]
2765 "&& reload_completed"
2768 if (BYTES_BIG_ENDIAN)
2769 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2771 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2774 [(set_attr "type" "multiple")]
;; Intrinsic-level combine: write the low then the high quadword half.
2777 (define_expand "aarch64_simd_combine<mode>"
2778 [(match_operand:<VDBL> 0 "register_operand")
2779 (match_operand:VDC 1 "register_operand")
2780 (match_operand:VDC 2 "register_operand")]
2783 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2784 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2787 [(set_attr "type" "multiple")]
2790 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q registers:
;; [SU](ADD|SUB)L2.  Operand 3 is the parallel selecting the high lanes.
2792 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2793 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2794 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2795 (match_operand:VQW 1 "register_operand" "w")
2796 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2797 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2798 (match_operand:VQW 2 "register_operand" "w")
2801 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2802 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub on the LOW halves: [SU](ADD|SUB)L.
2805 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2806 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2807 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2808 (match_operand:VQW 1 "register_operand" "w")
2809 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2810 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2811 (match_operand:VQW 2 "register_operand" "w")
2814 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2815 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders for the "2" (high-half) widening forms: build the
;; hi-half lane selector and defer to the *_hi_internal pattern.
2819 (define_expand "aarch64_saddl2<mode>"
2820 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2821 (match_operand:VQW 1 "register_operand" "w")
2822 (match_operand:VQW 2 "register_operand" "w")]
2825 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2826 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2831 (define_expand "aarch64_uaddl2<mode>"
2832 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2833 (match_operand:VQW 1 "register_operand" "w")
2834 (match_operand:VQW 2 "register_operand" "w")]
2837 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2838 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2843 (define_expand "aarch64_ssubl2<mode>"
2844 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2845 (match_operand:VQW 1 "register_operand" "w")
2846 (match_operand:VQW 2 "register_operand" "w")]
2849 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2850 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2855 (define_expand "aarch64_usubl2<mode>"
2856 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2857 (match_operand:VQW 1 "register_operand" "w")
2858 (match_operand:VQW 2 "register_operand" "w")]
2861 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2862 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-D-register widening add/sub: [SU](ADD|SUB)L on 64-bit inputs.
2867 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2868 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2869 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2870 (match_operand:VD_BHSI 1 "register_operand" "w"))
2872 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2874 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2875 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2878 ;; <su><addsub>w<q>.
;; Widening signed sum of a Q-register vector into a double-width
;; accumulator: SADDW on the low half, then SADDW2 on the high half.
2880 (define_expand "widen_ssum<mode>3"
2881 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2882 (plus:<VDBLW> (sign_extend:<VDBLW>
2883 (match_operand:VQW 1 "register_operand" ""))
2884 (match_operand:<VDBLW> 2 "register_operand" "")))]
2887 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2888 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2890 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2892 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; 64-bit input variant: a single SADDW suffices.
2897 (define_expand "widen_ssum<mode>3"
2898 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2899 (plus:<VWIDE> (sign_extend:<VWIDE>
2900 (match_operand:VD_BHSI 1 "register_operand" ""))
2901 (match_operand:<VWIDE> 2 "register_operand" "")))]
2904 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two expanders above (UADDW/UADDW2).
2908 (define_expand "widen_usum<mode>3"
2909 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2910 (plus:<VDBLW> (zero_extend:<VDBLW>
2911 (match_operand:VQW 1 "register_operand" ""))
2912 (match_operand:<VDBLW> 2 "register_operand" "")))]
2915 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2916 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2918 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2920 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2925 (define_expand "widen_usum<mode>3"
2926 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2927 (plus:<VWIDE> (zero_extend:<VWIDE>
2928 (match_operand:VD_BHSI 1 "register_operand" ""))
2929 (match_operand:<VWIDE> 2 "register_operand" "")))]
2932 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; [SU](ADD|SUB)W: wide accumulator op narrow 64-bit vector.
2936 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2937 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2938 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2940 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2942 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2943 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same, drawing the narrow operand from the LOW half of a Q register.
2946 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2947 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2948 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2951 (match_operand:VQW 2 "register_operand" "w")
2952 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2954 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2955 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half form: [SU](ADD|SUB)W2.
2958 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2959 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2960 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2963 (match_operand:VQW 2 "register_operand" "w")
2964 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2966 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2967 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Intrinsic expanders for the w2 forms: build the hi-half selector and
;; defer to the *_internal patterns above.
2970 (define_expand "aarch64_saddw2<mode>"
2971 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2972 (match_operand:<VWIDE> 1 "register_operand" "w")
2973 (match_operand:VQW 2 "register_operand" "w")]
2976 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2977 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2982 (define_expand "aarch64_uaddw2<mode>"
2983 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2984 (match_operand:<VWIDE> 1 "register_operand" "w")
2985 (match_operand:VQW 2 "register_operand" "w")]
2988 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2989 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2995 (define_expand "aarch64_ssubw2<mode>"
2996 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2997 (match_operand:<VWIDE> 1 "register_operand" "w")
2998 (match_operand:VQW 2 "register_operand" "w")]
3001 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3002 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3007 (define_expand "aarch64_usubw2<mode>"
3008 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3009 (match_operand:<VWIDE> 1 "register_operand" "w")
3010 (match_operand:VQW 2 "register_operand" "w")]
3013 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3014 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3019 ;; <su><r>h<addsub>.
;; Halving add/sub, optionally rounding: [SU][R]H(ADD|SUB) selected by
;; the <sur> iterator.
3021 (define_insn "aarch64_<sur>h<addsub><mode>"
3022 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3023 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3024 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3027 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3028 [(set_attr "type" "neon_<addsub>_halve<q>")]
3031 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: [R](ADD|SUB)HN writes the narrowed high
;; halves of the element-wise result.
3033 (define_insn "aarch64_<sur><addsub>hn<mode>"
3034 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3035 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3036 (match_operand:VQN 2 "register_operand" "w")]
3039 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3040 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" variant: narrows into the HIGH half of the destination while
;; operand 1 (tied to the output, constraint "0") supplies the low half.
3043 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3044 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3045 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3046 (match_operand:VQN 2 "register_operand" "w")
3047 (match_operand:VQN 3 "register_operand" "w")]
3050 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3051 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial (carry-less) multiply on byte vectors: PMUL.
3056 (define_insn "aarch64_pmul<mode>"
3057 [(set (match_operand:VB 0 "register_operand" "=w")
3058 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3059 (match_operand:VB 2 "register_operand" "w")]
3062 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3063 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: extended FP multiply (vector and scalar modes via VHSDF_HSDF).
3068 (define_insn "aarch64_fmulx<mode>"
3069 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3071 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3072 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3075 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3076 [(set_attr "type" "neon_fp_mul_<stype>")]
3079 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by an element of a vector of the OTHER width (<VSWAP_WIDTH>);
;; the lane index is endian-corrected before printing.
3081 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3082 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3084 [(match_operand:VDQSF 1 "register_operand" "w")
3085 (vec_duplicate:VDQSF
3087 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3088 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3092 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3093 INTVAL (operands[3])));
3094 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3096 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3099 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by an element of a same-width vector.
3101 (define_insn "*aarch64_mulx_elt<mode>"
3102 [(set (match_operand:VDQF 0 "register_operand" "=w")
3104 [(match_operand:VDQF 1 "register_operand" "w")
3107 (match_operand:VDQF 2 "register_operand" "w")
3108 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3112 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3113 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3115 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX where the second operand is a broadcast scalar: element [0].
3120 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3121 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3123 [(match_operand:VHSDF 1 "register_operand" "w")
3124 (vec_duplicate:VHSDF
3125 (match_operand:<VEL> 2 "register_operand" "w"))]
3128 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3129 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3132 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3133 ;; vmulxd_lane_f64 == vmulx_lane_f64
3134 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX by a vector element.
3136 (define_insn "*aarch64_vgetfmulx<mode>"
3137 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3139 [(match_operand:<VEL> 1 "register_operand" "w")
3141 (match_operand:VDQF_DF 2 "register_operand" "w")
3142 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3146 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3147 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3149 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub, signed and unsigned: [SU]Q(ADD|SUB) via BINQOPS.
3153 (define_insn "aarch64_<su_optab><optab><mode>"
3154 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3155 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3156 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3158 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3159 [(set_attr "type" "neon_<optab><q>")]
3162 ;; suqadd and usqadd
;; SUQADD/USQADD accumulate into the destination: operand 1 is tied to
;; the output ("0"), so the template prints only %0 and %2.
3164 (define_insn "aarch64_<sur>qadd<mode>"
3165 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3166 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3167 (match_operand:VSDQ_I 2 "register_operand" "w")]
3170 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3171 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed-to-unsigned saturating extract-narrow.
3176 (define_insn "aarch64_sqmovun<mode>"
3177 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3178 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3181 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3182 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3185 ;; sqmovn and uqmovn
;; [SU]QXTN: saturating extract-narrow keeping the input's signedness.
3187 (define_insn "aarch64_<sur>qmovn<mode>"
3188 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3189 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3192 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3193 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Unary saturating ops (s<optab>, e.g. SQABS/SQNEG per the iterator).
3198 (define_insn "aarch64_s<optab><mode>"
3199 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3201 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3203 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3204 [(set_attr "type" "neon_<optab><q>")]
;; SQDMULH/SQRDMULH: saturating doubling multiply (optionally rounding)
;; returning the high half, vector and scalar modes.
3209 (define_insn "aarch64_sq<r>dmulh<mode>"
3210 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3212 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3213 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3216 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3217 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; By-lane variant (lane from a 64-bit-wide <VCOND> vector); lane index
;; endian-corrected before printing.
3222 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3223 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3225 [(match_operand:VDQHS 1 "register_operand" "w")
3227 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3228 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3232 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3233 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3234 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; By-lane variant taking the lane from a 128-bit <VCONQ> vector.
3237 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3238 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3240 [(match_operand:VDQHS 1 "register_operand" "w")
3242 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3243 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3247 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3248 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3249 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) by-lane variants of the same operation.
3252 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3253 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3255 [(match_operand:SD_HSI 1 "register_operand" "w")
3257 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3258 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3262 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3263 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3264 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3267 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3268 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3270 [(match_operand:SD_HSI 1 "register_operand" "w")
3272 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3273 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3277 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3278 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3279 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDMLAH/SQRDMLSH (ARMv8.1 rounding doubling multiply
;; accumulate/subtract, selected by SQRDMLH_AS:rdma_as).  Operand 1 is
;; the accumulator, tied to the destination ("0").
3284 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3285 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3287 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3288 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3289 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3292 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3293 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3296 ;; sqrdml[as]h_lane.
;; Vector by-lane form; lane taken from a <VCOND> vector, index
;; endian-corrected before printing.
3298 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3299 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3301 [(match_operand:VDQHS 1 "register_operand" "0")
3302 (match_operand:VDQHS 2 "register_operand" "w")
3304 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3305 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3309 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3311 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3313 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar by-lane form.
3316 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3317 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3319 [(match_operand:SD_HSI 1 "register_operand" "0")
3320 (match_operand:SD_HSI 2 "register_operand" "w")
3322 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3323 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3327 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3329 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3331 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3334 ;; sqrdml[as]h_laneq.
;; As above, but the lane comes from a 128-bit <VCONQ> vector.
3336 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3337 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3339 [(match_operand:VDQHS 1 "register_operand" "0")
3340 (match_operand:VDQHS 2 "register_operand" "w")
3342 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3343 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3347 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3349 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3351 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3354 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3355 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3357 [(match_operand:SD_HSI 1 "register_operand" "0")
3358 (match_operand:SD_HSI 2 "register_operand" "w")
3360 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3361 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3365 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3367 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3369 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL/SQDMLSL: widening signed saturating doubling multiply
;; accumulate/subtract long.  Both multiplicands are sign-extended to the
;; double-width mode <VWIDE>; the accumulator (operand 1) is tied to the
;; destination.
3374 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3375 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3377 (match_operand:<VWIDE> 1 "register_operand" "0")
3380 (sign_extend:<VWIDE>
3381 (match_operand:VSD_HSI 2 "register_operand" "w"))
3382 (sign_extend:<VWIDE>
3383 (match_operand:VSD_HSI 3 "register_operand" "w")))
3386 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3387 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]

;; Vector-by-lane variant: the second multiplicand is one lane of a
;; 64-bit vector, duplicated across the vector before the widening
;; multiply.  Lane index remapped for big-endian via ENDIAN_LANE_N.
3392 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3393 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3395 (match_operand:<VWIDE> 1 "register_operand" "0")
3398 (sign_extend:<VWIDE>
3399 (match_operand:VD_HSI 2 "register_operand" "w"))
3400 (sign_extend:<VWIDE>
3401 (vec_duplicate:VD_HSI
3403 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3404 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3409 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3411 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3413 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; _laneq: lane taken from a 128-bit vector (<VCONQ>).
3416 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3417 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3419 (match_operand:<VWIDE> 1 "register_operand" "0")
3422 (sign_extend:<VWIDE>
3423 (match_operand:VD_HSI 2 "register_operand" "w"))
3424 (sign_extend:<VWIDE>
3425 (vec_duplicate:VD_HSI
3427 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3428 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3433 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3435 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3437 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) by-lane variants: no vec_duplicate needed since the
;; first multiplicand is already scalar.
3440 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3441 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3443 (match_operand:<VWIDE> 1 "register_operand" "0")
3446 (sign_extend:<VWIDE>
3447 (match_operand:SD_HSI 2 "register_operand" "w"))
3448 (sign_extend:<VWIDE>
3450 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3451 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3456 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3458 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3460 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

3463 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3464 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3466 (match_operand:<VWIDE> 1 "register_operand" "0")
3469 (sign_extend:<VWIDE>
3470 (match_operand:SD_HSI 2 "register_operand" "w"))
3471 (sign_extend:<VWIDE>
3473 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3474 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3479 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3481 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3483 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; _n variant: the second multiplicand is a scalar register duplicated
;; across all lanes; always uses lane 0 in the assembler template.
3488 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3491 (match_operand:<VWIDE> 1 "register_operand" "0")
3494 (sign_extend:<VWIDE>
3495 (match_operand:VD_HSI 2 "register_operand" "w"))
3496 (sign_extend:<VWIDE>
3497 (vec_duplicate:VD_HSI
3498 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3501 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3502 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; SQDMLAL2/SQDMLSL2: operate on the high halves of 128-bit vectors.
;; The _internal insns take an explicit vect_par_cnst_hi_half parallel
;; selecting the upper lanes; the expanders below build that parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, true).
3507 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3508 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3510 (match_operand:<VWIDE> 1 "register_operand" "0")
3513 (sign_extend:<VWIDE>
3515 (match_operand:VQ_HSI 2 "register_operand" "w")
3516 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3517 (sign_extend:<VWIDE>
3519 (match_operand:VQ_HSI 3 "register_operand" "w")
3523 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3524 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; Expander wrappers generating the hi-half selector for add/sub forms.
3527 (define_expand "aarch64_sqdmlal2<mode>"
3528 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3529 (match_operand:<VWIDE> 1 "register_operand" "w")
3530 (match_operand:VQ_HSI 2 "register_operand" "w")
3531 (match_operand:VQ_HSI 3 "register_operand" "w")]
3534 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3535 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3536 operands[2], operands[3], p));

3540 (define_expand "aarch64_sqdmlsl2<mode>"
3541 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3542 (match_operand:<VWIDE> 1 "register_operand" "w")
3543 (match_operand:VQ_HSI 2 "register_operand" "w")
3544 (match_operand:VQ_HSI 3 "register_operand" "w")]
3547 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3548 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3549 operands[2], operands[3], p));

;; High-half by-lane internals; lane index remapped via ENDIAN_LANE_N.
3555 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3556 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3558 (match_operand:<VWIDE> 1 "register_operand" "0")
3561 (sign_extend:<VWIDE>
3563 (match_operand:VQ_HSI 2 "register_operand" "w")
3564 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3565 (sign_extend:<VWIDE>
3566 (vec_duplicate:<VHALF>
3568 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3569 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3574 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3576 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3578 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

3581 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3582 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3584 (match_operand:<VWIDE> 1 "register_operand" "0")
3587 (sign_extend:<VWIDE>
3589 (match_operand:VQ_HSI 2 "register_operand" "w")
3590 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3591 (sign_extend:<VWIDE>
3592 (vec_duplicate:<VHALF>
3594 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3595 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3600 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3602 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3604 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

;; By-lane expander wrappers for the four add/sub x lane/laneq combos.
3607 (define_expand "aarch64_sqdmlal2_lane<mode>"
3608 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3609 (match_operand:<VWIDE> 1 "register_operand" "w")
3610 (match_operand:VQ_HSI 2 "register_operand" "w")
3611 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3612 (match_operand:SI 4 "immediate_operand" "i")]
3615 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3616 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3617 operands[2], operands[3],

3622 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3623 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3624 (match_operand:<VWIDE> 1 "register_operand" "w")
3625 (match_operand:VQ_HSI 2 "register_operand" "w")
3626 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3627 (match_operand:SI 4 "immediate_operand" "i")]
3630 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3631 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3632 operands[2], operands[3],

3637 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3638 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3639 (match_operand:<VWIDE> 1 "register_operand" "w")
3640 (match_operand:VQ_HSI 2 "register_operand" "w")
3641 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3642 (match_operand:SI 4 "immediate_operand" "i")]
3645 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3646 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3647 operands[2], operands[3],

3652 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3653 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3654 (match_operand:<VWIDE> 1 "register_operand" "w")
3655 (match_operand:VQ_HSI 2 "register_operand" "w")
3656 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3657 (match_operand:SI 4 "immediate_operand" "i")]
3660 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3661 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3662 operands[2], operands[3],

;; High-half _n internal: scalar multiplicand duplicated; template uses
;; lane 0.
3667 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3668 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3670 (match_operand:<VWIDE> 1 "register_operand" "0")
3673 (sign_extend:<VWIDE>
3675 (match_operand:VQ_HSI 2 "register_operand" "w")
3676 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3677 (sign_extend:<VWIDE>
3678 (vec_duplicate:<VHALF>
3679 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3682 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3683 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]

3686 (define_expand "aarch64_sqdmlal2_n<mode>"
3687 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3688 (match_operand:<VWIDE> 1 "register_operand" "w")
3689 (match_operand:VQ_HSI 2 "register_operand" "w")
3690 (match_operand:<VEL> 3 "register_operand" "w")]
3693 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3694 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3695 operands[2], operands[3],

3700 (define_expand "aarch64_sqdmlsl2_n<mode>"
3701 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3702 (match_operand:<VWIDE> 1 "register_operand" "w")
3703 (match_operand:VQ_HSI 2 "register_operand" "w")
3704 (match_operand:<VEL> 3 "register_operand" "w")]
3707 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3708 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3709 operands[2], operands[3],
;; SQDMULL: signed saturating doubling multiply long.  Both operands are
;; sign-extended to <VWIDE> before the doubling multiply.
3716 (define_insn "aarch64_sqdmull<mode>"
3717 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3720 (sign_extend:<VWIDE>
3721 (match_operand:VSD_HSI 1 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (match_operand:VSD_HSI 2 "register_operand" "w")))
3726 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3727 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]

;; Vector by-lane: lane from a 64-bit vector, duplicated across lanes.
;; Lane index remapped via ENDIAN_LANE_N for big-endian.
3732 (define_insn "aarch64_sqdmull_lane<mode>"
3733 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3736 (sign_extend:<VWIDE>
3737 (match_operand:VD_HSI 1 "register_operand" "w"))
3738 (sign_extend:<VWIDE>
3739 (vec_duplicate:VD_HSI
3741 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3742 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3747 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3748 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3750 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; _laneq: lane from a 128-bit vector.
3753 (define_insn "aarch64_sqdmull_laneq<mode>"
3754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3757 (sign_extend:<VWIDE>
3758 (match_operand:VD_HSI 1 "register_operand" "w"))
3759 (sign_extend:<VWIDE>
3760 (vec_duplicate:VD_HSI
3762 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3763 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3768 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3769 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3771 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; Scalar (SD_HSI) by-lane variants: no vec_duplicate required.
3774 (define_insn "aarch64_sqdmull_lane<mode>"
3775 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3778 (sign_extend:<VWIDE>
3779 (match_operand:SD_HSI 1 "register_operand" "w"))
3780 (sign_extend:<VWIDE>
3782 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3783 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3788 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3789 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3791 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3794 (define_insn "aarch64_sqdmull_laneq<mode>"
3795 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3798 (sign_extend:<VWIDE>
3799 (match_operand:SD_HSI 1 "register_operand" "w"))
3800 (sign_extend:<VWIDE>
3802 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3803 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3808 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3809 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3811 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; _n variant: scalar multiplicand duplicated; template uses lane 0.
3816 (define_insn "aarch64_sqdmull_n<mode>"
3817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3820 (sign_extend:<VWIDE>
3821 (match_operand:VD_HSI 1 "register_operand" "w"))
3822 (sign_extend:<VWIDE>
3823 (vec_duplicate:VD_HSI
3824 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3828 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3829 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL2: as SQDMULL but on the high halves of 128-bit vectors.
;; Internals take an explicit vect_par_cnst_hi_half selector; expanders
;; construct it with aarch64_simd_vect_par_cnst_half (<MODE>mode, true).
3836 (define_insn "aarch64_sqdmull2<mode>_internal"
3837 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3840 (sign_extend:<VWIDE>
3842 (match_operand:VQ_HSI 1 "register_operand" "w")
3843 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3844 (sign_extend:<VWIDE>
3846 (match_operand:VQ_HSI 2 "register_operand" "w")
3851 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3852 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3855 (define_expand "aarch64_sqdmull2<mode>"
3856 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3857 (match_operand:VQ_HSI 1 "register_operand" "w")
3858 (match_operand:VQ_HSI 2 "register_operand" "w")]
3861 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3862 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],

;; High-half by-lane internals; lane remapped via ENDIAN_LANE_N.
3869 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3870 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3873 (sign_extend:<VWIDE>
3875 (match_operand:VQ_HSI 1 "register_operand" "w")
3876 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3877 (sign_extend:<VWIDE>
3878 (vec_duplicate:<VHALF>
3880 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3881 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3886 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3887 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3889 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3892 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3896 (sign_extend:<VWIDE>
3898 (match_operand:VQ_HSI 1 "register_operand" "w")
3899 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3900 (sign_extend:<VWIDE>
3901 (vec_duplicate:<VHALF>
3903 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3904 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3909 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3910 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3912 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

;; By-lane expander wrappers.
3915 (define_expand "aarch64_sqdmull2_lane<mode>"
3916 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3917 (match_operand:VQ_HSI 1 "register_operand" "w")
3918 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3919 (match_operand:SI 3 "immediate_operand" "i")]
3922 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3923 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3924 operands[2], operands[3],

3929 (define_expand "aarch64_sqdmull2_laneq<mode>"
3930 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3931 (match_operand:VQ_HSI 1 "register_operand" "w")
3932 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3933 (match_operand:SI 3 "immediate_operand" "i")]
3936 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3937 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3938 operands[2], operands[3],

;; _n variant on high halves: scalar duplicated, lane 0 in template.
3945 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3946 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3949 (sign_extend:<VWIDE>
3951 (match_operand:VQ_HSI 1 "register_operand" "w")
3952 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3953 (sign_extend:<VWIDE>
3954 (vec_duplicate:<VHALF>
3955 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3959 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3960 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]

3963 (define_expand "aarch64_sqdmull2_n<mode>"
3964 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3965 (match_operand:VQ_HSI 1 "register_operand" "w")
3966 (match_operand:<VEL> 2 "register_operand" "w")]
3969 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3970 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Register-controlled shifts: SSHL/USHL/SRSHL/URSHL (per-element shift
;; amount in operand 2).
3977 (define_insn "aarch64_<sur>shl<mode>"
3978 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3980 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3981 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3984 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3985 [(set_attr "type" "neon_shift_reg<q>")]

;; Saturating (optionally rounding) register-controlled shifts.
3991 (define_insn "aarch64_<sur>q<r>shl<mode>"
3992 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3994 [(match_operand:VSDQ_I 1 "register_operand" "w")
3995 (match_operand:VSDQ_I 2 "register_operand" "w")]
3998 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3999 [(set_attr "type" "neon_sat_shift_reg<q>")]

;; Widening shift-left by immediate.  When the shift amount equals the
;; element bit-size the architecture only provides plain SHLL, so that
;; mnemonic is emitted instead of the s/u-prefixed form.
4004 (define_insn "aarch64_<sur>shll_n<mode>"
4005 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4006 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4008 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4012 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4013 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4015 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4017 [(set_attr "type" "neon_shift_imm_long")]

;; High-half variant of the widening shift-left; same SHLL2 special case.
4022 (define_insn "aarch64_<sur>shll2_n<mode>"
4023 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4024 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4025 (match_operand:SI 2 "immediate_operand" "i")]
4029 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4030 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4032 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4034 [(set_attr "type" "neon_shift_imm_long")]

;; Shift right by immediate (rounding variants selected by <sur>).
4039 (define_insn "aarch64_<sur>shr_n<mode>"
4040 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4041 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4043 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4046 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4047 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Shift-right-and-accumulate: operand 1 (the accumulator) is tied to
;; the destination.
4052 (define_insn "aarch64_<sur>sra_n<mode>"
4053 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4054 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4055 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4057 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4060 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4061 [(set_attr "type" "neon_shift_acc<q>")]

;; SLI/SRI: shift and insert, preserving untouched destination bits
;; (hence operand 1 tied to "0").
4066 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4067 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4068 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4069 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4071 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4074 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4075 [(set_attr "type" "neon_shift_imm<q>")]

;; Saturating shift-left by immediate (signed/unsigned/unsigned-from-
;; signed selected by <sur> and <u>).
4080 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4081 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4082 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4084 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4087 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4088 [(set_attr "type" "neon_sat_shift_imm<q>")]

;; Saturating (rounding) shift-right-narrow by immediate.
4094 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4095 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4096 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4098 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4101 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4102 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4106 ;; cm(eq|ge|gt|lt|le)
4107 ;; Note, we have constraints for Dz and Z as different expanders
4108 ;; have different ideas of what should be passed to this pattern.

;; Integer vector compares.  Alternative 1 (ZDz) matches a zero operand
;; and emits the compare-against-#0 form.
4110 (define_insn "aarch64_cm<optab><mode>"
4111 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4113 (COMPARISONS:<V_cmp_result>
4114 (match_operand:VDQ_I 1 "register_operand" "w,w")
4115 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4119 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4120 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4121 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]

;; DI-mode compare.  If register allocation put the operands in general
;; registers, split to a scalar compare + cstore sequence (clobbering
;; CC); otherwise re-emit as the *aarch64_cm<optab>di pattern below,
;; which does not clobber CC_REGNUM.
4124 (define_insn_and_split "aarch64_cm<optab>di"
4125 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4128 (match_operand:DI 1 "register_operand" "w,w,r")
4129 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4131 (clobber (reg:CC CC_REGNUM))]
4135 [(set (match_operand:DI 0 "register_operand")
4138 (match_operand:DI 1 "register_operand")
4139 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4142 /* If we are in the general purpose register file,
4143 we split to a sequence of comparison and store.  */
4144 if (GP_REGNUM_P (REGNO (operands[0]))
4145 && GP_REGNUM_P (REGNO (operands[1])))
4147 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4148 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4149 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4150 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4153 /* Otherwise, we expand to a similar pattern which does not
4154 clobber CC_REGNUM.  */
4156 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]

;; Post-reload DI compare in SIMD registers, no CC clobber.
4159 (define_insn "*aarch64_cm<optab>di"
4160 [(set (match_operand:DI 0 "register_operand" "=w,w")
4163 (match_operand:DI 1 "register_operand" "w,w")
4164 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4166 "TARGET_SIMD && reload_completed"
4168 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4169 cm<optab>\t%d0, %d1, #0"
4170 [(set_attr "type" "neon_compare, neon_compare_zero")]

;; Unsigned integer vector compares (cmhi/cmhs etc. via UCOMPARISONS).
4175 (define_insn "aarch64_cm<optab><mode>"
4176 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4178 (UCOMPARISONS:<V_cmp_result>
4179 (match_operand:VDQ_I 1 "register_operand" "w")
4180 (match_operand:VDQ_I 2 "register_operand" "w")
4183 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4184 [(set_attr "type" "neon_compare<q>")]

;; Unsigned DI-mode compare; same GP-register split strategy as the
;; signed version above (note mode is plain CCmode here).
4187 (define_insn_and_split "aarch64_cm<optab>di"
4188 [(set (match_operand:DI 0 "register_operand" "=w,r")
4191 (match_operand:DI 1 "register_operand" "w,r")
4192 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4194 (clobber (reg:CC CC_REGNUM))]
4198 [(set (match_operand:DI 0 "register_operand")
4201 (match_operand:DI 1 "register_operand")
4202 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4205 /* If we are in the general purpose register file,
4206 we split to a sequence of comparison and store.  */
4207 if (GP_REGNUM_P (REGNO (operands[0]))
4208 && GP_REGNUM_P (REGNO (operands[1])))
4210 machine_mode mode = CCmode;
4211 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4212 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4213 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4216 /* Otherwise, we expand to a similar pattern which does not
4217 clobber CC_REGNUM.  */
4219 [(set_attr "type" "neon_compare,multiple")]

4222 (define_insn "*aarch64_cm<optab>di"
4223 [(set (match_operand:DI 0 "register_operand" "=w")
4226 (match_operand:DI 1 "register_operand" "w")
4227 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4229 "TARGET_SIMD && reload_completed"
4230 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4231 [(set_attr "type" "neon_compare")]

4236 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4237 ;; we don't have any insns using ne, and aarch64_vcond outputs
4238 ;; not (neg (eq (and x y) 0))
4239 ;; which is rewritten by simplify_rtx as
4240 ;; plus (eq (and x y) 0) -1.

;; CMTST: test bits, matching the plus(eq(and x y) 0, -1) canonical form
;; described above.
4242 (define_insn "aarch64_cmtst<mode>"
4243 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4244 (plus:<V_cmp_result>
4247 (match_operand:VDQ_I 1 "register_operand" "w")
4248 (match_operand:VDQ_I 2 "register_operand" "w"))
4249 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4250 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4253 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4254 [(set_attr "type" "neon_tst<q>")]

;; DI-mode CMTST; splits to AND + NE compare + cstore when in general
;; registers, mirroring the cm<optab>di strategy.
4257 (define_insn_and_split "aarch64_cmtstdi"
4258 [(set (match_operand:DI 0 "register_operand" "=w,r")
4262 (match_operand:DI 1 "register_operand" "w,r")
4263 (match_operand:DI 2 "register_operand" "w,r"))
4265 (clobber (reg:CC CC_REGNUM))]
4269 [(set (match_operand:DI 0 "register_operand")
4273 (match_operand:DI 1 "register_operand")
4274 (match_operand:DI 2 "register_operand"))
4277 /* If we are in the general purpose register file,
4278 we split to a sequence of comparison and store.  */
4279 if (GP_REGNUM_P (REGNO (operands[0]))
4280 && GP_REGNUM_P (REGNO (operands[1])))
4282 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4283 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4284 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4285 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4286 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4289 /* Otherwise, we expand to a similar pattern which does not
4290 clobber CC_REGNUM.  */
4292 [(set_attr "type" "neon_tst,multiple")]

4295 (define_insn "*aarch64_cmtstdi"
4296 [(set (match_operand:DI 0 "register_operand" "=w")
4300 (match_operand:DI 1 "register_operand" "w")
4301 (match_operand:DI 2 "register_operand" "w"))
4304 "cmtst\t%d0, %d1, %d2"
4305 [(set_attr "type" "neon_tst")]
4308 ;; fcm(eq|ge|gt|le|lt)

;; Floating-point compares; alternative 1 (YDz) matches zero and emits
;; the compare-against-0 form.
4310 (define_insn "aarch64_cm<optab><mode>"
4311 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4313 (COMPARISONS:<V_cmp_result>
4314 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4315 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4319 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4320 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4321 [(set_attr "type" "neon_fp_compare_<stype><q>")]

4325 ;; Note we can also handle what would be fac(le|lt) by
4326 ;; generating fac(ge|gt).

;; FACGE/FACGT: absolute compares (operands wrapped in abs; the elided
;; lines presumably contain the abs wrappers — verify in full source).
4328 (define_insn "aarch64_fac<optab><mode>"
4329 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4331 (FAC_COMPARISONS:<V_cmp_result>
4333 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4335 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4338 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4339 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; ADDP: pairwise add on 64-bit integer vectors.
4344 (define_insn "aarch64_addp<mode>"
4345 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4347 [(match_operand:VD_BHSI 1 "register_operand" "w")
4348 (match_operand:VD_BHSI 2 "register_operand" "w")]
4351 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4352 [(set_attr "type" "neon_reduc_add<q>")]

;; Scalar ADDP: reduce a V2DI to a single DI.
4355 (define_insn "aarch64_addpdi"
4356 [(set (match_operand:DI 0 "register_operand" "=w")
4358 [(match_operand:V2DI 1 "register_operand" "w")]
4362 [(set_attr "type" "neon_reduc_add")]
;; sqrt expander: first tries the approximate-sqrt expansion
;; (aarch64_emit_approx_sqrt); if that emits code, the expander is done,
;; otherwise it presumably falls through to the FSQRT insn below —
;; confirm against the full source (the DONE/fall-through lines are
;; elided here).
4367 (define_expand "sqrt<mode>2"
4368 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4369 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4372 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))

;; Plain vector FSQRT.
4376 (define_insn "*sqrt<mode>2"
4377 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4378 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4380 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4381 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4384 ;; Patterns for vector struct loads and stores.
4386 (define_insn "aarch64_simd_ld2<mode>"
4387 [(set (match_operand:OI 0 "register_operand" "=w")
4388 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4389 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4392 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4393 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2/ST2 structure load/store patterns operating on a pair of Q registers
;; (the two-register list is modelled as a single OImode value).
;; NOTE(review): this extract drops interior source lines (the embedded line
;; numbers jump, e.g. 4399 -> 4402), so condition strings, unspec names and
;; some closing parens are missing here — verify against the full file.

;; Load one 2-element structure and replicate it to all lanes (LD2R).
4396 (define_insn "aarch64_simd_ld2r<mode>"
4397 [(set (match_operand:OI 0 "register_operand" "=w")
4398 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4399 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4402 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4403 [(set_attr "type" "neon_load2_all_lanes<q>")]

;; Load one lane of a 2-register list; operand 2 is the in/out register list
;; ("0" ties it to operand 0), operand 3 the lane index.  The C fragment
;; remaps the lane number for big-endian before printing the template.
4406 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4407 [(set (match_operand:OI 0 "register_operand" "=w")
4408 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4409 (match_operand:OI 2 "register_operand" "0")
4410 (match_operand:SI 3 "immediate_operand" "i")
4411 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4415 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4416 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4418 [(set_attr "type" "neon_load2_one_lane")]

;; Standard-name expander: on big-endian, LD2 then permute the register list
;; back to GCC lane order via aarch64_rev_reglistoi; otherwise LD2 directly.
4421 (define_expand "vec_load_lanesoi<mode>"
4422 [(set (match_operand:OI 0 "register_operand" "=w")
4423 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4424 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4428 if (BYTES_BIG_ENDIAN)
4430 rtx tmp = gen_reg_rtx (OImode);
4431 rtx mask = aarch64_reverse_mask (<MODE>mode);
4432 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4433 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4436 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; Store a 2-register list with de-interleaving (ST2).
4440 (define_insn "aarch64_simd_st2<mode>"
4441 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4442 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4443 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4446 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4447 [(set_attr "type" "neon_store2_2reg<q>")]

4450 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4451 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4452 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4453 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4454 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4455 (match_operand:SI 2 "immediate_operand" "i")]
4459 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4460 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4462 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Standard-name expander: mirror of vec_load_lanesoi — permute first on
;; big-endian, then ST2.
4465 (define_expand "vec_store_lanesoi<mode>"
4466 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4467 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4468 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4472 if (BYTES_BIG_ENDIAN)
4474 rtx tmp = gen_reg_rtx (OImode);
4475 rtx mask = aarch64_reverse_mask (<MODE>mode);
4476 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4477 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4480 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3 structure load/store patterns for a three-Q-register list
;; (modelled as a single CImode value).  Same shape as the LD2/ST2 family.
;; NOTE(review): interior source lines are missing from this extract
;; (embedded line numbers jump) — verify against the full file.

;; Load three 3-element structures, de-interleaving into three registers.
4484 (define_insn "aarch64_simd_ld3<mode>"
4485 [(set (match_operand:CI 0 "register_operand" "=w")
4486 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4487 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4491 [(set_attr "type" "neon_load3_3reg<q>")]

;; Load one 3-element structure and replicate to all lanes (LD3R).
4494 (define_insn "aarch64_simd_ld3r<mode>"
4495 [(set (match_operand:CI 0 "register_operand" "=w")
4496 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4497 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4500 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4501 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; Load one lane of a 3-register list; lane index remapped for big-endian.
4504 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4505 [(set (match_operand:CI 0 "register_operand" "=w")
4506 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4507 (match_operand:CI 2 "register_operand" "0")
4508 (match_operand:SI 3 "immediate_operand" "i")
4509 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4513 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4514 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4516 [(set_attr "type" "neon_load3_one_lane")]

;; Standard-name expander: on big-endian, LD3 into a temp then permute back
;; to GCC lane order; otherwise LD3 directly.
4519 (define_expand "vec_load_lanesci<mode>"
4520 [(set (match_operand:CI 0 "register_operand" "=w")
4521 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4522 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4526 if (BYTES_BIG_ENDIAN)
4528 rtx tmp = gen_reg_rtx (CImode);
4529 rtx mask = aarch64_reverse_mask (<MODE>mode);
4530 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4531 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4534 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; Store a 3-register list with interleaving (ST3).
4538 (define_insn "aarch64_simd_st3<mode>"
4539 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4540 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4541 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4544 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4545 [(set_attr "type" "neon_store3_3reg<q>")]

4548 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4549 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4550 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4551 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4552 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4553 (match_operand:SI 2 "immediate_operand" "i")]
4557 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4558 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4560 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard-name expander: permute first on big-endian, then ST3.
4563 (define_expand "vec_store_lanesci<mode>"
4564 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4565 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4566 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4570 if (BYTES_BIG_ENDIAN)
4572 rtx tmp = gen_reg_rtx (CImode);
4573 rtx mask = aarch64_reverse_mask (<MODE>mode);
4574 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4575 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4578 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 structure load/store patterns for a four-Q-register list
;; (modelled as a single XImode value).  Same shape as the LD2/LD3 families.
;; NOTE(review): interior source lines are missing from this extract
;; (embedded line numbers jump) — verify against the full file.

;; Load four 4-element structures, de-interleaving into four registers.
4582 (define_insn "aarch64_simd_ld4<mode>"
4583 [(set (match_operand:XI 0 "register_operand" "=w")
4584 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4585 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4588 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4589 [(set_attr "type" "neon_load4_4reg<q>")]

;; Load one 4-element structure and replicate to all lanes (LD4R).
4592 (define_insn "aarch64_simd_ld4r<mode>"
4593 [(set (match_operand:XI 0 "register_operand" "=w")
4594 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4595 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4598 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4599 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; Load one lane of a 4-register list; lane index remapped for big-endian.
4602 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4603 [(set (match_operand:XI 0 "register_operand" "=w")
4604 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4605 (match_operand:XI 2 "register_operand" "0")
4606 (match_operand:SI 3 "immediate_operand" "i")
4607 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4611 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4612 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4614 [(set_attr "type" "neon_load4_one_lane")]

;; Standard-name expander: on big-endian, LD4 into a temp then permute back
;; to GCC lane order; otherwise LD4 directly.
4617 (define_expand "vec_load_lanesxi<mode>"
4618 [(set (match_operand:XI 0 "register_operand" "=w")
4619 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4620 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4624 if (BYTES_BIG_ENDIAN)
4626 rtx tmp = gen_reg_rtx (XImode);
4627 rtx mask = aarch64_reverse_mask (<MODE>mode);
4628 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4629 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4632 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

;; Store a 4-register list with interleaving (ST4).
4636 (define_insn "aarch64_simd_st4<mode>"
4637 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4638 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4639 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4642 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4643 [(set_attr "type" "neon_store4_4reg<q>")]

4646 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4647 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4648 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4649 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4650 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4651 (match_operand:SI 2 "immediate_operand" "i")]
4655 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4656 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4658 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Standard-name expander: permute first on big-endian, then ST4.
4661 (define_expand "vec_store_lanesxi<mode>"
4662 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4663 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4664 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4668 if (BYTES_BIG_ENDIAN)
4670 rtx tmp = gen_reg_rtx (XImode);
4671 rtx mask = aarch64_reverse_mask (<MODE>mode);
4672 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4673 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4676 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the lane order of every vector in a VSTRUCT register list
;; (used by the big-endian vec_load/store_lanes expanders above).
;; After reload it splits into one TBL (V16QI shuffle) per component
;; register, all using the same byte-permute mask in operand 2.
;; "=&w" makes operand 0 early-clobber so the TBLs can be emitted in order
;; without overwriting unread inputs.
4680 (define_insn_and_split "aarch64_rev_reglist<mode>"
4681 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4683 [(match_operand:VSTRUCT 1 "register_operand" "w")
4684 (match_operand:V16QI 2 "register_operand" "w")]
4685 UNSPEC_REV_REGLIST))]
4688 "&& reload_completed"
4692 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4693 for (i = 0; i < nregs; i++)
4695 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4696 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4697 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4701 [(set_attr "type" "neon_tbl1_q")
4702 (set_attr "length" "<insn_count>")]
4705 ;; Reload patterns for AdvSIMD register list operands.

;; Move expander for register-list (VSTRUCT) modes: before reload, force
;; the source into a register when the destination is not one, so the
;; insn below always sees at least one register operand.
4707 (define_expand "mov<mode>"
4708 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4709 (match_operand:VSTRUCT 1 "general_operand" ""))]
4712 if (can_create_pseudo_p ())
4714 if (GET_CODE (operands[0]) != REG)
4715 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Little-endian VSTRUCT move: reg-reg (split later, "multiple"),
;; reg->mem via ST1, mem->reg via LD1 on the whole register list.
4719 (define_insn "*aarch64_mov<mode>"
4720 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4721 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4722 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4723 && (register_operand (operands[0], <MODE>mode)
4724 || register_operand (operands[1], <MODE>mode))"
4727 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4728 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4729 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4730 neon_load<nregs>_<nregs>reg_q")
4731 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian single-vector and register-list moves.  LD1/ST1 preserve
;; element ordering regardless of endianness, which is why they are used
;; here instead of plain loads/stores.
;; NOTE(review): the output templates of the be_mov patterns are missing
;; from this extract (embedded line numbers jump) — verify in full file.

;; Element-order-preserving single-vector load (big-endian helper).
4734 (define_insn "aarch64_be_ld1<mode>"
4735 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4736 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4737 "aarch64_simd_struct_operand" "Utv")]
4740 "ld1\\t{%0<Vmtype>}, %1"
4741 [(set_attr "type" "neon_load1_1reg<q>")]

;; Element-order-preserving single-vector store (big-endian helper).
4744 (define_insn "aarch64_be_st1<mode>"
4745 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4746 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4749 "st1\\t{%1<Vmtype>}, %0"
4750 [(set_attr "type" "neon_store1_1reg<q>")]

;; Big-endian OImode (2-register) move: reg-reg split, STP/LDP for memory.
4753 (define_insn "*aarch64_be_movoi"
4754 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4755 (match_operand:OI 1 "general_operand" " w,w,m"))]
4756 "TARGET_SIMD && BYTES_BIG_ENDIAN
4757 && (register_operand (operands[0], OImode)
4758 || register_operand (operands[1], OImode))"
4763 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4764 (set_attr "length" "8,4,4")]

;; Big-endian CImode (3-register) move; memory alternatives use offsettable
;; ("o") addresses since the access is split into pieces.
4767 (define_insn "*aarch64_be_movci"
4768 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4769 (match_operand:CI 1 "general_operand" " w,w,o"))]
4770 "TARGET_SIMD && BYTES_BIG_ENDIAN
4771 && (register_operand (operands[0], CImode)
4772 || register_operand (operands[1], CImode))"
4774 [(set_attr "type" "multiple")
4775 (set_attr "length" "12,4,4")]

;; Big-endian XImode (4-register) move.
4778 (define_insn "*aarch64_be_movxi"
4779 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4780 (match_operand:XI 1 "general_operand" " w,w,o"))]
4781 "TARGET_SIMD && BYTES_BIG_ENDIAN
4782 && (register_operand (operands[0], XImode)
4783 || register_operand (operands[1], XImode))"
4785 [(set_attr "type" "multiple")
4786 (set_attr "length" "16,4,4")]
;; Post-reload splitters for OI/CI/XI moves.  Reg-reg moves are decomposed
;; into TImode component moves by aarch64_simd_emit_reg_reg_move; the
;; big-endian memory cases are split into smaller OI/TI submoves.
;; NOTE(review): the (define_split headers and several closing lines are
;; missing from this extract (embedded line numbers jump).

;; OImode reg-reg split: two TImode moves.
4790 [(set (match_operand:OI 0 "register_operand")
4791 (match_operand:OI 1 "register_operand"))]
4792 "TARGET_SIMD && reload_completed"
4795 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; CImode split: reg-reg becomes three TImode moves; the big-endian
;; memory case becomes an OImode move plus a TImode (V16QI) move for
;; the trailing register.
4800 [(set (match_operand:CI 0 "nonimmediate_operand")
4801 (match_operand:CI 1 "general_operand"))]
4802 "TARGET_SIMD && reload_completed"
4805 if (register_operand (operands[0], CImode)
4806 && register_operand (operands[1], CImode))
4808 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4811 else if (BYTES_BIG_ENDIAN)
4813 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4814 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4815 emit_move_insn (gen_lowpart (V16QImode,
4816 simplify_gen_subreg (TImode, operands[0],
4818 gen_lowpart (V16QImode,
4819 simplify_gen_subreg (TImode, operands[1],
;; XImode split: reg-reg becomes four TImode moves; the big-endian memory
;; case is split into two OImode moves at byte offsets 0 and 32.
4828 [(set (match_operand:XI 0 "nonimmediate_operand")
4829 (match_operand:XI 1 "general_operand"))]
4830 "TARGET_SIMD && reload_completed"
4833 if (register_operand (operands[0], XImode)
4834 && register_operand (operands[1], XImode))
4836 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4839 else if (BYTES_BIG_ENDIAN)
4841 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4842 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4843 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4844 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for vldN_dup intrinsics: wrap the pointer in operand 1
;; in a BLKmode MEM with an explicit size, then emit the matching
;; aarch64_simd_ld<n>r pattern.
4851 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4852 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4853 (match_operand:DI 1 "register_operand" "w")
4854 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4857 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4858 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4861 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; D-register structure loads.  Each result D register is zero-extended to
;; 128 bits in the RTL (concatenated with zeroes) so the whole list can be
;; represented in the OI/CI/XI Q-register-list mode.  Vector element modes
;; (VD) use LDn; the 64-bit scalar modes (DX) use LD1 with ".1d" layout.
;; NOTE(review): interior lines (vec_concat wrappers, unspec names) are
;; missing from this extract — verify against the full file.

;; LD2 into two D registers (VD element modes).
4866 (define_insn "aarch64_ld2<mode>_dreg"
4867 [(set (match_operand:OI 0 "register_operand" "=w")
4872 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4874 (vec_duplicate:VD (const_int 0)))
4876 (unspec:VD [(match_dup 1)]
4878 (vec_duplicate:VD (const_int 0)))) 0))]
4880 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4881 [(set_attr "type" "neon_load2_2reg<q>")]

;; LD1 two-register form for 64-bit scalar (DX) element modes.
4884 (define_insn "aarch64_ld2<mode>_dreg"
4885 [(set (match_operand:OI 0 "register_operand" "=w")
4890 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4894 (unspec:DX [(match_dup 1)]
4896 (const_int 0))) 0))]
4898 "ld1\\t{%S0.1d - %T0.1d}, %1"
4899 [(set_attr "type" "neon_load1_2reg<q>")]

;; LD3 into three D registers (VD element modes).
4902 (define_insn "aarch64_ld3<mode>_dreg"
4903 [(set (match_operand:CI 0 "register_operand" "=w")
4909 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4911 (vec_duplicate:VD (const_int 0)))
4913 (unspec:VD [(match_dup 1)]
4915 (vec_duplicate:VD (const_int 0))))
4917 (unspec:VD [(match_dup 1)]
4919 (vec_duplicate:VD (const_int 0)))) 0))]
4921 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4922 [(set_attr "type" "neon_load3_3reg<q>")]

;; LD1 three-register form for DX element modes.
4925 (define_insn "aarch64_ld3<mode>_dreg"
4926 [(set (match_operand:CI 0 "register_operand" "=w")
4932 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4936 (unspec:DX [(match_dup 1)]
4940 (unspec:DX [(match_dup 1)]
4942 (const_int 0))) 0))]
4944 "ld1\\t{%S0.1d - %U0.1d}, %1"
4945 [(set_attr "type" "neon_load1_3reg<q>")]

;; LD4 into four D registers (VD element modes).
4948 (define_insn "aarch64_ld4<mode>_dreg"
4949 [(set (match_operand:XI 0 "register_operand" "=w")
4955 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4957 (vec_duplicate:VD (const_int 0)))
4959 (unspec:VD [(match_dup 1)]
4961 (vec_duplicate:VD (const_int 0))))
4964 (unspec:VD [(match_dup 1)]
4966 (vec_duplicate:VD (const_int 0)))
4968 (unspec:VD [(match_dup 1)]
4970 (vec_duplicate:VD (const_int 0))))) 0))]
4972 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4973 [(set_attr "type" "neon_load4_4reg<q>")]

;; LD1 four-register form for DX element modes.
4976 (define_insn "aarch64_ld4<mode>_dreg"
4977 [(set (match_operand:XI 0 "register_operand" "=w")
4983 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4987 (unspec:DX [(match_dup 1)]
4992 (unspec:DX [(match_dup 1)]
4996 (unspec:DX [(match_dup 1)]
4998 (const_int 0)))) 0))]
5000 "ld1\\t{%S0.1d - %V0.1d}, %1"
5001 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expanders for vldN intrinsics: build a suitably-sized MEM from
;; the pointer argument and dispatch to the matching load pattern.

;; D-register vldN: BLKmode MEM of nregs * 8 bytes, then ldN_dreg.
5004 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5005 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5006 (match_operand:DI 1 "register_operand" "r")
5007 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5010 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5011 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5013 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));

;; vld1: plain move on little-endian, element-preserving LD1 on big-endian.
5017 (define_expand "aarch64_ld1<VALL_F16:mode>"
5018 [(match_operand:VALL_F16 0 "register_operand")
5019 (match_operand:DI 1 "register_operand")]
5022 machine_mode mode = <VALL_F16:MODE>mode;
5023 rtx mem = gen_rtx_MEM (mode, operands[1]);
5025 if (BYTES_BIG_ENDIAN)
5026 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5028 emit_move_insn (operands[0], mem);

;; Q-register vldN: MEM in the struct mode itself, then simd_ldN.
5032 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5033 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5034 (match_operand:DI 1 "register_operand" "r")
5035 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5038 machine_mode mode = <VSTRUCT:MODE>mode;
5039 rtx mem = gen_rtx_MEM (mode, operands[1]);
5041 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));

;; vldN_lane: bounds-check the lane index, then emit the lane-load pattern.
5045 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5046 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5047 (match_operand:DI 1 "register_operand" "w")
5048 (match_operand:VSTRUCT 2 "register_operand" "0")
5049 (match_operand:SI 3 "immediate_operand" "i")
5050 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5053 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5054 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5057 aarch64_simd_lane_bounds (operands[3], 0,
5058 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5060 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5061 operands[0], mem, operands[2], operands[3]));
5065 ;; Expanders for builtins to extract vector registers from large
5066 ;; opaque integer modes.

;; Extract the part'th D register from a VSTRUCT value.  The SUBREG offset
;; is in bytes (part * 16, one Q register per part); the D value is taken
;; as the low half of the selected Q register via gen_lowpart.
5070 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5071 [(match_operand:VDC 0 "register_operand" "=w")
5072 (match_operand:VSTRUCT 1 "register_operand" "w")
5073 (match_operand:SI 2 "immediate_operand" "i")]
5076 int part = INTVAL (operands[2]);
5077 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5078 int offset = part * 16;
5080 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5081 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; Extract the part'th Q register from a VSTRUCT value (byte offset
;; part * 16, one full Q register per part).
5087 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5088 [(match_operand:VQ 0 "register_operand" "=w")
5089 (match_operand:VSTRUCT 1 "register_operand" "w")
5090 (match_operand:SI 2 "immediate_operand" "i")]
5093 int part = INTVAL (operands[2]);
5094 int offset = part * 16;
5096 emit_move_insn (operands[0],
5097 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5101 ;; Permuted-store expanders for neon intrinsics.

5103 ;; Permute instructions

;; Standard name: constant permutation.  Delegates to
;; aarch64_expand_vec_perm_const; FAIL (not visible in this extract) is
;; presumably taken when it returns false — TODO confirm in full file.
5107 (define_expand "vec_perm_const<mode>"
5108 [(match_operand:VALL_F16 0 "register_operand")
5109 (match_operand:VALL_F16 1 "register_operand")
5110 (match_operand:VALL_F16 2 "register_operand")
5111 (match_operand:<V_cmp_result> 3)]
5114 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5115 operands[2], operands[3]))

;; Standard name: variable permutation on byte vectors (VB), expanded by
;; aarch64_expand_vec_perm.
5121 (define_expand "vec_perm<mode>"
5122 [(match_operand:VB 0 "register_operand")
5123 (match_operand:VB 1 "register_operand")
5124 (match_operand:VB 2 "register_operand")
5125 (match_operand:VB 3 "register_operand")]
5128 aarch64_expand_vec_perm (operands[0], operands[1],
5129 operands[2], operands[3]);
;; TBL/TBX table-lookup patterns.  TBL writes zero for out-of-range
;; indices; TBX (tied "0" operand) leaves the destination element
;; unchanged instead.  Table sizes: one V16QI (tbl1), OI pair (tbl2/tbl3/
;; tbx4), CI triple (qtbl3/qtbx3), XI quadruple (qtbl4/qtbx4).

;; Single-register table lookup.
5133 (define_insn "aarch64_tbl1<mode>"
5134 [(set (match_operand:VB 0 "register_operand" "=w")
5135 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5136 (match_operand:VB 2 "register_operand" "w")]
5139 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5140 [(set_attr "type" "neon_tbl1<q>")]

5143 ;; Two source registers.

5145 (define_insn "aarch64_tbl2v16qi"
5146 [(set (match_operand:V16QI 0 "register_operand" "=w")
5147 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5148 (match_operand:V16QI 2 "register_operand" "w")]
5151 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5152 [(set_attr "type" "neon_tbl2_q")]

;; Two-register table lookup for any byte-vector mode.
5155 (define_insn "aarch64_tbl3<mode>"
5156 [(set (match_operand:VB 0 "register_operand" "=w")
5157 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5158 (match_operand:VB 2 "register_operand" "w")]
5161 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5162 [(set_attr "type" "neon_tbl3")]

;; Two-register table extension (operand 1 tied to the destination).
5165 (define_insn "aarch64_tbx4<mode>"
5166 [(set (match_operand:VB 0 "register_operand" "=w")
5167 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5168 (match_operand:OI 2 "register_operand" "w")
5169 (match_operand:VB 3 "register_operand" "w")]
5172 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5173 [(set_attr "type" "neon_tbl4")]

5176 ;; Three source registers.

5178 (define_insn "aarch64_qtbl3<mode>"
5179 [(set (match_operand:VB 0 "register_operand" "=w")
5180 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5181 (match_operand:VB 2 "register_operand" "w")]
5184 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5185 [(set_attr "type" "neon_tbl3")]

5188 (define_insn "aarch64_qtbx3<mode>"
5189 [(set (match_operand:VB 0 "register_operand" "=w")
5190 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5191 (match_operand:CI 2 "register_operand" "w")
5192 (match_operand:VB 3 "register_operand" "w")]
5195 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5196 [(set_attr "type" "neon_tbl3")]

5199 ;; Four source registers.

5201 (define_insn "aarch64_qtbl4<mode>"
5202 [(set (match_operand:VB 0 "register_operand" "=w")
5203 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5204 (match_operand:VB 2 "register_operand" "w")]
5207 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5208 [(set_attr "type" "neon_tbl4")]

5211 (define_insn "aarch64_qtbx4<mode>"
5212 [(set (match_operand:VB 0 "register_operand" "=w")
5213 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5214 (match_operand:XI 2 "register_operand" "w")
5215 (match_operand:VB 3 "register_operand" "w")]
5218 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5219 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OImode register pair; split after
;; reload by aarch64_split_combinev16qi.
5222 (define_insn_and_split "aarch64_combinev16qi"
5223 [(set (match_operand:OI 0 "register_operand" "=w")
5224 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5225 (match_operand:V16QI 2 "register_operand" "w")]
5229 "&& reload_completed"
5232 aarch64_split_combinev16qi (operands);
5235 [(set_attr "type" "multiple")]

;; Generic two-input permute (ZIP/UZP/TRN families via the PERMUTE
;; iterator's perm_insn/perm_hilo attributes).
5238 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5239 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5240 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5241 (match_operand:VALL_F16 2 "register_operand" "w")]
5244 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5245 [(set_attr "type" "neon_permute<q>")]

5248 ;; Note immediate (third) operand is lane index not byte index.
;; EXT: the C fragment scales the lane index by the element size to get
;; the byte immediate the instruction encodes.
5249 (define_insn "aarch64_ext<mode>"
5250 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5251 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5252 (match_operand:VALL_F16 2 "register_operand" "w")
5253 (match_operand:SI 3 "immediate_operand" "i")]
5257 operands[3] = GEN_INT (INTVAL (operands[3])
5258 * GET_MODE_UNIT_SIZE (<MODE>mode));
5259 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5261 [(set_attr "type" "neon_ext<q>")]

;; REV16/REV32/REV64 element reversal via the REVERSE iterator.
5264 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5265 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5266 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5269 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5270 [(set_attr "type" "neon_rev<q>")]
;; D-register structure stores, mirroring the ldN_dreg patterns above:
;; vector element modes (VD) use STn with interleaving, the 64-bit scalar
;; modes (DX) use ST1 with ".1d" layout.
;; NOTE(review): unspec names / condition strings are missing from this
;; extract (embedded line numbers jump) — verify against the full file.

5273 (define_insn "aarch64_st2<mode>_dreg"
5274 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5275 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5276 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5279 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5280 [(set_attr "type" "neon_store2_2reg")]

5283 (define_insn "aarch64_st2<mode>_dreg"
5284 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5285 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5286 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5289 "st1\\t{%S1.1d - %T1.1d}, %0"
5290 [(set_attr "type" "neon_store1_2reg")]

5293 (define_insn "aarch64_st3<mode>_dreg"
5294 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5295 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5296 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5300 [(set_attr "type" "neon_store3_3reg")]

5303 (define_insn "aarch64_st3<mode>_dreg"
5304 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5305 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5306 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5309 "st1\\t{%S1.1d - %U1.1d}, %0"
5310 [(set_attr "type" "neon_store1_3reg")]

5313 (define_insn "aarch64_st4<mode>_dreg"
5314 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5315 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5316 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5319 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5320 [(set_attr "type" "neon_store4_4reg")]

5323 (define_insn "aarch64_st4<mode>_dreg"
5324 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5325 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5326 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329 "st1\\t{%S1.1d - %V1.1d}, %0"
5330 [(set_attr "type" "neon_store1_4reg")]
;; Builtin expanders for vstN intrinsics, mirroring the load expanders:
;; operand 0 is the pointer, operand 1 the register-list value.

;; D-register vstN: BLKmode MEM of nregs * 8 bytes, then stN_dreg.
5333 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5334 [(match_operand:DI 0 "register_operand" "r")
5335 (match_operand:VSTRUCT 1 "register_operand" "w")
5336 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5339 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5340 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5342 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));

;; Q-register vstN: MEM in the struct mode itself, then simd_stN.
5346 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5347 [(match_operand:DI 0 "register_operand" "r")
5348 (match_operand:VSTRUCT 1 "register_operand" "w")
5349 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5352 machine_mode mode = <VSTRUCT:MODE>mode;
5353 rtx mem = gen_rtx_MEM (mode, operands[0]);
5355 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; vstN_lane: sized BLKmode MEM, then the lane-store pattern.
5359 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5360 [(match_operand:DI 0 "register_operand" "r")
5361 (match_operand:VSTRUCT 1 "register_operand" "w")
5362 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5363 (match_operand:SI 2 "immediate_operand")]
5366 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5367 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5370 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5371 mem, operands[1], operands[2]));

;; vst1: plain move on little-endian, element-preserving ST1 on big-endian.
5375 (define_expand "aarch64_st1<VALL_F16:mode>"
5376 [(match_operand:DI 0 "register_operand")
5377 (match_operand:VALL_F16 1 "register_operand")]
5380 machine_mode mode = <VALL_F16:MODE>mode;
5381 rtx mem = gen_rtx_MEM (mode, operands[0]);
5383 if (BYTES_BIG_ENDIAN)
5384 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5386 emit_move_insn (mem, operands[1]);
5390 ;; Expander for builtins to insert vector registers into large
5391 ;; opaque integer modes.

5393 ;; Q-register list. We don't need a D-reg inserter as we zero
5394 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole list (operand 1 -> operand 0), then overwrite the
;; part'th Q register (byte offset part * 16) with operand 2.
5396 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5397 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5398 (match_operand:VSTRUCT 1 "register_operand" "0")
5399 (match_operand:VQ 2 "register_operand" "w")
5400 (match_operand:SI 3 "immediate_operand" "i")]
5403 int part = INTVAL (operands[3]);
5404 int offset = part * 16;
5406 emit_move_insn (operands[0], operands[1]);
5407 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),

5412 ;; Standard pattern name vec_init<mode>.

;; Delegate vector initialization entirely to aarch64_expand_vector_init.
5414 (define_expand "vec_init<mode>"
5415 [(match_operand:VALL_F16 0 "register_operand" "")
5416 (match_operand 1 "" "")]
5419 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load one element from memory and replicate it to every lane (LD1R);
;; matched when combine produces a vec_duplicate of a memory operand.
5423 (define_insn "*aarch64_simd_ld1r<mode>"
5424 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5425 (vec_duplicate:VALL_F16
5426 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5428 "ld1r\\t{%0.<Vtype>}, %1"
5429 [(set_attr "type" "neon_load1_all_lanes")]

;; Floating-point reciprocal estimate, vector form (FRECPE).
5432 (define_insn "aarch64_frecpe<mode>"
5433 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5434 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5437 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5438 [(set_attr "type" "neon_fp_recpe_<stype><q>")]

;; Scalar FRECPE/FRECPX via the FRECP iterator's frecp_suffix attribute.
5441 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5442 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5443 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5446 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5447 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]

;; Reciprocal step (FRECPS), vector and scalar forms.
5450 (define_insn "aarch64_frecps<mode>"
5451 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5453 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5454 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5457 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5458 [(set_attr "type" "neon_fp_recps_<stype><q>")]

;; Unsigned reciprocal estimate on 32-bit integer vectors (URECPE).
5461 (define_insn "aarch64_urecpe<mode>"
5462 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5463 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5466 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5467 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

5469 ;; Standard pattern name vec_extract<mode>.

;; Extract one lane: delegate to the aarch64_get_lane pattern.
5471 (define_expand "vec_extract<mode>"
5472 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5473 (match_operand:VALL_F16 1 "register_operand" "")
5474 (match_operand:SI 2 "immediate_operand" "")]
5478 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single round (AESE/AESD via the aes_op iterator attribute).
;; Operand 1 (state) is tied to the destination; operand 2 is the key.
5484 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5485 [(set (match_operand:V16QI 0 "register_operand" "=w")
5486 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5487 (match_operand:V16QI 2 "register_operand" "w")]
5489 "TARGET_SIMD && TARGET_CRYPTO"
5490 "aes<aes_op>\\t%0.16b, %2.16b"
5491 [(set_attr "type" "crypto_aese")]

5494 ;; When AES/AESMC fusion is enabled we want the register allocation to
5498 ;; So prefer to tie operand 1 to operand 0 when fusing.

;; AESMC/AESIMC (mix columns).  Two alternatives: the tied-"0" one is only
;; enabled when AES/AESMC fusion is on (see set_attr_alternative below),
;; steering the allocator toward the fusible register assignment.
5500 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5501 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5502 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5504 "TARGET_SIMD && TARGET_CRYPTO"
5505 "aes<aesmc_op>\\t%0.16b, %1.16b"
5506 [(set_attr "type" "crypto_aesmc")
5507 (set_attr_alternative "enabled"
5508 [(if_then_else (match_test
5509 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5510 (const_string "yes" )
5511 (const_string "no"))
5512 (const_string "yes")])]
;; SHA1 instruction patterns.
;; NOTE(review): some output templates / unspec names are missing from
;; this extract (embedded line numbers jump) — verify in full file.

;; SHA1H fixed-rotate.
5517 (define_insn "aarch64_crypto_sha1hsi"
5518 [(set (match_operand:SI 0 "register_operand" "=w")
5519 (unspec:SI [(match_operand:SI 1
5520 "register_operand" "w")]
5522 "TARGET_SIMD && TARGET_CRYPTO"
5524 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1SU1 schedule update, part 2 (operand 1 tied to destination).
5527 (define_insn "aarch64_crypto_sha1su1v4si"
5528 [(set (match_operand:V4SI 0 "register_operand" "=w")
5529 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5530 (match_operand:V4SI 2 "register_operand" "w")]
5532 "TARGET_SIMD && TARGET_CRYPTO"
5533 "sha1su1\\t%0.4s, %2.4s"
5534 [(set_attr "type" "crypto_sha1_fast")]

;; SHA1C/SHA1M/SHA1P hash update (sha1_op iterator attribute).
5537 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5538 [(set (match_operand:V4SI 0 "register_operand" "=w")
5539 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5540 (match_operand:SI 2 "register_operand" "w")
5541 (match_operand:V4SI 3 "register_operand" "w")]
5543 "TARGET_SIMD && TARGET_CRYPTO"
5544 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5545 [(set_attr "type" "crypto_sha1_slow")]

;; SHA1SU0 schedule update, part 1.
5548 (define_insn "aarch64_crypto_sha1su0v4si"
5549 [(set (match_operand:V4SI 0 "register_operand" "=w")
5550 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5551 (match_operand:V4SI 2 "register_operand" "w")
5552 (match_operand:V4SI 3 "register_operand" "w")]
5554 "TARGET_SIMD && TARGET_CRYPTO"
5555 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5556 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H/SHA256H2 hash update (sha256_op iterator attribute); operand 1
;; (hash state) is tied to the destination.
5561 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5562 [(set (match_operand:V4SI 0 "register_operand" "=w")
5563 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5564 (match_operand:V4SI 2 "register_operand" "w")
5565 (match_operand:V4SI 3 "register_operand" "w")]
5567 "TARGET_SIMD && TARGET_CRYPTO"
5568 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5569 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 schedule update, part 1 (operand 1 tied to the destination).
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other crypto pattern in this file.
5572 (define_insn "aarch64_crypto_sha256su0v4si"
5573 [(set (match_operand:V4SI 0 "register_operand" "=w")
5574 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5575 (match_operand:V4SI 2 "register_operand" "w")]
5577 "TARGET_SIMD && TARGET_CRYPTO"
5578 "sha256su0\\t%0.4s, %2.4s"
5579 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 schedule update, part 2 (operand 1 tied to the destination).
;; Fixed: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing
;; space), inconsistent with every other crypto pattern in this file.
5582 (define_insn "aarch64_crypto_sha256su1v4si"
5583 [(set (match_operand:V4SI 0 "register_operand" "=w")
5584 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5585 (match_operand:V4SI 2 "register_operand" "w")
5586 (match_operand:V4SI 3 "register_operand" "w")]
5588 "TARGET_SIMD && TARGET_CRYPTO"
5589 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5590 [(set_attr "type" "crypto_sha256_slow")]
;; Polynomial multiply long (PMULL) of two 64-bit polynomials, producing
;; a 128-bit (TImode) result.
5595 (define_insn "aarch64_crypto_pmulldi"
5596 [(set (match_operand:TI 0 "register_operand" "=w")
5597 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5598 (match_operand:DI 2 "register_operand" "w")]
5600 "TARGET_SIMD && TARGET_CRYPTO"
5601 "pmull\\t%0.1q, %1.1d, %2.1d"
5602 [(set_attr "type" "neon_mul_d_long")]
5605 (define_insn "aarch64_crypto_pmullv2di"
5606 [(set (match_operand:TI 0 "register_operand" "=w")
5607 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5608 (match_operand:V2DI 2 "register_operand" "w")]
5610 "TARGET_SIMD && TARGET_CRYPTO"
5611 "pmull2\\t%0.1q, %1.2d, %2.2d"
5612 [(set_attr "type" "neon_mul_d_long")]