1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2016 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard mov<mode> expander for all vector modes (VALL_F16).
;; RTL does not allow mem-to-mem moves, so when the destination is
;; memory the source is forced into a register first.
;; NOTE(review): this listing skips original lines (e.g. 24-25); the
;; insn condition and preparation braces are not visible here.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 	(match_operand:VALL_F16 1 "general_operand" ""))]
26   if (GET_CODE (operands[0]) == MEM)
27     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  Must not FAIL (see the in-pattern
;; comment), so a non-register source is legitimized when the
;; destination is also a non-register.
31 (define_expand "movmisalign<mode>"
32   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33         (match_operand:VALL 1 "general_operand" ""))]
36   /* This pattern is not permitted to fail during expansion: if both arguments
37      are non-registers (e.g. memory := constant, which can be created by the
38      auto-vectorizer), force operand 1 into a register.  */
39   if (!register_operand (operands[0], <MODE>mode)
40       && !register_operand (operands[1], <MODE>mode))
41     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Integer vector duplicate: DUP from a general register (alt 0) or
;; from element 0 of a SIMD register (alt 1).
44 (define_insn "aarch64_simd_dup<mode>"
45   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47 	  (match_operand:<VEL> 1 "register_operand" "r, w")))]
50    dup\\t%0.<Vtype>, %<vw>1
51    dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52   [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point vector duplicate: DUP from element 0 of a SIMD
;; register only (no GP-register alternative for FP modes).
55 (define_insn "aarch64_simd_dup<mode>"
56   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 	(vec_duplicate:VDQF_F16
58 	  (match_operand:<VEL> 1 "register_operand" "w")))]
60   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61   [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane across the whole vector.  The lane index
;; is remapped with ENDIAN_LANE_N so the architectural lane is used on
;; both endiannesses.
64 (define_insn "aarch64_dup_lane<mode>"
65   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 	(vec_duplicate:VALL_F16
68 	    (match_operand:VALL_F16 1 "register_operand" "w")
69 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76   [(set_attr "type" "neon_dup<q>")]
;; As aarch64_dup_lane, but the source has the opposite vector width
;; (<VSWAP_WIDTH>); lane remapping therefore uses the source's mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
81 	(vec_duplicate:VALL_F16
83 	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 					  INTVAL (operands[2])));
90     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92   [(set_attr "type" "neon_dup<q>")]
;; 64-bit (VD) vector move.  Alternatives: load, store, SIMD reg copy
;; (via ORR), SIMD->GP (umov), GP->SIMD (ins), GP->GP (mov), and an
;; immediate built by aarch64_output_simd_mov_immediate.  At least one
;; operand must be a register (no mem-to-mem).
95 (define_insn "*aarch64_simd_mov<mode>"
96   [(set (match_operand:VD 0 "nonimmediate_operand"
97 		"=w, m, w, ?r, ?w, ?r, w")
98 	(match_operand:VD 1 "general_operand"
99 		"m, w, w, w, r, r, Dn"))]
101    && (register_operand (operands[0], <MODE>mode)
102        || register_operand (operands[1], <MODE>mode))"
104    switch (which_alternative)
106      case 0: return "ldr\\t%d0, %1";
107      case 1: return "str\\t%d1, %0";
108      case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109      case 3: return "umov\t%0, %1.d[0]";
110      case 4: return "ins\t%0.d[0], %1";
111      case 5: return "mov\t%0, %1";
113        return aarch64_output_simd_mov_immediate (operands[1],
115      default: gcc_unreachable ();
118   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 		     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120 		     mov_reg, neon_move<q>")]
;; 128-bit (VQ) vector move.  The GP-involved alternatives need two
;; instructions (length 8) because a Q value spans two X registers;
;; they are split later (see the define_splits below).
123 (define_insn "*aarch64_simd_mov<mode>"
124   [(set (match_operand:VQ 0 "nonimmediate_operand"
125 		"=w, m, w, ?r, ?w, ?r, w")
126 	(match_operand:VQ 1 "general_operand"
127 		"m, w, w, w, r, r, Dn"))]
129    && (register_operand (operands[0], <MODE>mode)
130        || register_operand (operands[1], <MODE>mode))"
132   switch (which_alternative)
135 	return "ldr\\t%q0, %1";
137 	return "str\\t%q1, %0";
139 	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 		     neon_logic<q>, multiple, multiple, multiple,\
153    (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of 64-bit vectors with LDP.  The condition requires the
;; second address to be exactly first address + vector size, i.e. the
;; two loads are adjacent.
156 (define_insn "load_pair<mode>"
157   [(set (match_operand:VD 0 "register_operand" "=w")
158 	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159    (set (match_operand:VD 2 "register_operand" "=w")
160 	(match_operand:VD 3 "memory_operand" "m"))]
162    && rtx_equal_p (XEXP (operands[3], 0),
163 		   plus_constant (Pmode,
164 				  XEXP (operands[1], 0),
165 				  GET_MODE_SIZE (<MODE>mode)))"
167   [(set_attr "type" "neon_ldp")]
;; Store a pair of 64-bit vectors with STP; mirror of load_pair above,
;; with the same adjacency requirement on the two addresses.
170 (define_insn "store_pair<mode>"
171   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 	(match_operand:VD 1 "register_operand" "w"))
173    (set (match_operand:VD 2 "memory_operand" "=m")
174 	(match_operand:VD 3 "register_operand" "w"))]
176    && rtx_equal_p (XEXP (operands[2], 0),
177 		   plus_constant (Pmode,
178 				  XEXP (operands[0], 0),
179 				  GET_MODE_SIZE (<MODE>mode)))"
181   [(set_attr "type" "neon_stp")]
;; Post-reload split of a 128-bit vector move held entirely in general
;; registers: emitted as two DImode reg-reg moves.
;; NOTE(review): the "(define_split" opener (original line 184) is
;; missing from this listing.
185   [(set (match_operand:VQ 0 "register_operand" "")
186       (match_operand:VQ 1 "register_operand" ""))]
187   "TARGET_SIMD && reload_completed
188    && GP_REGNUM_P (REGNO (operands[0]))
189    && GP_REGNUM_P (REGNO (operands[1]))"
192   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Post-reload split of a 128-bit vector move that crosses between the
;; FP/SIMD and general register files, handled by
;; aarch64_split_simd_move.  NOTE(review): "(define_split" opener not
;; visible in this listing.
197   [(set (match_operand:VQ 0 "register_operand" "")
198       (match_operand:VQ 1 "register_operand" ""))]
199   "TARGET_SIMD && reload_completed
200    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204   aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit vector move into two half-width operations.
;; GP source: move the low and high halves into the vector register
;; via move_lo_quad / move_hi_quad.  Otherwise (vector source going to
;; GP): extract each half with mov_from_<mode>low/high using
;; endian-aware lane-selection parallels.
208 (define_expand "aarch64_split_simd_mov<mode>"
209   [(set (match_operand:VQ 0)
210         (match_operand:VQ 1))]
213     rtx dst = operands[0];
214     rtx src = operands[1];
216     if (GP_REGNUM_P (REGNO (src)))
218         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219         rtx src_high_part = gen_highpart (<VHALF>mode, src);
222           (gen_move_lo_quad_<mode> (dst, src_low_part));
224           (gen_move_hi_quad_<mode> (dst, src_high_part));
229         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235           (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237           (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a 128-bit vector into a general register
;; (post-reload only; used by aarch64_split_simd_mov above).
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 	  (match_operand:VQ 1 "register_operand" "w")
247 	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248   "TARGET_SIMD && reload_completed"
250   [(set_attr "type" "neon_to_gp<q>")
251    (set_attr "length" "4")
;; Move the high half of a 128-bit vector into a general register
;; (post-reload only; counterpart of mov_from_<mode>low).
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 	  (match_operand:VQ 1 "register_operand" "w")
258 	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259   "TARGET_SIMD && reload_completed"
261   [(set_attr "type" "neon_to_gp<q>")
262    (set_attr "length" "4")
;; Vector OR-NOT: dst = op2 | ~op1.  Note ORN takes the inverted
;; operand second, hence %2 before %1 in the output template.
265 (define_insn "orn<mode>3"
266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 		(match_operand:VDQ_I 2 "register_operand" "w")))]
270   "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271   [(set_attr "type" "neon_logic<q>")]
;; Vector bit-clear: dst = op2 & ~op1 (BIC inverts its second source,
;; hence the swapped operand order in the template).
274 (define_insn "bic<mode>3"
275  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 		(match_operand:VDQ_I 2 "register_operand" "w")))]
279   "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280   [(set_attr "type" "neon_logic<q>")]
;; Integer vector addition (ADD, element-wise).
283 (define_insn "add<mode>3"
284   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 		  (match_operand:VDQ_I 2 "register_operand" "w")))]
288   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289   [(set_attr "type" "neon_add<q>")]
;; Integer vector subtraction (SUB, element-wise).
292 (define_insn "sub<mode>3"
293   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
297   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298   [(set_attr "type" "neon_sub<q>")]
;; Integer vector multiplication (MUL); byte/half/single lanes only —
;; AdvSIMD has no 64-bit element MUL, hence the VDQ_BHSI iterator.
301 (define_insn "mul<mode>3"
302   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV<n> variant
;; (selected by <Vrevsuff> from the element width).
310 (define_insn "bswap<mode>2"
311   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (RBIT); byte vectors only.
318 (define_insn "aarch64_rbit<mode>"
319   [(set (match_operand:VB 0 "register_operand" "=w")
320 	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as: byte-swap each element, then
;; bit-reverse each byte (viewing the vector as bytes), so trailing
;; zeros become leading zeros, and finish with CLZ.
327 (define_expand "ctz<mode>2"
328   [(set (match_operand:VS 0 "register_operand")
329         (ctz:VS (match_operand:VS 1 "register_operand")))]
332      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; Multiply a vector by one lane of another vector of the same mode
;; ((F)MUL by-element form); lane index is endian-corrected.
341 (define_insn "*aarch64_mul3_elt<mode>"
342  [(set (match_operand:VMUL 0 "register_operand" "=w")
346 	  (match_operand:VMUL 1 "register_operand" "<h_con>")
347 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
348       (match_operand:VMUL 3 "register_operand" "w")))]
351     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
352     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
354   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Multiply-by-element where the lane comes from the opposite-width
;; vector mode; lane remapped in the source's (<VSWAP_WIDTH>) mode.
357 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
358   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
359      (mult:VMUL_CHANGE_NLANES
360        (vec_duplicate:VMUL_CHANGE_NLANES
362 	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
363 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
364       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
367     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
368 					  INTVAL (operands[2])));
369     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
371   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply a vector by a duplicated scalar: emitted as the
;; by-element form using lane 0 of the scalar's register.
374 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
375  [(set (match_operand:VMUL 0 "register_operand" "=w")
378 	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
379       (match_operand:VMUL 2 "register_operand" "w")))]
381   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
382   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP.
385 (define_insn "aarch64_rsqrte<mode>"
386   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
387 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
390   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
391   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS), the Newton-Raphson
;; refinement companion to FRSQRTE.
393 (define_insn "aarch64_rsqrts<mode>"
394   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
395 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
396 			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
399   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
400   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the iterative approximation helper
;; (aarch64_emit_approx_sqrt with recp=true).
402 (define_expand "rsqrt<mode>2"
403   [(set (match_operand:VALLF 0 "register_operand" "=w")
404 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
408   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF scalar result = V2DF lane * DF scalar, emitted as the V2DF
;; by-element FMUL; lane index endian-corrected in V2DFmode.
412 (define_insn "*aarch64_mul3_elt_to_64v2df"
413   [(set (match_operand:DF 0 "register_operand" "=w")
416 	 (match_operand:V2DF 1 "register_operand" "w")
417 	 (parallel [(match_operand:SI 2 "immediate_operand")]))
418        (match_operand:DF 3 "register_operand" "w")))]
421     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
422     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
424   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Integer vector negation (NEG, element-wise).
427 (define_insn "neg<mode>2"
428   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
429 	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
431   "neg\t%0.<Vtype>, %1.<Vtype>"
432   [(set_attr "type" "neon_neg<q>")]
;; Integer vector absolute value (ABS), using the rtl abs code so it
;; can combine with other operations.
435 (define_insn "abs<mode>2"
436   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
437         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
439   "abs\t%0.<Vtype>, %1.<Vtype>"
440   [(set_attr "type" "neon_abs<q>")]
443 ;; The intrinsic version of integer ABS must not be allowed to
444 ;; combine with any operation with an integrated ABS step, such
;; Intrinsic ABS kept as an unspec (not rtl abs) so combine cannot
;; fold it into instructions with a built-in ABS step — see the
;; comment preceding this pattern.
446 (define_insn "aarch64_abs<mode>"
447   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
449 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
452   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
453   [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| matched as abs(minus),
;; emitted as SABD.
456 (define_insn "abd<mode>_3"
457   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
458 	(abs:VDQ_BHSI (minus:VDQ_BHSI
459 		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
460 		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
462   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
463   [(set_attr "type" "neon_abd<q>")]
;; Signed absolute-difference-and-accumulate (SABA):
;; dst = acc + |op1 - op2|, with the accumulator tied to the output
;; register (constraint "0").
466 (define_insn "aba<mode>_3"
467   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
468 	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
469 			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
470 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
471 		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
473   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
474   [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar.
477 (define_insn "fabd<mode>3"
478   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
481 	   (match_operand:VHSDF_HSDF 1 "register_operand" "w")
482 	   (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
484   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
485   [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; Vector bitwise AND.
488 (define_insn "and<mode>3"
489   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
490         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
491 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
493   "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
494   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise OR (ORR).
497 (define_insn "ior<mode>3"
498   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
499         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
500 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
502   "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
503   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise exclusive-OR (EOR).
506 (define_insn "xor<mode>3"
507   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
511   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512   [(set_attr "type" "neon_logic<q>")]
;; Vector bitwise NOT.
515 (define_insn "one_cmpl<mode>2"
516   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
519   "not\t%0.<Vbtype>, %1.<Vbtype>"
520   [(set_attr "type" "neon_logic<q>")]
;; Insert one element into an integer vector (vec_merge form).
;; Operand 2 arrives as a one-hot mask; the code extracts the bit
;; position, endian-corrects it, and rebuilds the mask before
;; printing.  Alternatives: from GP reg (INS), from SIMD lane 0 (INS),
;; or straight from memory (LD1 to a single lane).
523 (define_insn "aarch64_simd_vec_set<mode>"
524   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
526 	  (vec_duplicate:VDQ_BHSI
527 	    (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
528 	  (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
529 	  (match_operand:SI 2 "immediate_operand" "i,i,i")))]
532    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
533    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
534    switch (which_alternative)
537 	return "ins\\t%0.<Vetype>[%p2], %w1";
539 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
541 	return "ld1\\t{%0.<Vetype>}[%p2], %1";
546   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Copy one lane of vector op3 into one lane of op1 (INS element
;; form).  Destination mask (operand 2) and source lane (operand 4)
;; are both endian-corrected before printing.
549 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
550   [(set (match_operand:VALL 0 "register_operand" "=w")
554 	    (match_operand:VALL 3 "register_operand" "w")
556 	      [(match_operand:SI 4 "immediate_operand" "i")])))
557 	  (match_operand:VALL 1 "register_operand" "0")
558 	  (match_operand:SI 2 "immediate_operand" "i")))]
561     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
562     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
563     operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
565     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
567   [(set_attr "type" "neon_ins<q>")]
;; As vec_copy_lane, but the source vector has the opposite width
;; (<VSWAP_WIDTH>); its lane index is corrected in that mode.
570 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
571   [(set (match_operand:VALL 0 "register_operand" "=w")
575 	    (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
577 	      [(match_operand:SI 4 "immediate_operand" "i")])))
578 	  (match_operand:VALL 1 "register_operand" "0")
579 	  (match_operand:SI 2 "immediate_operand" "i")))]
582     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
583     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
584     operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
585 			   INTVAL (operands[4])));
587     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
589   [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by an immediate vector (USHR).
592 (define_insn "aarch64_simd_lshr<mode>"
593  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
594        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
595 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
597   "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
598   [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector (SSHR).
601 (define_insn "aarch64_simd_ashr<mode>"
602  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
603        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
604 		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
606   "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
607   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (SHL).
610 (define_insn "aarch64_simd_imm_shl<mode>"
611  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
615   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
616   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a per-element register amount (SSHL).
619 (define_insn "aarch64_simd_reg_sshl<mode>"
620  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
624   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
625   [(set_attr "type" "neon_shift_reg<q>")]
;; Variable shift with unsigned (USHL) semantics: negative amounts
;; shift right.  Kept as an unspec since rtl has no such operation.
628 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
629  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
631 		    (match_operand:VDQ_I 2 "register_operand" "w")]
632 		   UNSPEC_ASHIFT_UNSIGNED))]
634   "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
635   [(set_attr "type" "neon_shift_reg<q>")]
;; Variable shift with signed (SSHL) semantics: negative amounts
;; shift right arithmetically.  Unspec counterpart of the pattern above.
638 (define_insn "aarch64_simd_reg_shl<mode>_signed"
639  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
640        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
641 		    (match_operand:VDQ_I 2 "register_operand" "w")]
642 		   UNSPEC_ASHIFT_SIGNED))]
644   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
645   [(set_attr "type" "neon_shift_reg<q>")]
;; Standard vector shift-left expander.  A constant in-range amount
;; becomes an immediate SHL; otherwise the SImode amount is forced to
;; a register, duplicated across a vector, and SSHL is used.
648 (define_expand "ashl<mode>3"
649   [(match_operand:VDQ_I 0 "register_operand" "")
650    (match_operand:VDQ_I 1 "register_operand" "")
651    (match_operand:SI  2 "general_operand" "")]
654   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
657   if (CONST_INT_P (operands[2]))
659       shift_amount = INTVAL (operands[2]);
660       if (shift_amount >= 0 && shift_amount < bit_width)
662 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
664 	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
671 	  operands[2] = force_reg (SImode, operands[2]);
674   else if (MEM_P (operands[2]))
676       operands[2] = force_reg (SImode, operands[2]);
679   if (REG_P (operands[2]))
681       rtx tmp = gen_reg_rtx (<MODE>mode);
682       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
683 					     convert_to_mode (<VEL>mode,
686       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Standard logical shift-right expander.  Constant in-range amounts
;; (note: 1..bit_width inclusive for right shifts) use immediate USHR;
;; otherwise the amount is negated and duplicated, then USHL performs
;; a right shift via its negative-amount semantics.
695 (define_expand "lshr<mode>3"
696   [(match_operand:VDQ_I 0 "register_operand" "")
697    (match_operand:VDQ_I 1 "register_operand" "")
698    (match_operand:SI  2 "general_operand" "")]
701   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
704   if (CONST_INT_P (operands[2]))
706       shift_amount = INTVAL (operands[2]);
707       if (shift_amount > 0 && shift_amount <= bit_width)
709 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
711 	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
717 	  operands[2] = force_reg (SImode, operands[2]);
719   else if (MEM_P (operands[2]))
721       operands[2] = force_reg (SImode, operands[2]);
724   if (REG_P (operands[2]))
726       rtx tmp = gen_reg_rtx (SImode);
727       rtx tmp1 = gen_reg_rtx (<MODE>mode);
728       emit_insn (gen_negsi2 (tmp, operands[2]));
729       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
730 					     convert_to_mode (<VEL>mode,
732       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Standard arithmetic shift-right expander; same strategy as lshr3
;; but emits immediate SSHR, or SSHL with a negated register amount.
742 (define_expand "ashr<mode>3"
743   [(match_operand:VDQ_I 0 "register_operand" "")
744    (match_operand:VDQ_I 1 "register_operand" "")
745    (match_operand:SI  2 "general_operand" "")]
748   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
751   if (CONST_INT_P (operands[2]))
753       shift_amount = INTVAL (operands[2]);
754       if (shift_amount > 0 && shift_amount <= bit_width)
756 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
758 	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
764 	  operands[2] = force_reg (SImode, operands[2]);
766   else if (MEM_P (operands[2]))
768       operands[2] = force_reg (SImode, operands[2]);
771   if (REG_P (operands[2]))
773       rtx tmp = gen_reg_rtx (SImode);
774       rtx tmp1 = gen_reg_rtx (<MODE>mode);
775       emit_insn (gen_negsi2 (tmp, operands[2]));
776       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
777 					     convert_to_mode (<VEL>mode,
779       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: maps directly onto SSHL.
789 (define_expand "vashl<mode>3"
790  [(match_operand:VDQ_I 0 "register_operand" "")
791   (match_operand:VDQ_I 1 "register_operand" "")
792   (match_operand:VDQ_I 2 "register_operand" "")]
795   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
800 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
801 ;; Negating individual lanes most certainly offsets the
802 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the amounts and
;; use SSHL's negative-shift semantics (see the comment above on why
;; V2DI is excluded).
803 (define_expand "vashr<mode>3"
804  [(match_operand:VDQ_BHSI 0 "register_operand" "")
805   (match_operand:VDQ_BHSI 1 "register_operand" "")
806   (match_operand:VDQ_BHSI 2 "register_operand" "")]
809   rtx neg = gen_reg_rtx (<MODE>mode);
810   emit (gen_neg<mode>2 (neg, operands[2]));
811   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DI arithmetic shift right allowing a shift amount of 64: since an
;; asr by 64 yields the same result as asr by 63 (all sign bits), the
;; amount is clamped to 63 before delegating to ashrdi3.
817 (define_expand "aarch64_ashr_simddi"
818   [(match_operand:DI 0 "register_operand" "=w")
819    (match_operand:DI 1 "register_operand" "w")
820    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
823     /* An arithmetic shift right by 64 fills the result with copies of the sign
824        bit, just like asr by 63 - however the standard pattern does not handle
826     if (INTVAL (operands[2]) == 64)
827       operands[2] = GEN_INT (63);
828     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate the amounts and use
;; USHL's negative-shift semantics.
833 (define_expand "vlshr<mode>3"
834  [(match_operand:VDQ_BHSI 0 "register_operand" "")
835   (match_operand:VDQ_BHSI 1 "register_operand" "")
836   (match_operand:VDQ_BHSI 2 "register_operand" "")]
839   rtx neg = gen_reg_rtx (<MODE>mode);
840   emit (gen_neg<mode>2 (neg, operands[2]));
841   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DI logical shift right allowing an amount of 64: the result is
;; simply zero in that case, otherwise delegate to lshrdi3.
846 (define_expand "aarch64_lshr_simddi"
847   [(match_operand:DI 0 "register_operand" "=w")
848    (match_operand:DI 1 "register_operand" "w")
849    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
852     if (INTVAL (operands[2]) == 64)
853       emit_move_insn (operands[0], const0_rtx);
855       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; Standard vec_set expander for integer vectors: encode the lane as
;; a one-hot mask and reuse aarch64_simd_vec_set.
860 (define_expand "vec_set<mode>"
861   [(match_operand:VDQ_BHSI 0 "register_operand")
862    (match_operand:<VEL> 1 "register_operand")
863    (match_operand:SI 2 "immediate_operand")]
866   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
867   emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
868 					  GEN_INT (elem), operands[0]));
873 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element-wise shift of the 64-bit value);
;; direction swaps on big-endian, hence SHL vs USHR.  See the comment
;; above: using ushl/r avoids needing a SIMD zero register.
874 (define_insn "vec_shr_<mode>"
875   [(set (match_operand:VD 0 "register_operand" "=w")
876         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
877 		    (match_operand:SI 2 "immediate_operand" "i")]
881   if (BYTES_BIG_ENDIAN)
882     return "shl %d0, %d1, %2";
884     return "ushr %d0, %d1, %2";
886   [(set_attr "type" "neon_shift_imm")]
;; Insert one 64-bit element into a V2DI vector: INS from a general
;; register or from lane 0 of another SIMD register.  Operand 2 is a
;; one-hot lane mask, endian-corrected as in vec_set above.
889 (define_insn "aarch64_simd_vec_setv2di"
890   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
893 	    (match_operand:DI 1 "register_operand" "r,w"))
894 	  (match_operand:V2DI 3 "register_operand" "0,0")
895 	  (match_operand:SI 2 "immediate_operand" "i,i")))]
898   int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
899   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
900   switch (which_alternative)
903 	return "ins\\t%0.d[%p2], %1";
905 	return "ins\\t%0.d[%p2], %1.d[0]";
910   [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI: one-hot mask plus the insn above.
913 (define_expand "vec_setv2di"
914   [(match_operand:V2DI 0 "register_operand")
915    (match_operand:DI 1 "register_operand")
916    (match_operand:SI 2 "immediate_operand")]
919   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
920   emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
921 					GEN_INT (elem), operands[0]));
;; Insert one FP element (from lane 0 of a SIMD register) into an FP
;; vector; lane mask endian-corrected as in the integer variant.
926 (define_insn "aarch64_simd_vec_set<mode>"
927   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
929 	  (vec_duplicate:VDQF_F16
930 	    (match_operand:<VEL> 1 "register_operand" "w"))
931 	  (match_operand:VDQF_F16 3 "register_operand" "0")
932 	  (match_operand:SI 2 "immediate_operand" "i")))]
935     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
937     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
938     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
940   [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors: one-hot mask plus the insn above.
943 (define_expand "vec_set<mode>"
944   [(match_operand:VDQF_F16 0 "register_operand" "+w")
945    (match_operand:<VEL> 1 "register_operand" "w")
946    (match_operand:SI 2 "immediate_operand" "")]
949   HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
950   emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
951 					  GEN_INT (elem), operands[0]));
;; Multiply-accumulate: dst = acc + op2 * op3 (MLA), accumulator tied
;; to the output register.
957 (define_insn "aarch64_mla<mode>"
958  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
959        (plus:VDQ_BHSI (mult:VDQ_BHSI
960 			(match_operand:VDQ_BHSI 2 "register_operand" "w")
961 			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
962 		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
964  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
965   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Multiply-accumulate with a broadcast lane (MLA by-element form);
;; lane index endian-corrected.
968 (define_insn "*aarch64_mla_elt<mode>"
969  [(set (match_operand:VDQHS 0 "register_operand" "=w")
974 	     (match_operand:VDQHS 1 "register_operand" "<h_con>")
975 	     (parallel [(match_operand:SI 2 "immediate_operand")])))
976 	 (match_operand:VDQHS 3 "register_operand" "w"))
977        (match_operand:VDQHS 4 "register_operand" "0")))]
980     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
981     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
983   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by-element where the lane comes from the opposite-width vector
;; mode; lane corrected in <VSWAP_WIDTH>mode.
986 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
987   [(set (match_operand:VDQHS 0 "register_operand" "=w")
992 	      (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
993 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
994 	  (match_operand:VDQHS 3 "register_operand" "w"))
995 	(match_operand:VDQHS 4 "register_operand" "0")))]
998     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
999 					  INTVAL (operands[2])));
1000     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1002   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: dst = acc - op2 * op3 (MLS), accumulator tied
;; to the output register.
1005 (define_insn "aarch64_mls<mode>"
1006  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1007        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1008 		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1009 			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1011  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1012   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Multiply-subtract with a broadcast lane (MLS by-element form);
;; lane index endian-corrected.
1015 (define_insn "*aarch64_mls_elt<mode>"
1016  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1018 	(match_operand:VDQHS 4 "register_operand" "0")
1020 	   (vec_duplicate:VDQHS
1022 	       (match_operand:VDQHS 1 "register_operand" "<h_con>")
1023 	       (parallel [(match_operand:SI 2 "immediate_operand")])))
1024 	   (match_operand:VDQHS 3 "register_operand" "w"))))]
1027     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1028     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1030   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by-element where the lane comes from the opposite-width vector
;; mode; lane corrected in <VSWAP_WIDTH>mode.
1033 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1034   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 	(match_operand:VDQHS 4 "register_operand" "0")
1038 	    (vec_duplicate:VDQHS
1040 		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1041 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1042 	    (match_operand:VDQHS 3 "register_operand" "w"))))]
1045     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1046 					  INTVAL (operands[2])));
1047     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1049   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1052 ;; Max/Min operations.
;; Signed/unsigned element-wise max/min (SMAX/SMIN/UMAX/UMIN).
1053 (define_insn "<su><maxmin><mode>3"
1054  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1055        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1056 		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1058   "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1059   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: no direct 64-bit element instruction exists, so it
;; is synthesized as a compare plus a vcond (bit-select) sequence.
;; NOTE(review): the switch that sets cmp_operator (orig. lines
;; 1069-1088) is missing from this listing.
1062 (define_expand "<su><maxmin>v2di3"
1063  [(set (match_operand:V2DI 0 "register_operand" "")
1064        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1065 		    (match_operand:V2DI 2 "register_operand" "")))]
1068     enum rtx_code cmp_operator;
1089     cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1090     emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1091               operands[2], cmp_fmt, operands[1], operands[2]));
1095 ;; Pairwise Integer Max/Min operations.
;; Pairwise integer max/min (SMAXP etc.); unspec-based intrinsic.
1096 (define_insn "aarch64_<maxmin_uns>p<mode>"
1097  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1098        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1099 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1102   "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1103   [(set_attr "type" "neon_minmax<q>")]
1106 ;; Pairwise FP Max/Min operations.
;; Pairwise FP max/min (FMAXP/FMINP/FMAXNMP/FMINNMP); unspec-based.
1107 (define_insn "aarch64_<maxmin_uns>p<mode>"
1108  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1109        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1110 		      (match_operand:VHSDF 2 "register_operand" "w")]
1113   "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1114   [(set_attr "type" "neon_minmax<q>")]
1117 ;; vec_concat gives a new vector with the low elements from operand 1, and
1118 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1119 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1120 ;; What that means, is that the RTL descriptions of the below patterns
1121 ;; need to change depending on endianness.
1123 ;; Move to the low architectural bits of the register.
1124 ;; On little-endian this is { operand, zeroes }
1125 ;; On big-endian this is { zeroes, operand }
;; Little-endian move-to-low-half for vectors with >2 elements:
;; result is { operand1, zeroes }.  NOTE(review): the three output
;; templates (orig. lines ~1133-1136) are missing from this listing.
1127 (define_insn "move_lo_quad_internal_<mode>"
1128   [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1130 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1131 	  (vec_duplicate:<VHALF> (const_int 0))))]
1132   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1137   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1138    (set_attr "simd" "yes,*,yes")
1139    (set_attr "fp" "*,yes,*")
1140    (set_attr "length" "4")]
;; Little-endian move-to-low-half for 2-element (VQ_2E) vectors;
;; separate pattern because the zero half is a single element, not a
;; vec_duplicate.  NOTE(review): output templates not visible here.
1143 (define_insn "move_lo_quad_internal_<mode>"
1144   [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1146 	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1148   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1153   [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1154    (set_attr "simd" "yes,*,yes")
1155    (set_attr "fp" "*,yes,*")
1156    (set_attr "length" "4")]
1159 (define_insn "move_lo_quad_internal_be_<mode>"
1160 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1162 (vec_duplicate:<VHALF> (const_int 0))
1163 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1164 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1169 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1170 (set_attr "simd" "yes,*,yes")
1171 (set_attr "fp" "*,yes,*")
1172 (set_attr "length" "4")]
1175 (define_insn "move_lo_quad_internal_be_<mode>"
1176 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1179 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1180 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1185 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1186 (set_attr "simd" "yes,*,yes")
1187 (set_attr "fp" "*,yes,*")
1188 (set_attr "length" "4")]
1191 (define_expand "move_lo_quad_<mode>"
1192 [(match_operand:VQ 0 "register_operand")
1193 (match_operand:VQ 1 "register_operand")]
1196 if (BYTES_BIG_ENDIAN)
1197 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1199 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1204 ;; Move operand1 to the high architectural bits of the register, keeping
1205 ;; the low architectural bits of operand2.
1206 ;; For little-endian this is { operand2, operand1 }
1207 ;; For big-endian this is { operand1, operand2 }
;; Both internal insns emit INS Vd.d[1], Vn.d[0]: only the RTL shape (the
;; position of the kept low half inside the concat) depends on endianness.
1209 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1210 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1214 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1215 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1216 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1218 ins\\t%0.d[1], %1.d[0]
1220 [(set_attr "type" "neon_ins")]
1223 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1224 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1226 (match_operand:<VHALF> 1 "register_operand" "w,r")
1229 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1232 ins\\t%0.d[1], %1.d[0]
1234 [(set_attr "type" "neon_ins")]
;; Expander: build the lo-half lane-number parallel (false = low half) and
;; pick the endian-appropriate internal pattern.
1237 (define_expand "move_hi_quad_<mode>"
1238 [(match_operand:VQ 0 "register_operand" "")
1239 (match_operand:<VHALF> 1 "register_operand" "")]
1242 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1243 if (BYTES_BIG_ENDIAN)
1244 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1247 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1252 ;; Narrowing operations.
;; XTN: truncate each element of a full-width vector to half width.
1255 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1256 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1257 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1259 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1260 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack two D-register vectors into one double-width temporary, then
;; narrow it in one go.  The lo/hi operand indices swap on big-endian so
;; the elements end up in the order the vectorizer expects.
1263 (define_expand "vec_pack_trunc_<mode>"
1264 [(match_operand:<VNARROWD> 0 "register_operand" "")
1265 (match_operand:VDN 1 "register_operand" "")
1266 (match_operand:VDN 2 "register_operand" "")]
1269 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1270 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1271 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1273 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1274 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1275 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Q-register version: XTN into the low half then XTN2 into the high half.
;; Earlyclobber (=&w) because operand 0 is written before both inputs are
;; fully consumed.  The source order flips on big-endian.
1281 (define_insn "vec_pack_trunc_<mode>"
1282 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1283 (vec_concat:<VNARROWQ2>
1284 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1285 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1288 if (BYTES_BIG_ENDIAN)
1289 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1291 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1293 [(set_attr "type" "multiple")
1294 (set_attr "length" "8")]
1297 ;; Widening operations.
;; Widen the low half via [US]SHLL #0 (shift-left-long by zero is the
;; canonical sign/zero-extend of the low lanes).
1299 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1301 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1302 (match_operand:VQW 1 "register_operand" "w")
1303 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1306 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1307 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the high half via [US]SHLL2 #0.
1310 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1311 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1312 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1313 (match_operand:VQW 1 "register_operand" "w")
1314 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1317 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1318 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders: build the hi (true) / lo (false) lane-number
;; parallel and emit the matching insn above.
1321 (define_expand "vec_unpack<su>_hi_<mode>"
1322 [(match_operand:<VWIDE> 0 "register_operand" "")
1323 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1326 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1327 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1333 (define_expand "vec_unpack<su>_lo_<mode>"
1334 [(match_operand:<VWIDE> 0 "register_operand" "")
1335 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1339 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1345 ;; Widening arithmetic.
;; Combiner patterns (leading '*' = no gen_* entry point): widening
;; multiply-accumulate on the low halves -> [US]MLAL.
1347 (define_insn "*aarch64_<su>mlal_lo<mode>"
1348 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1351 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1352 (match_operand:VQW 2 "register_operand" "w")
1353 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1354 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1355 (match_operand:VQW 4 "register_operand" "w")
1357 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1359 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1360 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Same on the high halves -> [US]MLAL2.
1363 (define_insn "*aarch64_<su>mlal_hi<mode>"
1364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1367 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1368 (match_operand:VQW 2 "register_operand" "w")
1369 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1370 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1371 (match_operand:VQW 4 "register_operand" "w")
1373 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1375 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1376 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, low halves -> [US]MLSL.
1379 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1380 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1382 (match_operand:<VWIDE> 1 "register_operand" "0")
1384 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1385 (match_operand:VQW 2 "register_operand" "w")
1386 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1387 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1388 (match_operand:VQW 4 "register_operand" "w")
1391 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1392 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract, high halves -> [US]MLSL2.
1395 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1396 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1398 (match_operand:<VWIDE> 1 "register_operand" "0")
1400 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1401 (match_operand:VQW 2 "register_operand" "w")
1402 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1403 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1404 (match_operand:VQW 4 "register_operand" "w")
1407 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1408 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit vector) forms: whole-vector widening MLA / MLS.
1411 (define_insn "*aarch64_<su>mlal<mode>"
1412 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1416 (match_operand:VD_BHSI 1 "register_operand" "w"))
1418 (match_operand:VD_BHSI 2 "register_operand" "w")))
1419 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1421 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1422 [(set_attr "type" "neon_mla_<Vetype>_long")]
1425 (define_insn "*aarch64_<su>mlsl<mode>"
1426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1428 (match_operand:<VWIDE> 1 "register_operand" "0")
1431 (match_operand:VD_BHSI 2 "register_operand" "w"))
1433 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1435 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1436 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves -> [US]MULL, plus its
;; vec_widen_*mult_lo standard-name expander.
1439 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1440 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1441 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 1 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 2 "register_operand" "w")
1448 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1449 [(set_attr "type" "neon_mul_<Vetype>_long")]
1452 (define_expand "vec_widen_<su>mult_lo_<mode>"
1453 [(match_operand:<VWIDE> 0 "register_operand" "")
1454 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1455 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1458 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1459 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; High-half widening multiply -> [US]MULL2, plus its expander.
1466 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1467 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1468 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1469 (match_operand:VQW 1 "register_operand" "w")
1470 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1471 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1472 (match_operand:VQW 2 "register_operand" "w")
1475 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1476 [(set_attr "type" "neon_mul_<Vetype>_long")]
1479 (define_expand "vec_widen_<su>mult_hi_<mode>"
1480 [(match_operand:<VWIDE> 0 "register_operand" "")
1481 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1482 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1485 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1486 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1494 ;; FP vector operations.
1495 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1496 ;; double-precision (64-bit) floating-point data types and arithmetic as
1497 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1498 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1500 ;; Floating-point operations can raise an exception. Vectorizing such
1501 ;; operations is safe because of reasons explained below.
1503 ;; ARMv8 permits an extension to enable trapped floating-point
1504 ;; exception handling, however this is an optional feature. In the
1505 ;; event of a floating-point exception being raised by vectorised
1507 ;; 1. If trapped floating-point exceptions are available, then a trap
1508 ;; will be taken when any lane raises an enabled exception. A trap
1509 ;; handler may determine which lane raised the exception.
1510 ;; 2. Alternatively a sticky exception flag is set in the
1511 ;; floating-point status register (FPSR). Software may explicitly
1512 ;; test the exception flags, in which case the tests will either
1513 ;; prevent vectorisation, allowing precise identification of the
1514 ;; failing operation, or if tested outside of vectorisable regions
1515 ;; then the specific operation and lane are not of interest.
1517 ;; FP arithmetic operations.
;; Standard-name FP add/sub/mul on half/single/double vector modes.
1519 (define_insn "add<mode>3"
1520 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1521 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1522 (match_operand:VHSDF 2 "register_operand" "w")))]
1524 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1525 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1528 (define_insn "sub<mode>3"
1529 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1530 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1531 (match_operand:VHSDF 2 "register_operand" "w")))]
1533 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1534 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1537 (define_insn "mul<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540 (match_operand:VHSDF 2 "register_operand" "w")))]
1542 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division: first try the Newton-Raphson reciprocal approximation
;; (aarch64_emit_approx_div); if it declines, fall through to the plain
;; FDIV insn below.
1546 (define_expand "div<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549 (match_operand:VHSDF 2 "register_operand" "w")))]
1552 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1555 operands[1] = force_reg (<MODE>mode, operands[1]);
1558 (define_insn "*div<mode>3"
1559 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1560 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1561 (match_operand:VHSDF 2 "register_operand" "w")))]
1563 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1564 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Unary FP negate / absolute value.
1567 (define_insn "neg<mode>2"
1568 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1569 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1571 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1572 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1575 (define_insn "abs<mode>2"
1576 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1577 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1579 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1580 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add; operand 3 (the addend) is tied to the output ("0")
;; because FMLA accumulates into its destination.
1583 (define_insn "fma<mode>4"
1584 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1585 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1586 (match_operand:VHSDF 2 "register_operand" "w")
1587 (match_operand:VHSDF 3 "register_operand" "0")))]
1589 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1590 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA/FMLS by-element forms: the scalar multiplier is one lane of a
;; vector register, selected at assembly time.  The lane number in the RTL
;; uses GCC's memory ordering, so each pattern remaps it through
;; ENDIAN_LANE_N to the architectural lane before printing.
1593 (define_insn "*aarch64_fma4_elt<mode>"
1594 [(set (match_operand:VDQF 0 "register_operand" "=w")
1598 (match_operand:VDQF 1 "register_operand" "<h_con>")
1599 (parallel [(match_operand:SI 2 "immediate_operand")])))
1600 (match_operand:VDQF 3 "register_operand" "w")
1601 (match_operand:VDQF 4 "register_operand" "0")))]
1604 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1605 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1607 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Variant where the lane comes from the opposite-width vector mode
;; (e.g. a V2SF lane feeding a V4SF operation).
1610 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1611 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1613 (vec_duplicate:VDQSF
1615 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1616 (parallel [(match_operand:SI 2 "immediate_operand")])))
1617 (match_operand:VDQSF 3 "register_operand" "w")
1618 (match_operand:VDQSF 4 "register_operand" "0")))]
1621 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1622 INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Multiplier broadcast from a scalar register: always lane 0, so no
;; endian remapping is needed.
1628 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1629 [(set (match_operand:VMUL 0 "register_operand" "=w")
1632 (match_operand:<VEL> 1 "register_operand" "w"))
1633 (match_operand:VMUL 2 "register_operand" "w")
1634 (match_operand:VMUL 3 "register_operand" "0")))]
1636 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1637 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed with the vector-by-element FMLA.
1640 (define_insn "*aarch64_fma4_elt_to_64v2df"
1641 [(set (match_operand:DF 0 "register_operand" "=w")
1644 (match_operand:V2DF 1 "register_operand" "w")
1645 (parallel [(match_operand:SI 2 "immediate_operand")]))
1646 (match_operand:DF 3 "register_operand" "w")
1647 (match_operand:DF 4 "register_operand" "0")))]
1650 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1651 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1653 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (negated multiplicand) -> FMLS, plus the same
;; family of by-element variants as FMLA above.
1656 (define_insn "fnma<mode>4"
1657 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1659 (match_operand:VHSDF 1 "register_operand" "w")
1661 (match_operand:VHSDF 2 "register_operand" "w"))
1662 (match_operand:VHSDF 3 "register_operand" "0")))]
1664 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1668 (define_insn "*aarch64_fnma4_elt<mode>"
1669 [(set (match_operand:VDQF 0 "register_operand" "=w")
1672 (match_operand:VDQF 3 "register_operand" "w"))
1675 (match_operand:VDQF 1 "register_operand" "<h_con>")
1676 (parallel [(match_operand:SI 2 "immediate_operand")])))
1677 (match_operand:VDQF 4 "register_operand" "0")))]
1680 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1681 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1683 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1686 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1687 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1690 (match_operand:VDQSF 3 "register_operand" "w"))
1691 (vec_duplicate:VDQSF
1693 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQSF 4 "register_operand" "0")))]
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1699 INTVAL (operands[2])));
1700 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1702 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1705 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1706 [(set (match_operand:VMUL 0 "register_operand" "=w")
1709 (match_operand:VMUL 2 "register_operand" "w"))
1711 (match_operand:<VEL> 1 "register_operand" "w"))
1712 (match_operand:VMUL 3 "register_operand" "0")))]
1714 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1715 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1718 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1719 [(set (match_operand:DF 0 "register_operand" "=w")
1722 (match_operand:V2DF 1 "register_operand" "w")
1723 (parallel [(match_operand:SI 2 "immediate_operand")]))
1725 (match_operand:DF 3 "register_operand" "w"))
1726 (match_operand:DF 4 "register_operand" "0")))]
1729 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1730 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1732 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1735 ;; Vector versions of the floating-point frint patterns.
1736 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1737 (define_insn "<frint_pattern><mode>2"
1738 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1739 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1742 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1743 [(set_attr "type" "neon_fp_round_<stype><q>")]
1746 ;; Vector versions of the fcvt standard patterns.
1747 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer: FCVT<A/M/N/P/Z>[SU].
1748 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1749 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1750 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1751 [(match_operand:VHSDF 1 "register_operand" "w")]
1754 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1755 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1758 ;; HF Scalar variants of related SIMD instructions.
;; These require the ARMv8.2-A FP16 scalar instructions (TARGET_SIMD_F16INST).
1759 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1760 [(set (match_operand:HI 0 "register_operand" "=w")
1761 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1763 "TARGET_SIMD_F16INST"
1764 "fcvt<frint_suffix><su>\t%h0, %h1"
1765 [(set_attr "type" "neon_fp_to_int_s")]
1768 (define_insn "<optab>_trunchfhi2"
1769 [(set (match_operand:HI 0 "register_operand" "=w")
1770 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1771 "TARGET_SIMD_F16INST"
1772 "fcvtz<su>\t%h0, %h1"
1773 [(set_attr "type" "neon_fp_to_int_s")]
1776 (define_insn "<optab>hihf2"
1777 [(set (match_operand:HF 0 "register_operand" "=w")
1778 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1779 "TARGET_SIMD_F16INST"
1780 "<su_optab>cvtf\t%h0, %h1"
1781 [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power of 2 with the conversion into a single
;; fixed-point FCVTZ[SU] with an fbits immediate; the IN_RANGE check keeps
;; fbits within what the element width can encode.
1784 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1785 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1786 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1788 (match_operand:VDQF 1 "register_operand" "w")
1789 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1792 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1793 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1795 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1797 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1798 output_asm_insn (buf, operands);
1801 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders that map onto the insns above.
1804 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1805 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1806 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1807 [(match_operand:VHSDF 1 "register_operand")]
1812 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1813 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1814 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1815 [(match_operand:VHSDF 1 "register_operand")]
1820 (define_expand "ftrunc<VHSDF:mode>2"
1821 [(set (match_operand:VHSDF 0 "register_operand")
1822 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer-to-FP conversion: [SU]CVTF.
1827 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1828 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1830 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1832 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1833 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1836 ;; Conversions between vectors of floats and doubles.
1837 ;; Contains a mix of patterns to match standard pattern names
1838 ;; and those for intrinsics.
1840 ;; Float widening operations.
;; FCVTL: float-extend the low half of a half/single vector to the
;; double-width element type.
1842 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1843 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1844 (float_extend:<VWIDE> (vec_select:<VHALF>
1845 (match_operand:VQ_HSF 1 "register_operand" "w")
1846 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1849 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1850 [(set_attr "type" "neon_fp_cvt_widen_s")]
1853 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point with an fbits immediate (operand 2).
1855 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1856 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1857 (unspec:<VHSDF:FCVT_TARGET>
1858 [(match_operand:VHSDF 1 "register_operand" "w")
1859 (match_operand:SI 2 "immediate_operand" "i")]
1862 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1863 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float with an fbits immediate (operand 2).
1866 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1867 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1868 (unspec:<VDQ_HSDI:FCVT_TARGET>
1869 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1870 (match_operand:SI 2 "immediate_operand" "i")]
1873 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1874 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1877 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1878 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1879 ;; the meaning of HI and LO changes depending on the target endianness.
1880 ;; While elsewhere we map the higher numbered elements of a vector to
1881 ;; the lower architectural lanes of the vector, for these patterns we want
1882 ;; to always treat "hi" as referring to the higher architectural lanes.
1883 ;; Consequently, while the patterns below look inconsistent with our
1884 ;; other big-endian patterns their behavior is as required.
;; Standard-name expander: float-extend the low-numbered lanes (false ->
;; lo-half lane parallel) via the FCVTL insn above.
1886 (define_expand "vec_unpacks_lo_<mode>"
1887 [(match_operand:<VWIDE> 0 "register_operand" "")
1888 (match_operand:VQ_HSF 1 "register_operand" "")]
1891 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1892 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; FCVTL2: float-extend the high half.
1898 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1899 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1900 (float_extend:<VWIDE> (vec_select:<VHALF>
1901 (match_operand:VQ_HSF 1 "register_operand" "w")
1902 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1905 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1906 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander: float-extend the high-numbered lanes.
;; FIX(review): the original emitted gen_aarch64_simd_vec_unpacks_lo_<mode>
;; here even though it builds the HI-half lane parallel (true), so the
;; vectorizer would widen the low lanes twice and never the high lanes.
;; It must emit the _hi_ pattern, matching the integer vec_unpack<su>_hi
;; expander above and upstream GCC.
1909 (define_expand "vec_unpacks_hi_<mode>"
1910 [(match_operand:<VWIDE> 0 "register_operand" "")
1911 (match_operand:VQ_HSF 1 "register_operand" "")]
1914 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1915 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; FCVTL on a 64-bit vector: widen V2SF/V4HF to the double-width mode.
1920 (define_insn "aarch64_float_extend_lo_<Vwide>"
1921 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1922 (float_extend:<VWIDE>
1923 (match_operand:VDF 1 "register_operand" "w")))]
1925 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1926 [(set_attr "type" "neon_fp_cvt_widen_s")]
1929 ;; Float narrowing operations.
;; FCVTN: narrow a full-width vector into a 64-bit result.
1931 (define_insn "aarch64_float_truncate_lo_<mode>"
1932 [(set (match_operand:VDF 0 "register_operand" "=w")
1934 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1936 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1937 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2: narrow into the high half while keeping operand 1 (tied to the
;; output) in the low half.  Separate LE/BE insns because the position of
;; the kept half inside the vec_concat flips with endianness.
1940 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1941 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1943 (match_operand:VDF 1 "register_operand" "0")
1945 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1946 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1947 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1948 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1951 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1952 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1955 (match_operand:<VWIDE> 2 "register_operand" "w"))
1956 (match_operand:VDF 1 "register_operand" "0")))]
1957 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1958 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1959 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Expander: choose the _be/_le variant for the current byte order.
1962 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1963 [(match_operand:<VDBL> 0 "register_operand" "=w")
1964 (match_operand:VDF 1 "register_operand" "0")
1965 (match_operand:<VWIDE> 2 "register_operand" "w")]
1968 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1969 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1970 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1971 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF into one V4SF: FCVTN for one input, FCVTN2 for the other;
;; operand roles swap on big-endian (same scheme as vec_pack_trunc above).
1976 (define_expand "vec_pack_trunc_v2df"
1977 [(set (match_operand:V4SF 0 "register_operand")
1979 (float_truncate:V2SF
1980 (match_operand:V2DF 1 "register_operand"))
1981 (float_truncate:V2SF
1982 (match_operand:V2DF 2 "register_operand"))
1986 rtx tmp = gen_reg_rtx (V2SFmode);
1987 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1988 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1990 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1991 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1992 tmp, operands[hi]));
;; Scalar DF pair -> V2SF: build a V2DF with the quad-move helpers, then
;; narrow it in one FCVTN.
1997 (define_expand "vec_pack_trunc_df"
1998 [(set (match_operand:V2SF 0 "register_operand")
2001 (match_operand:DF 1 "register_operand"))
2003 (match_operand:DF 2 "register_operand"))
2007 rtx tmp = gen_reg_rtx (V2SFmode);
2008 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2009 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2011 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2012 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2013 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2019 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2021 ;; a = (b < c) ? b : c;
2022 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2023 ;; either explicitly or indirectly via -ffast-math.
2025 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2026 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2027 ;; operand will be returned when both operands are zero (i.e. they may not
2028 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2029 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin RTL -> FMAXNM/FMINNM (the "number" forms; see the NaN caveat
;; above for why this mapping is only safe under the fast-math conditions).
2032 (define_insn "<su><maxmin><mode>3"
2033 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2034 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2035 (match_operand:VHSDF 2 "register_operand" "w")))]
2037 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2038 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
;; Unspec-based max/min used by the intrinsics (exact IEEE semantics
;; preserved, hence the unspec rather than smax/smin RTL).
2041 (define_insn "<maxmin_uns><mode>3"
2042 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2043 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2044 (match_operand:VHSDF 2 "register_operand" "w")]
2047 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2048 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2051 ;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
2052 (define_insn "<fmaxmin><mode>3"
2053 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2054 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2055 (match_operand:VHSDF 2 "register_operand" "w")]
2058 "<fmaxmin_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2059 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2062 ;; 'across lanes' add.
;; Reduce-add to a scalar: ADDV into lane 0 of a scratch vector, then
;; extract that lane (ENDIAN_LANE_N maps logical element 0 to the right
;; architectural lane).
2064 (define_expand "reduc_plus_scal_<mode>"
2065 [(match_operand:<VEL> 0 "register_operand" "=w")
2066 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2070 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2071 rtx scratch = gen_reg_rtx (<MODE>mode);
2072 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2073 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Pairwise FP add (FADDP), also used below to build FP reductions.
2078 (define_insn "aarch64_faddp<mode>"
2079 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2080 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2081 (match_operand:VHSDF 2 "register_operand" "w")]
2084 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2085 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2088 (define_insn "aarch64_reduc_plus_internal<mode>"
2089 [(set (match_operand:VDQV 0 "register_operand" "=w")
2090 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2093 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2094 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV form; a single ADDP of the vector with itself reduces
;; both lanes.
2097 (define_insn "aarch64_reduc_plus_internalv2si"
2098 [(set (match_operand:V2SI 0 "register_operand" "=w")
2099 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2102 "addp\\t%0.2s, %1.2s, %1.2s"
2103 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with the scalar-output FADDP form.
2106 (define_insn "reduc_plus_scal_<mode>"
2107 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2108 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2111 "faddp\\t%<Vetype>0, %1.<Vtype>"
2112 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF: two rounds of pairwise FADDP leave the sum in every lane; extract
;; logical lane 0.
2115 (define_expand "reduc_plus_scal_v4sf"
2116 [(set (match_operand:SF 0 "register_operand")
2117 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2121 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2122 rtx scratch = gen_reg_rtx (V4SFmode);
2123 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2124 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2125 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Per-element bit ops: CLS (sign-bit run length), CLZ, CNT (popcount on
;; byte vectors).
2129 (define_insn "clrsb<mode>2"
2130 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2131 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2133 "cls\\t%0.<Vtype>, %1.<Vtype>"
2134 [(set_attr "type" "neon_cls<q>")]
2137 (define_insn "clz<mode>2"
2138 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2139 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2141 "clz\\t%0.<Vtype>, %1.<Vtype>"
2142 [(set_attr "type" "neon_cls<q>")]
2145 (define_insn "popcount<mode>2"
2146 [(set (match_operand:VB 0 "register_operand" "=w")
2147 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2149 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2150 [(set_attr "type" "neon_cnt<q>")]
2153 ;; 'across lanes' max and min ops.
2155 ;; Template for outputting a scalar, so we can create __builtins which can be
2156 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; Same reduce-then-extract-lane-0 scheme as reduc_plus_scal above.
2157 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2158 [(match_operand:<VEL> 0 "register_operand")
2159 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2163 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2164 rtx scratch = gen_reg_rtx (<MODE>mode);
2165 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2167 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2172 ;; Likewise for integer cases, signed and unsigned.
2173 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2174 [(match_operand:<VEL> 0 "register_operand")
2175 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2179 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2180 rtx scratch = gen_reg_rtx (<MODE>mode);
2181 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2183 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Integer across-lanes reduction: [SU]MAXV / [SU]MINV.
2188 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2189 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2190 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2193 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2194 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces both lanes.
2197 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2198 [(set (match_operand:V2SI 0 "register_operand" "=w")
2199 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2202 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2203 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes reduction (pairwise or across-vector per <vp>).
2206 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2207 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2208 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2211 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2212 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2215 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2217 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2220 ;; Thus our BSL is of the form:
2221 ;; op0 = bsl (mask, op2, op3)
2222 ;; We can use any of:
2225 ;; bsl mask, op1, op2
2226 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2227 ;; bit op0, op2, mask
2228 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2229 ;; bif op0, op1, mask
2231 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2232 ;; Some forms of straight-line code may generate the equivalent form
2233 ;; in *aarch64_simd_bsl<mode>_alt.
;; The three constraint alternatives tie operand 1, 2 or 3 to the
;; destination ("0"), selecting the bsl, bit or bif encoding respectively.
2235 (define_insn "aarch64_simd_bsl<mode>_internal"
2236 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2240 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2241 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2242 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2243 (match_dup:<V_cmp_result> 3)
2247 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2248 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2249 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2250 [(set_attr "type" "neon_bsl<q>")]
2253 ;; We need this form in addition to the above pattern to match the case
2254 ;; when combine tries merging three insns such that the second operand of
2255 ;; the outer XOR matches the second operand of the inner XOR rather than
2256 ;; the first. The two are equivalent but since recog doesn't try all
2257 ;; permutations of commutative operations, we have to have a separate pattern.
2259 (define_insn "*aarch64_simd_bsl<mode>_alt"
2260 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2264 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2265 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2266 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2267 (match_dup:VSDQ_I_DI 2)))]
2270 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2271 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2272 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2273 [(set_attr "type" "neon_bsl<q>")]
;; Public expander: BSL is defined only on integer modes, so float
;; operands are punned to the integer <V_cmp_result> mode first, and a
;; scratch destination is used so the result can be punned back.
2276 (define_expand "aarch64_simd_bsl<mode>"
2277 [(match_operand:VALLDIF 0 "register_operand")
2278 (match_operand:<V_cmp_result> 1 "register_operand")
2279 (match_operand:VALLDIF 2 "register_operand")
2280 (match_operand:VALLDIF 3 "register_operand")]
2283 /* We can't alias operands together if they have different modes. */
2284 rtx tmp = operands[0];
2285 if (FLOAT_MODE_P (<MODE>mode))
2287 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2288 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2289 tmp = gen_reg_rtx (<V_cmp_result>mode);
2291 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2292 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2296 if (tmp != operands[0])
2297 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Expand an integer vector conditional select.  The comparison
;; (operands[3..5]) is materialized into a mask register -- possibly
;; after inverting the comparison code and/or swapping op1/op2 -- and the
;; result is formed with a BSL, or by just moving the mask when the
;; selection is between -1 and 0.
2302 (define_expand "aarch64_vcond_internal<mode><mode>"
2303 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2304 (if_then_else:VSDQ_I_DI
2305 (match_operator 3 "comparison_operator"
2306 [(match_operand:VSDQ_I_DI 4 "register_operand")
2307 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2308 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2309 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2312 rtx op1 = operands[1];
2313 rtx op2 = operands[2];
2314 rtx mask = gen_reg_rtx (<MODE>mode);
2315 enum rtx_code code = GET_CODE (operands[3]);
2317 /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2318 and desirable for other comparisons if it results in FOO ? -1 : 0
2319 (this allows direct use of the comparison result without a bsl). */
2322 && op1 == CONST0_RTX (<V_cmp_result>mode)
2323 && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2329 case LE: code = GT; break;
2330 case LT: code = GE; break;
2331 case GE: code = LT; break;
2332 case GT: code = LE; break;
2334 case NE: code = EQ; break;
2335 case LTU: code = GEU; break;
2336 case LEU: code = GTU; break;
2337 case GTU: code = LEU; break;
2338 case GEU: code = LTU; break;
2339 default: gcc_unreachable ();
2343 /* Make sure we can handle the last operand. */
2347 /* Normalized to EQ above. */
2355 /* These instructions have a form taking an immediate zero. */
2356 if (operands[5] == CONST0_RTX (<MODE>mode))
2358 /* Fall through, as may need to load into register. */
2360 if (!REG_P (operands[5]))
2361 operands[5] = force_reg (<MODE>mode, operands[5]);
2368 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2372 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2376 emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2380 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2384 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2388 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2392 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2396 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2399 /* NE has been normalized to EQ above. */
2401 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2408 /* If we have (a = (b CMP c) ? -1 : 0);
2409 Then we can simply move the generated mask. */
2411 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2412 && op2 == CONST0_RTX (<V_cmp_result>mode))
2413 emit_move_insn (operands[0], mask);
2417 op1 = force_reg (<MODE>mode, op1);
2419 op2 = force_reg (<MODE>mode, op2);
2420 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point variant of the vcond expander.  FCM* compares return
;; false for unordered lanes, so several codes are implemented via the
;; inverse comparison with the BSL operands swapped afterwards (see the
;; in-body comments).  base_comparison/complimentary_comparison hold the
;; gen functions for the direct and operand-swapped forms.
2427 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2428 [(set (match_operand:VDQF_COND 0 "register_operand")
2430 (match_operator 3 "comparison_operator"
2431 [(match_operand:VDQF 4 "register_operand")
2432 (match_operand:VDQF 5 "nonmemory_operand")])
2433 (match_operand:VDQF_COND 1 "nonmemory_operand")
2434 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2438 int use_zero_form = 0;
2439 int swap_bsl_operands = 0;
2440 rtx op1 = operands[1];
2441 rtx op2 = operands[2];
2442 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2443 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2445 rtx (*base_comparison) (rtx, rtx, rtx);
2446 rtx (*complimentary_comparison) (rtx, rtx, rtx);
2448 switch (GET_CODE (operands[3]))
2455 if (operands[5] == CONST0_RTX (<MODE>mode))
2462 if (!REG_P (operands[5]))
2463 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2466 switch (GET_CODE (operands[3]))
2476 base_comparison = gen_aarch64_cmge<VDQF:mode>;
2477 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2485 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2486 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
2491 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2492 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2498 switch (GET_CODE (operands[3]))
2505 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2506 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2512 Note that there also exist direct comparison against 0 forms,
2513 so catch those as a special case. */
2517 switch (GET_CODE (operands[3]))
2520 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2523 base_comparison = gen_aarch64_cmle<VDQF:mode>;
2526 /* Do nothing, other zero form cases already have the correct
2533 emit_insn (base_comparison (mask, operands[4], operands[5]));
2535 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2542 /* FCM returns false for lanes which are unordered, so if we use
2543 the inverse of the comparison we actually want to emit, then
2544 swap the operands to BSL, we will end up with the correct result.
2545 Note that a NE NaN and NaN NE b are true for all a, b.
2547 Our transformations are:
2552 a NE b -> !(a EQ b) */
2555 emit_insn (base_comparison (mask, operands[4], operands[5]));
2557 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
2559 swap_bsl_operands = 1;
2562 /* We check (a > b || b > a). combining these comparisons give us
2563 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2564 will then give us (a == b || a UNORDERED b) as intended. */
2566 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2567 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2568 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2569 swap_bsl_operands = 1;
2572 /* Operands are ORDERED iff (a > b || b >= a).
2573 Swapping the operands to BSL will give the UNORDERED case. */
2574 swap_bsl_operands = 1;
2577 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2578 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2579 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2585 if (swap_bsl_operands)
2591 /* If we have (a = (b CMP c) ? -1 : 0);
2592 Then we can simply move the generated mask. */
2594 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2595 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2596 emit_move_insn (operands[0], mask);
2600 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2602 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2603 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond: same mode for data and comparison operands;
;; simply forwards to the internal expander above.
2610 (define_expand "vcond<mode><mode>"
2611 [(set (match_operand:VALLDI 0 "register_operand")
2612 (if_then_else:VALLDI
2613 (match_operator 3 "comparison_operator"
2614 [(match_operand:VALLDI 4 "register_operand")
2615 (match_operand:VALLDI 5 "nonmemory_operand")])
2616 (match_operand:VALLDI 1 "nonmemory_operand")
2617 (match_operand:VALLDI 2 "nonmemory_operand")))]
2620 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2621 operands[2], operands[3],
2622 operands[4], operands[5]))
2626 (define_expand "vcond<v_cmp_result><mode>"
2627 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2628 (if_then_else:<V_cmp_result>
2629 (match_operator 3 "comparison_operator"
2630 [(match_operand:VDQF 4 "register_operand")
2631 (match_operand:VDQF 5 "nonmemory_operand")])
2632 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2633 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2636 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2637 operands[0], operands[1],
2638 operands[2], operands[3],
2639 operands[4], operands[5]));
;; Unsigned vcondu: only valid on integer modes; the internal expander
;; distinguishes signed/unsigned via the comparison code (LTU/GEU etc.).
2643 (define_expand "vcondu<mode><mode>"
2644 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2645 (if_then_else:VSDQ_I_DI
2646 (match_operator 3 "comparison_operator"
2647 [(match_operand:VSDQ_I_DI 4 "register_operand")
2648 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2649 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2650 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2653 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2654 operands[2], operands[3],
2655 operands[4], operands[5]));
2659 ;; Patterns for AArch64 SIMD Intrinsics.
2661 ;; Lane extraction with sign extension to general purpose register.
2662 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2663 [(set (match_operand:GPI 0 "register_operand" "=r")
2666 (match_operand:VDQQH 1 "register_operand" "w")
2667 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2670 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2671 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2673 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to a 32-bit general register (UMOV).
2676 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2677 [(set (match_operand:SI 0 "register_operand" "=r")
2680 (match_operand:VDQQH 1 "register_operand" "w")
2681 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2684 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2685 return "umov\\t%w0, %1.<Vetype>[%2]";
2687 [(set_attr "type" "neon_to_gp<q>")]
2690 ;; Lane extraction of a value, neither sign nor zero extension
2691 ;; is guaranteed so upper bits should be considered undefined.
2692 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP within the SIMD file,
;; or ST1 of a single lane straight to memory.
2693 (define_insn "aarch64_get_lane<mode>"
2694 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2696 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2697 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2700 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2701 switch (which_alternative)
2704 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2706 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2708 return "st1\\t{%1.<Vetype>}[%2], %0";
2713 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2716 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Concatenation of a 64-bit value with a zero high half; only matched
;; on little-endian (the _be variant below handles big-endian, where the
;; vec_concat operand order is reversed).
2719 (define_insn "*aarch64_combinez<mode>"
2720 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2722 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2723 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2724 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2729 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2730 (set_attr "simd" "yes,*,yes")
2731 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of *aarch64_combinez: zero half first in the concat.
2734 (define_insn "*aarch64_combinez_be<mode>"
2735 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2737 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2738 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2739 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2744 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2745 (set_attr "simd" "yes,*,yes")
2746 (set_attr "fp" "*,yes,*")]
;; Public expander for combining two 64-bit vectors into one 128-bit
;; vector; delegates to aarch64_combine_internal (endian handled there
;; and at expand time).
2749 (define_expand "aarch64_combine<mode>"
2750 [(match_operand:<VDBL> 0 "register_operand")
2751 (match_operand:VDC 1 "register_operand")
2752 (match_operand:VDC 2 "register_operand")]
2756 if (BYTES_BIG_ENDIAN)
2766 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until reload, then split; the operand order passed
;; to aarch64_split_simd_combine is swapped on big-endian.
2771 (define_insn_and_split "aarch64_combine_internal<mode>"
2772 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2773 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2774 (match_operand:VDC 2 "register_operand" "w")))]
2777 "&& reload_completed"
2780 if (BYTES_BIG_ENDIAN)
2781 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2783 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2786 [(set_attr "type" "multiple")]
;; Intrinsic-level combine: writes the low then the high 64-bit half of
;; the destination via the move_lo/hi_quad patterns.
2789 (define_expand "aarch64_simd_combine<mode>"
2790 [(match_operand:<VDBL> 0 "register_operand")
2791 (match_operand:VDC 1 "register_operand")
2792 (match_operand:VDC 2 "register_operand")]
2795 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2796 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2799 [(set_attr "type" "multiple")]
2802 ;; <su><addsub>l<q>.
;; Widening add/sub of the HIGH halves of two Q registers:
;; [su](add|sub)l2, selecting lanes with a vect_par_cnst_hi_half mask.
2804 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2805 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2806 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2807 (match_operand:VQW 1 "register_operand" "w")
2808 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2809 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2810 (match_operand:VQW 2 "register_operand" "w")
2813 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2814 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Same for the LOW halves: [su](add|sub)l on the D-register views.
2817 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2818 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2819 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2820 (match_operand:VQW 1 "register_operand" "w")
2821 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2822 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2823 (match_operand:VQW 2 "register_operand" "w")
2826 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2827 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Intrinsic expanders: build the hi-half lane-select constant and
;; forward to the matching *_hi_internal pattern.
2831 (define_expand "aarch64_saddl2<mode>"
2832 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2833 (match_operand:VQW 1 "register_operand" "w")
2834 (match_operand:VQW 2 "register_operand" "w")]
2837 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2838 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2843 (define_expand "aarch64_uaddl2<mode>"
2844 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2845 (match_operand:VQW 1 "register_operand" "w")
2846 (match_operand:VQW 2 "register_operand" "w")]
2849 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2850 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2855 (define_expand "aarch64_ssubl2<mode>"
2856 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2857 (match_operand:VQW 1 "register_operand" "w")
2858 (match_operand:VQW 2 "register_operand" "w")]
2861 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2862 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2867 (define_expand "aarch64_usubl2<mode>"
2868 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2869 (match_operand:VQW 1 "register_operand" "w")
2870 (match_operand:VQW 2 "register_operand" "w")]
2873 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2874 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Whole-D-register widening add/sub (no lane select needed).
2879 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2880 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2881 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2882 (match_operand:VD_BHSI 1 "register_operand" "w"))
2884 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2886 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2887 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2890 ;; <su><addsub>w<q>.
;; widen_ssum for Q-register inputs: split into SADDW on the low half
;; followed by SADDW2 on the high half.
2892 (define_expand "widen_ssum<mode>3"
2893 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2894 (plus:<VDBLW> (sign_extend:<VDBLW>
2895 (match_operand:VQW 1 "register_operand" ""))
2896 (match_operand:<VDBLW> 2 "register_operand" "")))]
2899 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2900 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2902 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2904 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; widen_ssum for D-register inputs: a single SADDW suffices.
2909 (define_expand "widen_ssum<mode>3"
2910 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2911 (plus:<VWIDE> (sign_extend:<VWIDE>
2912 (match_operand:VD_BHSI 1 "register_operand" ""))
2913 (match_operand:<VWIDE> 2 "register_operand" "")))]
2916 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two expanders above (UADDW/UADDW2).
2920 (define_expand "widen_usum<mode>3"
2921 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2922 (plus:<VDBLW> (zero_extend:<VDBLW>
2923 (match_operand:VQW 1 "register_operand" ""))
2924 (match_operand:<VDBLW> 2 "register_operand" "")))]
2927 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2928 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2930 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2932 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2937 (define_expand "widen_usum<mode>3"
2938 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2939 (plus:<VWIDE> (zero_extend:<VWIDE>
2940 (match_operand:VD_BHSI 1 "register_operand" ""))
2941 (match_operand:<VWIDE> 2 "register_operand" "")))]
2944 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; [su](add|sub)w: wide op1 combined with the widened D-register op2.
2948 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2949 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2950 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2952 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2954 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2955 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Same using the low half of a Q-register op2.
2958 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
2959 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2960 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2963 (match_operand:VQW 2 "register_operand" "w")
2964 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
2966 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
2967 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half form: [su](add|sub)w2.
2970 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2971 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2972 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2975 (match_operand:VQW 2 "register_operand" "w")
2976 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2978 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2979 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Intrinsic expanders for the w2 forms: build the hi-half lane-select
;; constant and forward to the matching *_w2<mode>_internal pattern.
2982 (define_expand "aarch64_saddw2<mode>"
2983 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2984 (match_operand:<VWIDE> 1 "register_operand" "w")
2985 (match_operand:VQW 2 "register_operand" "w")]
2988 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2989 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2994 (define_expand "aarch64_uaddw2<mode>"
2995 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2996 (match_operand:<VWIDE> 1 "register_operand" "w")
2997 (match_operand:VQW 2 "register_operand" "w")]
3000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3001 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3007 (define_expand "aarch64_ssubw2<mode>"
3008 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3009 (match_operand:<VWIDE> 1 "register_operand" "w")
3010 (match_operand:VQW 2 "register_operand" "w")]
3013 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3014 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3019 (define_expand "aarch64_usubw2<mode>"
3020 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3021 (match_operand:<VWIDE> 1 "register_operand" "w")
3022 (match_operand:VQW 2 "register_operand" "w")]
3025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3026 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3031 ;; <su><r>h<addsub>.
;; Halving add/sub, plain or rounding: [su][r]h(add|sub).
3033 (define_insn "aarch64_<sur>h<addsub><mode>"
3034 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3035 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3036 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3039 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3040 [(set_attr "type" "neon_<addsub>_halve<q>")]
3043 ;; <r><addsub>hn<q>.
;; Narrowing high-half add/sub: result is the narrow half-width vector.
3045 (define_insn "aarch64_<sur><addsub>hn<mode>"
3046 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3047 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3048 (match_operand:VQN 2 "register_operand" "w")]
3051 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3052 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; hn2 variant: narrows into the high half, keeping operand 1 (tied to
;; the destination, constraint "0") as the low half.
3055 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3056 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3057 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3058 (match_operand:VQN 2 "register_operand" "w")
3059 (match_operand:VQN 3 "register_operand" "w")]
3062 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3063 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial multiply (PMUL), byte vectors only.
3068 (define_insn "aarch64_pmul<mode>"
3069 [(set (match_operand:VB 0 "register_operand" "=w")
3070 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3071 (match_operand:VB 2 "register_operand" "w")]
3074 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3075 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: extended FP multiply, vector and scalar forms.
3080 (define_insn "aarch64_fmulx<mode>"
3081 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3083 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3084 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3087 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3088 [(set_attr "type" "neon_fp_mul_<stype>")]
3091 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by one lane of a vector of the swapped width (lane index is
;; flipped for big-endian before printing).
3093 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3094 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3096 [(match_operand:VDQSF 1 "register_operand" "w")
3097 (vec_duplicate:VDQSF
3099 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3100 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3104 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3105 INTVAL (operands[3])));
3106 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3108 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3111 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by one lane of a same-mode vector.
3113 (define_insn "*aarch64_mulx_elt<mode>"
3114 [(set (match_operand:VDQF 0 "register_operand" "=w")
3116 [(match_operand:VDQF 1 "register_operand" "w")
3119 (match_operand:VDQF 2 "register_operand" "w")
3120 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3124 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3125 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3127 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX by a scalar broadcast via vec_duplicate: always lane [0].
3132 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3133 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3135 [(match_operand:VHSDF 1 "register_operand" "w")
3136 (vec_duplicate:VHSDF
3137 (match_operand:<VEL> 2 "register_operand" "w"))]
3140 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3141 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3144 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3145 ;; vmulxd_lane_f64 == vmulx_lane_f64
3146 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX of a scalar by one extracted vector lane.
3148 (define_insn "*aarch64_vgetfmulx<mode>"
3149 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3151 [(match_operand:<VEL> 1 "register_operand" "w")
3153 (match_operand:VDQF_DF 2 "register_operand" "w")
3154 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3158 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3159 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3161 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub: [su]q(add|sub), vector and scalar modes.
3165 (define_insn "aarch64_<su_optab><optab><mode>"
3166 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3167 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3168 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3170 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3171 [(set_attr "type" "neon_<optab><q>")]
3174 ;; suqadd and usqadd
;; Accumulating forms: operand 1 is tied to the destination ("0") and
;; only operand 2 appears in the printed asm.
3176 (define_insn "aarch64_<sur>qadd<mode>"
3177 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3178 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3179 (match_operand:VSDQ_I 2 "register_operand" "w")]
3182 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3183 [(set_attr "type" "neon_qadd<q>")]
;; SQXTUN: signed-to-unsigned saturating extract-narrow.
3188 (define_insn "aarch64_sqmovun<mode>"
3189 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3190 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3193 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3194 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3197 ;; sqmovn and uqmovn
3199 (define_insn "aarch64_<sur>qmovn<mode>"
3200 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3201 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3204 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3205 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating unary ops (s<optab>, e.g. sqabs/sqneg per iterator).
3210 (define_insn "aarch64_s<optab><mode>"
3211 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3213 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3215 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3216 [(set_attr "type" "neon_<optab><q>")]
;; Saturating (rounding) doubling multiply high: sq[r]dmulh.
3221 (define_insn "aarch64_sq<r>dmulh<mode>"
3222 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3224 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3225 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3228 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3229 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; Lane forms of sq[r]dmulh; the lane index is endian-corrected before
;; printing.  _lane uses a <VCOND> (64-bit) and _laneq a <VCONQ>
;; (128-bit) lane container; SD_HSI variants are the scalar versions.
3234 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3235 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3237 [(match_operand:VDQHS 1 "register_operand" "w")
3239 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3240 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3244 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3245 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3246 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3249 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3250 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3252 [(match_operand:VDQHS 1 "register_operand" "w")
3254 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3255 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3259 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3260 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3261 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3264 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3265 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3267 [(match_operand:SD_HSI 1 "register_operand" "w")
3269 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3270 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3274 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3275 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3276 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3279 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3280 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3282 [(match_operand:SD_HSI 1 "register_operand" "w")
3284 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3285 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3289 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3290 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3291 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; SQRDML[AS]H: rounding doubling multiply accumulate/subtract high.
;; Operand 1 is the accumulator, tied to the destination ("0").
3296 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3297 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3299 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3300 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3301 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3304 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3305 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3308 ;; sqrdml[as]h_lane.
;; Lane forms; the lane index is endian-corrected before printing.
;; <VCOND> = 64-bit lane container (_lane), <VCONQ> = 128-bit (_laneq);
;; SD_HSI variants are the scalar versions.
3310 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3311 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3313 [(match_operand:VDQHS 1 "register_operand" "0")
3314 (match_operand:VDQHS 2 "register_operand" "w")
3316 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3317 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3321 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3323 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3325 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3328 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3329 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3331 [(match_operand:SD_HSI 1 "register_operand" "0")
3332 (match_operand:SD_HSI 2 "register_operand" "w")
3334 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3335 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3339 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3341 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3343 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3346 ;; sqrdml[as]h_laneq.
3348 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3349 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3351 [(match_operand:VDQHS 1 "register_operand" "0")
3352 (match_operand:VDQHS 2 "register_operand" "w")
3354 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3355 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3359 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3361 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3363 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3366 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3367 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3369 [(match_operand:SD_HSI 1 "register_operand" "0")
3370 (match_operand:SD_HSI 2 "register_operand" "w")
3372 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3373 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3377 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3379 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3381 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; vqdml[sa]l: signed saturating doubling multiply-add/subtract long.
;; The multiply inputs (operands 2 and 3) are sign-extended to the
;; double-width mode <VWIDE> before the doubling multiply; operand 1 is
;; the wide accumulator and is tied to the destination.
3386 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3387 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3389 (match_operand:<VWIDE> 1 "register_operand" "0")
3392 (sign_extend:<VWIDE>
3393 (match_operand:VSD_HSI 2 "register_operand" "w"))
3394 (sign_extend:<VWIDE>
3395 (match_operand:VSD_HSI 3 "register_operand" "w")))
3398 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3399 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; Lane forms: the second multiply input is one lane of a vector,
;; broadcast with vec_duplicate.  _lane selects from a 64-bit vector
;; (<VCOND>), _laneq from a 128-bit vector (<VCONQ>); ENDIAN_LANE_N
;; fixes up the lane index for big-endian.
3404 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3405 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3407 (match_operand:<VWIDE> 1 "register_operand" "0")
3410 (sign_extend:<VWIDE>
3411 (match_operand:VD_HSI 2 "register_operand" "w"))
3412 (sign_extend:<VWIDE>
3413 (vec_duplicate:VD_HSI
3415 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3416 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3421 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3423 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3425 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3428 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3429 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3431 (match_operand:<VWIDE> 1 "register_operand" "0")
3434 (sign_extend:<VWIDE>
3435 (match_operand:VD_HSI 2 "register_operand" "w"))
3436 (sign_extend:<VWIDE>
3437 (vec_duplicate:VD_HSI
3439 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3440 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3445 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3447 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3449 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane/laneq variants — no vec_duplicate needed since
;; the multiply input is already a scalar lane selection.
3452 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3453 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3455 (match_operand:<VWIDE> 1 "register_operand" "0")
3458 (sign_extend:<VWIDE>
3459 (match_operand:SD_HSI 2 "register_operand" "w"))
3460 (sign_extend:<VWIDE>
3462 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3463 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3468 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3470 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3472 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3475 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3476 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3478 (match_operand:<VWIDE> 1 "register_operand" "0")
3481 (sign_extend:<VWIDE>
3482 (match_operand:SD_HSI 2 "register_operand" "w"))
3483 (sign_extend:<VWIDE>
3485 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3486 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3491 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3493 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3495 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: multiply every element by one scalar register, printed as
;; lane 0 of the scalar's vector view.
3500 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3501 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3503 (match_operand:<VWIDE> 1 "register_operand" "0")
3506 (sign_extend:<VWIDE>
3507 (match_operand:VD_HSI 2 "register_operand" "w"))
3508 (sign_extend:<VWIDE>
3509 (vec_duplicate:VD_HSI
3510 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3513 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3514 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l2: same saturating doubling multiply-accumulate long, but
;; operating on the HIGH halves of 128-bit inputs (vec_select with a
;; vect_par_cnst_hi_half parallel).  The *_internal insns take the
;; high-half selector as an extra operand; the define_expands build
;; that selector with aarch64_simd_vect_par_cnst_half (..., true) and
;; then emit the internal insn, so intrinsics never pass it themselves.
3519 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3520 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3522 (match_operand:<VWIDE> 1 "register_operand" "0")
3525 (sign_extend:<VWIDE>
3527 (match_operand:VQ_HSI 2 "register_operand" "w")
3528 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3529 (sign_extend:<VWIDE>
3531 (match_operand:VQ_HSI 3 "register_operand" "w")
3535 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3536 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3539 (define_expand "aarch64_sqdmlal2<mode>"
3540 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3541 (match_operand:<VWIDE> 1 "register_operand" "w")
3542 (match_operand:VQ_HSI 2 "register_operand" "w")
3543 (match_operand:VQ_HSI 3 "register_operand" "w")]
3546 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3547 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3548 operands[2], operands[3], p));
3552 (define_expand "aarch64_sqdmlsl2<mode>"
3553 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3554 (match_operand:<VWIDE> 1 "register_operand" "w")
3555 (match_operand:VQ_HSI 2 "register_operand" "w")
3556 (match_operand:VQ_HSI 3 "register_operand" "w")]
3559 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3560 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3561 operands[2], operands[3], p));
;; Lane / laneq variants of the high-half forms: the second multiply
;; input is a single lane broadcast across the half-width vector.
3567 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3568 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3570 (match_operand:<VWIDE> 1 "register_operand" "0")
3573 (sign_extend:<VWIDE>
3575 (match_operand:VQ_HSI 2 "register_operand" "w")
3576 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3577 (sign_extend:<VWIDE>
3578 (vec_duplicate:<VHALF>
3580 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3581 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3586 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3588 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3590 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3593 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3594 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3596 (match_operand:<VWIDE> 1 "register_operand" "0")
3599 (sign_extend:<VWIDE>
3601 (match_operand:VQ_HSI 2 "register_operand" "w")
3602 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3603 (sign_extend:<VWIDE>
3604 (vec_duplicate:<VHALF>
3606 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3607 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3612 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3614 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3616 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders for the four lane/laneq x add/sub combinations; each just
;; synthesizes the high-half selector and forwards to its _internal insn.
3619 (define_expand "aarch64_sqdmlal2_lane<mode>"
3620 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3621 (match_operand:<VWIDE> 1 "register_operand" "w")
3622 (match_operand:VQ_HSI 2 "register_operand" "w")
3623 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3624 (match_operand:SI 4 "immediate_operand" "i")]
3627 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3628 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3629 operands[2], operands[3],
3634 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3635 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3636 (match_operand:<VWIDE> 1 "register_operand" "w")
3637 (match_operand:VQ_HSI 2 "register_operand" "w")
3638 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3639 (match_operand:SI 4 "immediate_operand" "i")]
3642 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3643 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3644 operands[2], operands[3],
3649 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3650 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3651 (match_operand:<VWIDE> 1 "register_operand" "w")
3652 (match_operand:VQ_HSI 2 "register_operand" "w")
3653 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3654 (match_operand:SI 4 "immediate_operand" "i")]
3657 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3658 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3659 operands[2], operands[3],
3664 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3665 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3666 (match_operand:<VWIDE> 1 "register_operand" "w")
3667 (match_operand:VQ_HSI 2 "register_operand" "w")
3668 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3669 (match_operand:SI 4 "immediate_operand" "i")]
3672 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3673 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3674 operands[2], operands[3],
;; _n variant: high halves multiplied by a broadcast scalar register.
3679 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3680 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3682 (match_operand:<VWIDE> 1 "register_operand" "0")
3685 (sign_extend:<VWIDE>
3687 (match_operand:VQ_HSI 2 "register_operand" "w")
3688 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3689 (sign_extend:<VWIDE>
3690 (vec_duplicate:<VHALF>
3691 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3694 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3695 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3698 (define_expand "aarch64_sqdmlal2_n<mode>"
3699 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3700 (match_operand:<VWIDE> 1 "register_operand" "w")
3701 (match_operand:VQ_HSI 2 "register_operand" "w")
3702 (match_operand:<VEL> 3 "register_operand" "w")]
3705 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3706 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3707 operands[2], operands[3],
3712 (define_expand "aarch64_sqdmlsl2_n<mode>"
3713 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3714 (match_operand:<VWIDE> 1 "register_operand" "w")
3715 (match_operand:VQ_HSI 2 "register_operand" "w")
3716 (match_operand:<VEL> 3 "register_operand" "w")]
3719 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3720 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3721 operands[2], operands[3],
;; vqdmull: signed saturating doubling multiply long.  Both inputs are
;; sign-extended to <VWIDE> before the doubling multiply; no accumulator.
3728 (define_insn "aarch64_sqdmull<mode>"
3729 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3732 (sign_extend:<VWIDE>
3733 (match_operand:VSD_HSI 1 "register_operand" "w"))
3734 (sign_extend:<VWIDE>
3735 (match_operand:VSD_HSI 2 "register_operand" "w")))
3738 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3739 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; Lane forms — second input is a broadcast lane; _lane from a 64-bit
;; vector, _laneq from a 128-bit vector; big-endian lane fixup via
;; ENDIAN_LANE_N as elsewhere in this file.
3744 (define_insn "aarch64_sqdmull_lane<mode>"
3745 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3748 (sign_extend:<VWIDE>
3749 (match_operand:VD_HSI 1 "register_operand" "w"))
3750 (sign_extend:<VWIDE>
3751 (vec_duplicate:VD_HSI
3753 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3754 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3759 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3760 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3762 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3765 (define_insn "aarch64_sqdmull_laneq<mode>"
3766 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3769 (sign_extend:<VWIDE>
3770 (match_operand:VD_HSI 1 "register_operand" "w"))
3771 (sign_extend:<VWIDE>
3772 (vec_duplicate:VD_HSI
3774 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3775 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3780 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3781 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3783 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) lane/laneq variants — plain lane selection, no
;; vec_duplicate since the input is a scalar.
3786 (define_insn "aarch64_sqdmull_lane<mode>"
3787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3790 (sign_extend:<VWIDE>
3791 (match_operand:SD_HSI 1 "register_operand" "w"))
3792 (sign_extend:<VWIDE>
3794 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3795 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3800 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3801 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3803 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3806 (define_insn "aarch64_sqdmull_laneq<mode>"
3807 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3810 (sign_extend:<VWIDE>
3811 (match_operand:SD_HSI 1 "register_operand" "w"))
3812 (sign_extend:<VWIDE>
3814 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3815 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3820 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3821 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3823 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: multiply by a broadcast scalar register (printed as lane 0).
3828 (define_insn "aarch64_sqdmull_n<mode>"
3829 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3832 (sign_extend:<VWIDE>
3833 (match_operand:VD_HSI 1 "register_operand" "w"))
3834 (sign_extend:<VWIDE>
3835 (vec_duplicate:VD_HSI
3836 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3840 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3841 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; vqdmull2: saturating doubling multiply long on the HIGH halves of
;; 128-bit inputs.  Same internal/expander split as the sqdml[as]l2
;; family: expanders build the hi-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emit the *_internal
;; insn, which matches it with the vect_par_cnst_hi_half predicate.
3848 (define_insn "aarch64_sqdmull2<mode>_internal"
3849 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3852 (sign_extend:<VWIDE>
3854 (match_operand:VQ_HSI 1 "register_operand" "w")
3855 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3856 (sign_extend:<VWIDE>
3858 (match_operand:VQ_HSI 2 "register_operand" "w")
3863 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3864 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3867 (define_expand "aarch64_sqdmull2<mode>"
3868 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869 (match_operand:VQ_HSI 1 "register_operand" "w")
3870 (match_operand:VQ_HSI 2 "register_operand" "w")]
3873 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3874 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Lane / laneq high-half variants.
3881 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3882 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3885 (sign_extend:<VWIDE>
3887 (match_operand:VQ_HSI 1 "register_operand" "w")
3888 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3889 (sign_extend:<VWIDE>
3890 (vec_duplicate:<VHALF>
3892 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3893 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3898 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3899 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3901 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3904 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3905 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3908 (sign_extend:<VWIDE>
3910 (match_operand:VQ_HSI 1 "register_operand" "w")
3911 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3912 (sign_extend:<VWIDE>
3913 (vec_duplicate:<VHALF>
3915 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3916 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3921 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3922 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3924 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3927 (define_expand "aarch64_sqdmull2_lane<mode>"
3928 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (match_operand:VQ_HSI 1 "register_operand" "w")
3930 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3931 (match_operand:SI 3 "immediate_operand" "i")]
3934 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3935 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3936 operands[2], operands[3],
3941 (define_expand "aarch64_sqdmull2_laneq<mode>"
3942 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3943 (match_operand:VQ_HSI 1 "register_operand" "w")
3944 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3945 (match_operand:SI 3 "immediate_operand" "i")]
3948 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3949 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3950 operands[2], operands[3],
;; _n high-half variant: broadcast scalar register multiplier.
3957 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3958 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3961 (sign_extend:<VWIDE>
3963 (match_operand:VQ_HSI 1 "register_operand" "w")
3964 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3965 (sign_extend:<VWIDE>
3966 (vec_duplicate:<VHALF>
3967 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3971 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3972 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3975 (define_expand "aarch64_sqdmull2_n<mode>"
3976 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3977 (match_operand:VQ_HSI 1 "register_operand" "w")
3978 (match_operand:<VEL> 2 "register_operand" "w")]
3981 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3982 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shifts.  <sur> iterates over the signed/unsigned and
;; rounding/saturating variants (s/u/sr/ur...), producing e.g.
;; sshl/ushl/srshl/urshl from one pattern.
3989 (define_insn "aarch64_<sur>shl<mode>"
3990 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3992 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3993 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3996 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3997 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating (optionally rounding) shift by register.
4003 (define_insn "aarch64_<sur>q<r>shl<mode>"
4004 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4006 [(match_operand:VSDQ_I 1 "register_operand" "w")
4007 (match_operand:VSDQ_I 2 "register_operand" "w")]
4010 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4011 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left by immediate.  When the shift amount equals the
;; element bit-size, the architecture requires the distinct SHLL
;; mnemonic rather than s/ushll — hence the runtime INTVAL check.
4016 (define_insn "aarch64_<sur>shll_n<mode>"
4017 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4018 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4020 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4024 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4025 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4027 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4029 [(set_attr "type" "neon_shift_imm_long")]
;; High-half widening shift-left by immediate (same SHLL2 special case).
4034 (define_insn "aarch64_<sur>shll2_n<mode>"
4035 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4036 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4037 (match_operand:SI 2 "immediate_operand" "i")]
4041 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4042 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4044 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4046 [(set_attr "type" "neon_shift_imm_long")]
;; Rounding shift right by immediate.
4051 (define_insn "aarch64_<sur>shr_n<mode>"
4052 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4053 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4055 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4058 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4059 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate: operand 1 is the accumulator, tied to
;; the destination; operand 2 is shifted by immediate operand 3.
4064 (define_insn "aarch64_<sur>sra_n<mode>"
4065 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4066 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4067 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4069 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4072 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4073 [(set_attr "type" "neon_shift_acc<q>")]
;; SLI/SRI: shift-and-insert, preserving the untouched bits of the
;; destination (hence the tied "0" constraint on operand 1).
4078 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4079 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4080 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4081 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4083 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4086 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4087 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift-left by immediate (sqshl/uqshl/sqshlu via <u>).
4092 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4093 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4094 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4096 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4099 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4100 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift-right-narrow by immediate.
4106 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4107 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4108 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4110 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4113 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4114 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4118 ;; cm(eq|ge|gt|lt|le)
4119 ;; Note, we have constraints for Dz and Z as different expanders
4120 ;; have different ideas of what should be passed to this pattern.
;; Signed integer vector compares.  Alternative 1 compares two
;; registers (cm<n_optab>, with <cmp_1>/<cmp_2> swapping operands for
;; lt/le); alternative 2 compares against immediate zero.
4122 (define_insn "aarch64_cm<optab><mode>"
4123 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4125 (COMPARISONS:<V_cmp_result>
4126 (match_operand:VDQ_I 1 "register_operand" "w,w")
4127 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4131 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4132 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4133 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI-mode compare.  Before reload we cannot know whether the operands
;; will land in FP or general registers, so this pattern clobbers CC
;; and is split after reload: GP registers -> compare + conditional
;; store (cstoredi_neg); FP registers -> the *aarch64_cm<optab>di
;; pattern below, which does not clobber CC.
4136 (define_insn_and_split "aarch64_cm<optab>di"
4137 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4140 (match_operand:DI 1 "register_operand" "w,w,r")
4141 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4143 (clobber (reg:CC CC_REGNUM))]
4147 [(set (match_operand:DI 0 "register_operand")
4150 (match_operand:DI 1 "register_operand")
4151 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4154 /* If we are in the general purpose register file,
4155 we split to a sequence of comparison and store. */
4156 if (GP_REGNUM_P (REGNO (operands[0]))
4157 && GP_REGNUM_P (REGNO (operands[1])))
4159 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4160 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4161 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4162 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4165 /* Otherwise, we expand to a similar pattern which does not
4166 clobber CC_REGNUM. */
4168 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload FP-register form of the DI compare (no CC clobber).
4171 (define_insn "*aarch64_cm<optab>di"
4172 [(set (match_operand:DI 0 "register_operand" "=w,w")
4175 (match_operand:DI 1 "register_operand" "w,w")
4176 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4178 "TARGET_SIMD && reload_completed"
4180 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4181 cm<optab>\t%d0, %d1, #0"
4182 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned integer vector compares (cmhi/cmhs); no compare-with-zero
;; alternative since unsigned >= 0 / > 0 fold away earlier.
4187 (define_insn "aarch64_cm<optab><mode>"
4188 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4190 (UCOMPARISONS:<V_cmp_result>
4191 (match_operand:VDQ_I 1 "register_operand" "w")
4192 (match_operand:VDQ_I 2 "register_operand" "w")
4195 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4196 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DI compare: same split strategy as the signed DI pattern.
;; Note the split uses plain CCmode here rather than SELECT_CC_MODE.
4199 (define_insn_and_split "aarch64_cm<optab>di"
4200 [(set (match_operand:DI 0 "register_operand" "=w,r")
4203 (match_operand:DI 1 "register_operand" "w,r")
4204 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4206 (clobber (reg:CC CC_REGNUM))]
4210 [(set (match_operand:DI 0 "register_operand")
4213 (match_operand:DI 1 "register_operand")
4214 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4217 /* If we are in the general purpose register file,
4218 we split to a sequence of comparison and store. */
4219 if (GP_REGNUM_P (REGNO (operands[0]))
4220 && GP_REGNUM_P (REGNO (operands[1])))
4222 machine_mode mode = CCmode;
4223 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4224 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4225 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4228 /* Otherwise, we expand to a similar pattern which does not
4229 clobber CC_REGNUM. */
4231 [(set_attr "type" "neon_compare,multiple")]
4234 (define_insn "*aarch64_cm<optab>di"
4235 [(set (match_operand:DI 0 "register_operand" "=w")
4238 (match_operand:DI 1 "register_operand" "w")
4239 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4241 "TARGET_SIMD && reload_completed"
4242 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4243 [(set_attr "type" "neon_compare")]
4248 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4249 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
4250 ;; not (neg (eq (and x y) 0))
4251 ;; which is rewritten by simplify_rtx as
4252 ;; plus (eq (and x y) 0) -1.
;; CMTST (test-bits): matches the canonical plus(eq(and x y) 0) -1 form
;; described above rather than the more obvious neg(ne ...) form.
4254 (define_insn "aarch64_cmtst<mode>"
4255 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4256 (plus:<V_cmp_result>
4259 (match_operand:VDQ_I 1 "register_operand" "w")
4260 (match_operand:VDQ_I 2 "register_operand" "w"))
4261 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4262 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4265 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4266 [(set_attr "type" "neon_tst<q>")]
;; DI-mode CMTST with the same GP-vs-FP post-reload split as the
;; compare patterns: GP -> AND, compare-with-zero, conditional store.
4269 (define_insn_and_split "aarch64_cmtstdi"
4270 [(set (match_operand:DI 0 "register_operand" "=w,r")
4274 (match_operand:DI 1 "register_operand" "w,r")
4275 (match_operand:DI 2 "register_operand" "w,r"))
4277 (clobber (reg:CC CC_REGNUM))]
4281 [(set (match_operand:DI 0 "register_operand")
4285 (match_operand:DI 1 "register_operand")
4286 (match_operand:DI 2 "register_operand"))
4289 /* If we are in the general purpose register file,
4290 we split to a sequence of comparison and store. */
4291 if (GP_REGNUM_P (REGNO (operands[0]))
4292 && GP_REGNUM_P (REGNO (operands[1])))
4294 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4295 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4296 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4297 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4298 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4301 /* Otherwise, we expand to a similar pattern which does not
4302 clobber CC_REGNUM. */
4304 [(set_attr "type" "neon_tst,multiple")]
4307 (define_insn "*aarch64_cmtstdi"
4308 [(set (match_operand:DI 0 "register_operand" "=w")
4312 (match_operand:DI 1 "register_operand" "w")
4313 (match_operand:DI 2 "register_operand" "w"))
4316 "cmtst\t%d0, %d1, %d2"
4317 [(set_attr "type" "neon_tst")]
4320 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compares (incl. FP16 via VHSDF_HSDF): register form
;; or compare against floating-point zero (YDz constraint).
4322 (define_insn "aarch64_cm<optab><mode>"
4323 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4325 (COMPARISONS:<V_cmp_result>
4326 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4327 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4331 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4332 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4333 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4337 ;; Note we can also handle what would be fac(le|lt) by
4338 ;; generating fac(ge|gt).
;; FACGE/FACGT: absolute-value compares — both inputs wrapped in abs
;; before the comparison.
4340 (define_insn "aarch64_fac<optab><mode>"
4341 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4343 (FAC_COMPARISONS:<V_cmp_result>
4345 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4347 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4350 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4351 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; ADDP: pairwise add of adjacent elements across the two inputs
;; (64-bit integer vectors), plus the scalar V2DI->DI reduction form.
4356 (define_insn "aarch64_addp<mode>"
4357 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4359 [(match_operand:VD_BHSI 1 "register_operand" "w")
4360 (match_operand:VD_BHSI 2 "register_operand" "w")]
4363 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4364 [(set_attr "type" "neon_reduc_add<q>")]
4367 (define_insn "aarch64_addpdi"
4368 [(set (match_operand:DI 0 "register_operand" "=w")
4370 [(match_operand:V2DI 1 "register_operand" "w")]
4374 [(set_attr "type" "neon_reduc_add")]
;; Vector square root.  The expander first tries the reciprocal-
;; estimate approximation (aarch64_emit_approx_sqrt); when that
;; declines, the expand falls through to the *sqrt<mode>2 insn
;; below, which emits the exact FSQRT instruction.
4379 (define_expand "sqrt<mode>2"
4380 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4381 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4384 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4388 (define_insn "*sqrt<mode>2"
4389 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4390 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4392 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4393 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4396 ;; Patterns for vector struct loads and stores.
4398 (define_insn "aarch64_simd_ld2<mode>"
4399 [(set (match_operand:OI 0 "register_operand" "=w")
4400 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4401 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4404 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4405 [(set_attr "type" "neon_load2_2reg<q>")]
4408 (define_insn "aarch64_simd_ld2r<mode>"
4409 [(set (match_operand:OI 0 "register_operand" "=w")
4410 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4411 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4414 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4415 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; NOTE(review): this extract is missing some original lines (unspec names,
;; insn conditions, closing parens) — compare against the complete
;; aarch64-simd.md before relying on any single pattern here.

;; Two-register (OI mode) structure load/store patterns.

;; ld2 single-lane load: loads one element pair into lane %3 of the
;; two-vector register list; operand 2 ("0" constraint) supplies the
;; unmodified lanes.  The lane index is remapped for big-endian via
;; ENDIAN_LANE_N before printing.
4418 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4419 [(set (match_operand:OI 0 "register_operand" "=w")
4420 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4421 (match_operand:OI 2 "register_operand" "0")
4422 (match_operand:SI 3 "immediate_operand" "i")
4423 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4427 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4428 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4430 [(set_attr "type" "neon_load2_one_lane")]

;; Standard vec_load_lanes expander for OI: on big-endian targets the
;; loaded register list is reversed with a TBL mask (aarch64_rev_reglistoi)
;; after the ld2 so the RTL-level lane numbering stays consistent.
4433 (define_expand "vec_load_lanesoi<mode>"
4434 [(set (match_operand:OI 0 "register_operand" "=w")
4435 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4436 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4440 if (BYTES_BIG_ENDIAN)
4442 rtx tmp = gen_reg_rtx (OImode);
4443 rtx mask = aarch64_reverse_mask (<MODE>mode);
4444 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4445 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4448 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));

;; st2 full-structure store of a two-vector register list.
4452 (define_insn "aarch64_simd_st2<mode>"
4453 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4454 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4455 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4458 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4459 [(set_attr "type" "neon_store2_2reg<q>")]

;; st2 single-lane store; lane index flipped for big-endian at output time.
4462 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4463 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4464 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4465 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4466 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4467 (match_operand:SI 2 "immediate_operand" "i")]
4471 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4472 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4474 [(set_attr "type" "neon_store2_one_lane<q>")]

;; Standard vec_store_lanes expander for OI: big-endian reverses the
;; register list (TBL) before the st2, mirroring vec_load_lanesoi.
4477 (define_expand "vec_store_lanesoi<mode>"
4478 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4479 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4480 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4484 if (BYTES_BIG_ENDIAN)
4486 rtx tmp = gen_reg_rtx (OImode);
4487 rtx mask = aarch64_reverse_mask (<MODE>mode);
4488 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4489 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4492 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; Three-register (CI mode) structure load/store patterns.  These mirror
;; the OI (two-register) patterns above, using ld3/st3 and the %S..%U
;; register-list range.

;; ld3 full-structure load into a three-vector register list.
4496 (define_insn "aarch64_simd_ld3<mode>"
4497 [(set (match_operand:CI 0 "register_operand" "=w")
4498 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4499 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4502 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4503 [(set_attr "type" "neon_load3_3reg<q>")]

;; ld3r: load one element triple and replicate it to all lanes of the list.
4506 (define_insn "aarch64_simd_ld3r<mode>"
4507 [(set (match_operand:CI 0 "register_operand" "=w")
4508 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4509 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4512 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4513 [(set_attr "type" "neon_load3_all_lanes<q>")]

;; ld3 single-lane load; lane index remapped for big-endian (ENDIAN_LANE_N).
4516 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4517 [(set (match_operand:CI 0 "register_operand" "=w")
4518 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4519 (match_operand:CI 2 "register_operand" "0")
4520 (match_operand:SI 3 "immediate_operand" "i")
4521 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4525 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4526 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4528 [(set_attr "type" "neon_load3_one_lane")]

;; Standard vec_load_lanes expander for CI; big-endian reverses the
;; register list after the ld3.
4531 (define_expand "vec_load_lanesci<mode>"
4532 [(set (match_operand:CI 0 "register_operand" "=w")
4533 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4534 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4538 if (BYTES_BIG_ENDIAN)
4540 rtx tmp = gen_reg_rtx (CImode);
4541 rtx mask = aarch64_reverse_mask (<MODE>mode);
4542 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4543 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4546 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));

;; st3 full-structure store.
4550 (define_insn "aarch64_simd_st3<mode>"
4551 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4552 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4553 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4556 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4557 [(set_attr "type" "neon_store3_3reg<q>")]

;; st3 single-lane store; lane index flipped for big-endian at output time.
4560 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4561 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4562 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4563 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4564 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4565 (match_operand:SI 2 "immediate_operand" "i")]
4569 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4570 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4572 [(set_attr "type" "neon_store3_one_lane<q>")]

;; Standard vec_store_lanes expander for CI; big-endian reverses the
;; register list before the st3.
4575 (define_expand "vec_store_lanesci<mode>"
4576 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4577 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4578 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4582 if (BYTES_BIG_ENDIAN)
4584 rtx tmp = gen_reg_rtx (CImode);
4585 rtx mask = aarch64_reverse_mask (<MODE>mode);
4586 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4587 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4590 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; Four-register (XI mode) structure load/store patterns; same shape as the
;; OI/CI families above, using ld4/st4 and the %S..%V register-list range.

;; ld4 full-structure load.
4594 (define_insn "aarch64_simd_ld4<mode>"
4595 [(set (match_operand:XI 0 "register_operand" "=w")
4596 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4597 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4600 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4601 [(set_attr "type" "neon_load4_4reg<q>")]

;; ld4r: load one element quad and replicate it to all lanes.
4604 (define_insn "aarch64_simd_ld4r<mode>"
4605 [(set (match_operand:XI 0 "register_operand" "=w")
4606 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4607 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4610 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4611 [(set_attr "type" "neon_load4_all_lanes<q>")]

;; ld4 single-lane load; lane index remapped for big-endian (ENDIAN_LANE_N).
4614 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4615 [(set (match_operand:XI 0 "register_operand" "=w")
4616 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4617 (match_operand:XI 2 "register_operand" "0")
4618 (match_operand:SI 3 "immediate_operand" "i")
4619 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4623 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4624 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4626 [(set_attr "type" "neon_load4_one_lane")]

;; Standard vec_load_lanes expander for XI; big-endian reverses the
;; register list after the ld4.
4629 (define_expand "vec_load_lanesxi<mode>"
4630 [(set (match_operand:XI 0 "register_operand" "=w")
4631 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4632 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4636 if (BYTES_BIG_ENDIAN)
4638 rtx tmp = gen_reg_rtx (XImode);
4639 rtx mask = aarch64_reverse_mask (<MODE>mode);
4640 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4641 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4644 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));

;; st4 full-structure store.
4648 (define_insn "aarch64_simd_st4<mode>"
4649 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4650 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4651 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4654 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4655 [(set_attr "type" "neon_store4_4reg<q>")]

;; st4 single-lane store; lane index flipped for big-endian at output time.
4658 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4659 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4660 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4661 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4662 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4663 (match_operand:SI 2 "immediate_operand" "i")]
4667 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4668 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4670 [(set_attr "type" "neon_store4_one_lane<q>")]

;; Standard vec_store_lanes expander for XI; big-endian reverses the
;; register list before the st4.
4673 (define_expand "vec_store_lanesxi<mode>"
4674 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4675 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4676 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 if (BYTES_BIG_ENDIAN)
4682 rtx tmp = gen_reg_rtx (XImode);
4683 rtx mask = aarch64_reverse_mask (<MODE>mode);
4684 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4685 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4688 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Reverse the lanes of every vector in a VSTRUCT register list.
;; After reload (register numbers known), splits into one TBL per
;; constituent Q-register, each using the V16QI mask in operand 2.
;; "=&w" earlyclobber keeps the destination list disjoint from the sources.
4692 (define_insn_and_split "aarch64_rev_reglist<mode>"
4693 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4695 [(match_operand:VSTRUCT 1 "register_operand" "w")
4696 (match_operand:V16QI 2 "register_operand" "w")]
4697 UNSPEC_REV_REGLIST))]
4700 "&& reload_completed"
4704 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4705 for (i = 0; i < nregs; i++)
4707 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4708 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4709 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4713 [(set_attr "type" "neon_tbl1_q")
4714 (set_attr "length" "<insn_count>")]
4717 ;; Reload patterns for AdvSIMD register list operands.

;; Standard mov expander for VSTRUCT (OI/CI/XI) modes: before reload,
;; force a memory destination's source into a register so the insn
;; below always has at least one register operand.
4719 (define_expand "mov<mode>"
4720 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4721 (match_operand:VSTRUCT 1 "general_operand" ""))]
4724 if (can_create_pseudo_p ())
4726 if (GET_CODE (operands[0]) != REG)
4727 operands[1] = force_reg (<MODE>mode, operands[1]);

;; Little-endian VSTRUCT move: reg-reg (split later, hence "multiple"),
;; or single st1/ld1 of the whole register list.
4731 (define_insn "*aarch64_mov<mode>"
4732 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4733 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4734 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4735 && (register_operand (operands[0], <MODE>mode)
4736 || register_operand (operands[1], <MODE>mode))"
4739 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4740 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4741 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4742 neon_load<nregs>_<nregs>reg_q")
4743 (set_attr "length" "<insn_count>,4,4")]
;; Element-wise ld1/st1 used on big-endian targets, where plain vector
;; loads/stores would give the wrong lane order.

;; ld1 of a single vector register.
4746 (define_insn "aarch64_be_ld1<mode>"
4747 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4748 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4749 "aarch64_simd_struct_operand" "Utv")]
4752 "ld1\\t{%0<Vmtype>}, %1"
4753 [(set_attr "type" "neon_load1_1reg<q>")]

;; st1 of a single vector register.
4756 (define_insn "aarch64_be_st1<mode>"
4757 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4758 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4761 "st1\\t{%1<Vmtype>}, %0"
4762 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of register-list modes.  These emit length attributes
;; sized for the later reg-reg splits (8/12/16 bytes for 2/3/4 regs);
;; the output templates for the memory alternatives are on lines not
;; present in this extract.

;; OI (two-register) big-endian move.
4765 (define_insn "*aarch64_be_movoi"
4766 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4767 (match_operand:OI 1 "general_operand" " w,w,m"))]
4768 "TARGET_SIMD && BYTES_BIG_ENDIAN
4769 && (register_operand (operands[0], OImode)
4770 || register_operand (operands[1], OImode))"
4775 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4776 (set_attr "length" "8,4,4")]

;; CI (three-register) big-endian move; memory alternatives use "o"
;; (offsettable) so the split can address each 16-byte part.
4779 (define_insn "*aarch64_be_movci"
4780 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4781 (match_operand:CI 1 "general_operand" " w,w,o"))]
4782 "TARGET_SIMD && BYTES_BIG_ENDIAN
4783 && (register_operand (operands[0], CImode)
4784 || register_operand (operands[1], CImode))"
4786 [(set_attr "type" "multiple")
4787 (set_attr "length" "12,4,4")]

;; XI (four-register) big-endian move.
4790 (define_insn "*aarch64_be_movxi"
4791 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4792 (match_operand:XI 1 "general_operand" " w,w,o"))]
4793 "TARGET_SIMD && BYTES_BIG_ENDIAN
4794 && (register_operand (operands[0], XImode)
4795 || register_operand (operands[1], XImode))"
4797 [(set_attr "type" "multiple")
4798 (set_attr "length" "16,4,4")]
;; Post-reload splits for register-list moves.  Reg-reg moves split into
;; per-TImode register copies (aarch64_simd_emit_reg_reg_move); the
;; big-endian memory cases split into OImode/TImode part moves.
;; NOTE(review): the "(define_split" header lines are among the lines
;; missing from this extract.

;; OI reg-reg: split into 2 TImode copies.
4802 [(set (match_operand:OI 0 "register_operand")
4803 (match_operand:OI 1 "register_operand"))]
4804 "TARGET_SIMD && reload_completed"
4807 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);

;; CI: reg-reg becomes 3 TImode copies; big-endian memory case becomes
;; an OImode move of the first 32 bytes plus a V16QI move of the tail.
4812 [(set (match_operand:CI 0 "nonimmediate_operand")
4813 (match_operand:CI 1 "general_operand"))]
4814 "TARGET_SIMD && reload_completed"
4817 if (register_operand (operands[0], CImode)
4818 && register_operand (operands[1], CImode))
4820 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4823 else if (BYTES_BIG_ENDIAN)
4825 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4826 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4827 emit_move_insn (gen_lowpart (V16QImode,
4828 simplify_gen_subreg (TImode, operands[0],
4830 gen_lowpart (V16QImode,
4831 simplify_gen_subreg (TImode, operands[1],

;; XI: reg-reg becomes 4 TImode copies; big-endian memory case becomes
;; two OImode part moves at offsets 0 and 32.
4840 [(set (match_operand:XI 0 "nonimmediate_operand")
4841 (match_operand:XI 1 "general_operand"))]
4842 "TARGET_SIMD && reload_completed"
4845 if (register_operand (operands[0], XImode)
4846 && register_operand (operands[1], XImode))
4848 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4851 else if (BYTES_BIG_ENDIAN)
4853 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4854 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4855 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4856 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for ldNr (load-and-replicate): wraps the pointer in a
;; BLKmode MEM sized from the element mode and forwards to the
;; aarch64_simd_ldNr pattern.
4863 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4864 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4865 (match_operand:DI 1 "register_operand" "w")
4866 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4869 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4870 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4873 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4878 (define_insn "aarch64_ld2<mode>_dreg"

;; D-register ldN_dreg insns: load N 64-bit vectors into the low halves of
;; an OI/CI/XI register list (high halves modelled as zero in the RTL).
;; VD element modes use ldN; DX (64-bit scalar-ish) modes use ld1 with
;; .1d arrangement instead.
4879 [(set (match_operand:OI 0 "register_operand" "=w")
4884 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4886 (vec_duplicate:VD (const_int 0)))
4888 (unspec:VD [(match_dup 1)]
4890 (vec_duplicate:VD (const_int 0)))) 0))]
4892 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4893 [(set_attr "type" "neon_load2_2reg<q>")]

;; ld2_dreg, DX variant (ld1 two .1d registers).
4896 (define_insn "aarch64_ld2<mode>_dreg"
4897 [(set (match_operand:OI 0 "register_operand" "=w")
4902 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4906 (unspec:DX [(match_dup 1)]
4908 (const_int 0))) 0))]
4910 "ld1\\t{%S0.1d - %T0.1d}, %1"
4911 [(set_attr "type" "neon_load1_2reg<q>")]

;; ld3_dreg, VD variant.
4914 (define_insn "aarch64_ld3<mode>_dreg"
4915 [(set (match_operand:CI 0 "register_operand" "=w")
4921 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4923 (vec_duplicate:VD (const_int 0)))
4925 (unspec:VD [(match_dup 1)]
4927 (vec_duplicate:VD (const_int 0))))
4929 (unspec:VD [(match_dup 1)]
4931 (vec_duplicate:VD (const_int 0)))) 0))]
4933 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4934 [(set_attr "type" "neon_load3_3reg<q>")]

;; ld3_dreg, DX variant (ld1 three .1d registers).
4937 (define_insn "aarch64_ld3<mode>_dreg"
4938 [(set (match_operand:CI 0 "register_operand" "=w")
4944 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4948 (unspec:DX [(match_dup 1)]
4952 (unspec:DX [(match_dup 1)]
4954 (const_int 0))) 0))]
4956 "ld1\\t{%S0.1d - %U0.1d}, %1"
4957 [(set_attr "type" "neon_load1_3reg<q>")]

;; ld4_dreg, VD variant.
4960 (define_insn "aarch64_ld4<mode>_dreg"
4961 [(set (match_operand:XI 0 "register_operand" "=w")
4967 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4969 (vec_duplicate:VD (const_int 0)))
4971 (unspec:VD [(match_dup 1)]
4973 (vec_duplicate:VD (const_int 0))))
4976 (unspec:VD [(match_dup 1)]
4978 (vec_duplicate:VD (const_int 0)))
4980 (unspec:VD [(match_dup 1)]
4982 (vec_duplicate:VD (const_int 0))))) 0))]
4984 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4985 [(set_attr "type" "neon_load4_4reg<q>")]

;; ld4_dreg, DX variant (ld1 four .1d registers).
4988 (define_insn "aarch64_ld4<mode>_dreg"
4989 [(set (match_operand:XI 0 "register_operand" "=w")
4995 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4999 (unspec:DX [(match_dup 1)]
5004 (unspec:DX [(match_dup 1)]
5008 (unspec:DX [(match_dup 1)]
5010 (const_int 0)))) 0))]
5012 "ld1\\t{%S0.1d - %V0.1d}, %1"
5013 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin expander for ldN of D-register element modes: builds a BLKmode
;; MEM of nregs*8 bytes around the pointer and forwards to ldN_dreg.
5016 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5017 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5018 (match_operand:DI 1 "register_operand" "r")
5019 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5022 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5023 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5025 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));

;; Builtin expander for ld1: a plain move on little-endian, the dedicated
;; be_ld1 pattern on big-endian.
5029 (define_expand "aarch64_ld1<VALL_F16:mode>"
5030 [(match_operand:VALL_F16 0 "register_operand")
5031 (match_operand:DI 1 "register_operand")]
5034 machine_mode mode = <VALL_F16:MODE>mode;
5035 rtx mem = gen_rtx_MEM (mode, operands[1]);
5037 if (BYTES_BIG_ENDIAN)
5038 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5040 emit_move_insn (operands[0], mem);

;; Builtin expander for ldN of Q-register element modes: forwards to the
;; full-width aarch64_simd_ldN pattern.
5044 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5045 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5046 (match_operand:DI 1 "register_operand" "r")
5047 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5050 machine_mode mode = <VSTRUCT:MODE>mode;
5051 rtx mem = gen_rtx_MEM (mode, operands[1]);
5053 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));

;; Builtin expander for ldN_lane: sizes the BLKmode MEM from the element
;; mode, bounds-checks the lane index, then emits the lane-load pattern.
5057 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5058 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5059 (match_operand:DI 1 "register_operand" "w")
5060 (match_operand:VSTRUCT 2 "register_operand" "0")
5061 (match_operand:SI 3 "immediate_operand" "i")
5062 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5065 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5066 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5069 aarch64_simd_lane_bounds (operands[3], 0,
5070 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5072 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5073 operands[0], mem, operands[2], operands[3]));
5077 ;; Expanders for builtins to extract vector registers from large
5078 ;; opaque integer modes.

;; Extract D-register part `part` of a VSTRUCT value: take the 128-bit
;; subreg at offset part*16, then its low 64-bit half.
5082 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5083 [(match_operand:VDC 0 "register_operand" "=w")
5084 (match_operand:VSTRUCT 1 "register_operand" "w")
5085 (match_operand:SI 2 "immediate_operand" "i")]
5088 int part = INTVAL (operands[2]);
5089 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5090 int offset = part * 16;
5092 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5093 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));

;; Extract Q-register part `part` of a VSTRUCT value (subreg at part*16).
5099 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5100 [(match_operand:VQ 0 "register_operand" "=w")
5101 (match_operand:VSTRUCT 1 "register_operand" "w")
5102 (match_operand:SI 2 "immediate_operand" "i")]
5105 int part = INTVAL (operands[2]);
5106 int offset = part * 16;
5108 emit_move_insn (operands[0],
5109 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5113 ;; Permuted-store expanders for neon intrinsics.
5115 ;; Permute instructions

;; Standard vec_perm_const expander: delegate to
;; aarch64_expand_vec_perm_const, which picks a dedicated permute insn
;; for the constant selector when it can.
5119 (define_expand "vec_perm_const<mode>"
5120 [(match_operand:VALL_F16 0 "register_operand")
5121 (match_operand:VALL_F16 1 "register_operand")
5122 (match_operand:VALL_F16 2 "register_operand")
5123 (match_operand:<V_cmp_result> 3)]
5126 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5127 operands[2], operands[3]))

;; Standard vec_perm expander (variable selector), byte vectors only.
5133 (define_expand "vec_perm<mode>"
5134 [(match_operand:VB 0 "register_operand")
5135 (match_operand:VB 1 "register_operand")
5136 (match_operand:VB 2 "register_operand")
5137 (match_operand:VB 3 "register_operand")]
5140 aarch64_expand_vec_perm (operands[0], operands[1],
5141 operands[2], operands[3]);
;; Table-lookup (tbl) and table-extension (tbx) patterns.  tbl writes zero
;; for out-of-range indices; tbx leaves the destination lane unchanged
;; (hence the "0" tied-operand constraint on the tbx variants).

;; tbl with a single 16-byte table register.
5145 (define_insn "aarch64_tbl1<mode>"
5146 [(set (match_operand:VB 0 "register_operand" "=w")
5147 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5148 (match_operand:VB 2 "register_operand" "w")]
5151 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5152 [(set_attr "type" "neon_tbl1<q>")]

5155 ;; Two source registers.

;; tbl with a two-register (OI) table, V16QI result.
5157 (define_insn "aarch64_tbl2v16qi"
5158 [(set (match_operand:V16QI 0 "register_operand" "=w")
5159 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5160 (match_operand:V16QI 2 "register_operand" "w")]
5163 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5164 [(set_attr "type" "neon_tbl2_q")]

;; tbl with a two-register table, any byte-vector result mode.
5167 (define_insn "aarch64_tbl3<mode>"
5168 [(set (match_operand:VB 0 "register_operand" "=w")
5169 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5170 (match_operand:VB 2 "register_operand" "w")]
5173 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5174 [(set_attr "type" "neon_tbl3")]

;; tbx with a two-register table; operand 1 supplies the fallback lanes.
5177 (define_insn "aarch64_tbx4<mode>"
5178 [(set (match_operand:VB 0 "register_operand" "=w")
5179 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5180 (match_operand:OI 2 "register_operand" "w")
5181 (match_operand:VB 3 "register_operand" "w")]
5184 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5185 [(set_attr "type" "neon_tbl4")]

5188 ;; Three source registers.

;; tbl with a three-register (CI) table.
5190 (define_insn "aarch64_qtbl3<mode>"
5191 [(set (match_operand:VB 0 "register_operand" "=w")
5192 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5193 (match_operand:VB 2 "register_operand" "w")]
5196 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5197 [(set_attr "type" "neon_tbl3")]

;; tbx with a three-register table.
5200 (define_insn "aarch64_qtbx3<mode>"
5201 [(set (match_operand:VB 0 "register_operand" "=w")
5202 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5203 (match_operand:CI 2 "register_operand" "w")
5204 (match_operand:VB 3 "register_operand" "w")]
5207 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5208 [(set_attr "type" "neon_tbl3")]

5211 ;; Four source registers.

;; tbl with a four-register (XI) table.
5213 (define_insn "aarch64_qtbl4<mode>"
5214 [(set (match_operand:VB 0 "register_operand" "=w")
5215 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5216 (match_operand:VB 2 "register_operand" "w")]
5219 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5220 [(set_attr "type" "neon_tbl4")]

;; tbx with a four-register table.
5223 (define_insn "aarch64_qtbx4<mode>"
5224 [(set (match_operand:VB 0 "register_operand" "=w")
5225 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5226 (match_operand:XI 2 "register_operand" "w")
5227 (match_operand:VB 3 "register_operand" "w")]
5230 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5231 [(set_attr "type" "neon_tbl4")]
;; Pack two V16QI registers into one OI register-list value; splits after
;; reload via aarch64_split_combinev16qi (register numbers then known).
5234 (define_insn_and_split "aarch64_combinev16qi"
5235 [(set (match_operand:OI 0 "register_operand" "=w")
5236 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5237 (match_operand:V16QI 2 "register_operand" "w")]
5241 "&& reload_completed"
5244 aarch64_split_combinev16qi (operands);
5247 [(set_attr "type" "multiple")]

;; Generic two-operand permute (zip/uzp/trn, lo/hi chosen by the
;; PERMUTE iterator attributes).
5250 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5251 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5252 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5253 (match_operand:VALL_F16 2 "register_operand" "w")]
5256 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5257 [(set_attr "type" "neon_permute<q>")]

;; ext (extract-from-pair): operand 3 is a lane index, converted to the
;; byte offset the instruction needs at output time.
5260 ;; Note immediate (third) operand is lane index not byte index.
5261 (define_insn "aarch64_ext<mode>"
5262 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5263 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5264 (match_operand:VALL_F16 2 "register_operand" "w")
5265 (match_operand:SI 3 "immediate_operand" "i")]
5269 operands[3] = GEN_INT (INTVAL (operands[3])
5270 * GET_MODE_UNIT_SIZE (<MODE>mode));
5271 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5273 [(set_attr "type" "neon_ext<q>")]

;; Element-reverse within containers (rev16/rev32/rev64 via REVERSE).
5276 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5277 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5278 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5281 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5282 [(set_attr "type" "neon_rev<q>")]
;; D-register stN_dreg insns: store the low halves of an OI/CI/XI register
;; list to a BLKmode MEM.  VD element modes use stN; DX modes use st1
;; with the .1d arrangement instead.

;; st2_dreg, VD variant.
5285 (define_insn "aarch64_st2<mode>_dreg"
5286 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5287 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5288 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5291 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5292 [(set_attr "type" "neon_store2_2reg")]

;; st2_dreg, DX variant.
5295 (define_insn "aarch64_st2<mode>_dreg"
5296 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5297 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5298 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5301 "st1\\t{%S1.1d - %T1.1d}, %0"
5302 [(set_attr "type" "neon_store1_2reg")]

;; st3_dreg, VD variant.
5305 (define_insn "aarch64_st3<mode>_dreg"
5306 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5307 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5308 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5312 [(set_attr "type" "neon_store3_3reg")]

;; st3_dreg, DX variant.
5315 (define_insn "aarch64_st3<mode>_dreg"
5316 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5317 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5318 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5321 "st1\\t{%S1.1d - %U1.1d}, %0"
5322 [(set_attr "type" "neon_store1_3reg")]

;; st4_dreg, VD variant.
5325 (define_insn "aarch64_st4<mode>_dreg"
5326 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5327 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5328 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5331 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5332 [(set_attr "type" "neon_store4_4reg")]

;; st4_dreg, DX variant.
5335 (define_insn "aarch64_st4<mode>_dreg"
5336 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5337 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5338 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341 "st1\\t{%S1.1d - %V1.1d}, %0"
5342 [(set_attr "type" "neon_store1_4reg")]

;; Builtin expander for stN of D-register element modes: builds a BLKmode
;; MEM of nregs*8 bytes and forwards to stN_dreg.
5345 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5346 [(match_operand:DI 0 "register_operand" "r")
5347 (match_operand:VSTRUCT 1 "register_operand" "w")
5348 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5351 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5352 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5354 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Builtin expander for stN of Q-register element modes: forwards to the
;; full-width aarch64_simd_stN pattern.
5358 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5359 [(match_operand:DI 0 "register_operand" "r")
5360 (match_operand:VSTRUCT 1 "register_operand" "w")
5361 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5364 machine_mode mode = <VSTRUCT:MODE>mode;
5365 rtx mem = gen_rtx_MEM (mode, operands[0]);
5367 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));

;; Builtin expander for stN_lane: sizes the BLKmode MEM from the element
;; mode, then emits the lane-store pattern.
5371 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5372 [(match_operand:DI 0 "register_operand" "r")
5373 (match_operand:VSTRUCT 1 "register_operand" "w")
5374 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5375 (match_operand:SI 2 "immediate_operand")]
5378 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5379 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5382 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5383 mem, operands[1], operands[2]));

;; Builtin expander for st1: plain move on little-endian, be_st1 pattern
;; on big-endian (mirrors aarch64_ld1).
5387 (define_expand "aarch64_st1<VALL_F16:mode>"
5388 [(match_operand:DI 0 "register_operand")
5389 (match_operand:VALL_F16 1 "register_operand")]
5392 machine_mode mode = <VALL_F16:MODE>mode;
5393 rtx mem = gen_rtx_MEM (mode, operands[0]);
5395 if (BYTES_BIG_ENDIAN)
5396 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5398 emit_move_insn (mem, operands[1]);
5402 ;; Expander for builtins to insert vector registers into large
5403 ;; opaque integer modes.
5405 ;; Q-register list. We don't need a D-reg inserter as we zero
5406 ;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Copy the whole list (operand 1) into the destination, then overwrite
;; the 128-bit part at byte offset part*16 with operand 2.
5408 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5409 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5410 (match_operand:VSTRUCT 1 "register_operand" "0")
5411 (match_operand:VQ 2 "register_operand" "w")
5412 (match_operand:SI 3 "immediate_operand" "i")]
5415 int part = INTVAL (operands[3]);
5416 int offset = part * 16;
5418 emit_move_insn (operands[0], operands[1]);
5419 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),

;; Standard vec_init expander: all the work is done in
;; aarch64_expand_vector_init.
5424 ;; Standard pattern name vec_init<mode>.
5426 (define_expand "vec_init<mode>"
5427 [(match_operand:VALL_F16 0 "register_operand" "")
5428 (match_operand 1 "" "")]
5431 aarch64_expand_vector_init (operands[0], operands[1]);
;; ld1r: load one element from memory and replicate to every lane.
5435 (define_insn "*aarch64_simd_ld1r<mode>"
5436 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5437 (vec_duplicate:VALL_F16
5438 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5440 "ld1r\\t{%0.<Vtype>}, %1"
5441 [(set_attr "type" "neon_load1_all_lanes")]

;; frecpe: floating-point reciprocal estimate, vector forms.
5444 (define_insn "aarch64_frecpe<mode>"
5445 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5446 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5449 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5450 [(set_attr "type" "neon_fp_recpe_<stype><q>")]

;; frecpe/frecpx scalar forms (suffix chosen by the FRECP iterator).
5453 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5454 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5455 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5458 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5459 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]

;; frecps: reciprocal step (Newton-Raphson refinement), vector and scalar.
5462 (define_insn "aarch64_frecps<mode>"
5463 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5465 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5466 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5469 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5470 [(set_attr "type" "neon_fp_recps_<stype><q>")]

;; urecpe: unsigned integer reciprocal estimate.
5473 (define_insn "aarch64_urecpe<mode>"
5474 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5475 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5478 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5479 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard vec_extract expander: delegate to aarch64_get_lane.
5481 ;; Standard pattern name vec_extract<mode>.
5483 (define_expand "vec_extract<mode>"
5484 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5485 (match_operand:VALL_F16 1 "register_operand" "")
5486 (match_operand:SI 2 "immediate_operand" "")]
5490 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single-round patterns (aese/aesd via the aes_op iterator); operand 1
;; is tied to the destination as the instruction updates its state in place.
5496 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5497 [(set (match_operand:V16QI 0 "register_operand" "=w")
5498 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5499 (match_operand:V16QI 2 "register_operand" "w")]
5501 "TARGET_SIMD && TARGET_CRYPTO"
5502 "aes<aes_op>\\t%0.16b, %2.16b"
5503 [(set_attr "type" "crypto_aese")]

;; AES mix-columns (aesmc/aesimc).  Two alternatives: the tied "0" form is
;; only enabled when AES/AESMC fusion is active, steering the register
;; allocator to keep the pair in the same register so the cores can fuse.
5506 ;; When AES/AESMC fusion is enabled we want the register allocation to
5510 ;; So prefer to tie operand 1 to operand 0 when fusing.
5512 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5513 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5514 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5516 "TARGET_SIMD && TARGET_CRYPTO"
5517 "aes<aesmc_op>\\t%0.16b, %1.16b"
5518 [(set_attr "type" "crypto_aesmc")
5519 (set_attr_alternative "enabled"
5520 [(if_then_else (match_test
5521 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5522 (const_string "yes" )
5523 (const_string "no"))
5524 (const_string "yes")])]
;; SHA1 patterns.

;; sha1h: fixed-rotate of a single 32-bit hash value.
5529 (define_insn "aarch64_crypto_sha1hsi"
5530 [(set (match_operand:SI 0 "register_operand" "=w")
5531 (unspec:SI [(match_operand:SI 1
5532 "register_operand" "w")]
5534 "TARGET_SIMD && TARGET_CRYPTO"
5536 [(set_attr "type" "crypto_sha1_fast")]

;; sha1su1: schedule update 1; operand 1 tied to the destination.
5539 (define_insn "aarch64_crypto_sha1su1v4si"
5540 [(set (match_operand:V4SI 0 "register_operand" "=w")
5541 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5542 (match_operand:V4SI 2 "register_operand" "w")]
5544 "TARGET_SIMD && TARGET_CRYPTO"
5545 "sha1su1\\t%0.4s, %2.4s"
5546 [(set_attr "type" "crypto_sha1_fast")]

;; sha1c/sha1m/sha1p hash-update rounds (chosen by the sha1_op iterator);
;; %q0 is the 128-bit hash state, %s2 the scalar E value.
5549 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5550 [(set (match_operand:V4SI 0 "register_operand" "=w")
5551 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5552 (match_operand:SI 2 "register_operand" "w")
5553 (match_operand:V4SI 3 "register_operand" "w")]
5555 "TARGET_SIMD && TARGET_CRYPTO"
5556 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5557 [(set_attr "type" "crypto_sha1_slow")]

;; sha1su0: schedule update 0.
5560 (define_insn "aarch64_crypto_sha1su0v4si"
5561 [(set (match_operand:V4SI 0 "register_operand" "=w")
5562 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5563 (match_operand:V4SI 2 "register_operand" "w")
5564 (match_operand:V4SI 3 "register_operand" "w")]
5566 "TARGET_SIMD && TARGET_CRYPTO"
5567 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5568 [(set_attr "type" "crypto_sha1_xor")]
;; sha256h/sha256h2 hash-update rounds (chosen by the sha256_op iterator);
;; operand 1 (hash state) is tied to the destination.
5573 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5574 [(set (match_operand:V4SI 0 "register_operand" "=w")
5575 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5576 (match_operand:V4SI 2 "register_operand" "w")
5577 (match_operand:V4SI 3 "register_operand" "w")]
5579 "TARGET_SIMD && TARGET_CRYPTO"
5580 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5581 [(set_attr "type" "crypto_sha256_slow")]
;; sha256su0: SHA256 schedule update 0; operand 1 tied to the destination.
;; Fix: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing space
;; after &&) — harmless to the C preprocessor but inconsistent with every
;; sibling crypto pattern in this file.
5584 (define_insn "aarch64_crypto_sha256su0v4si"
5585 [(set (match_operand:V4SI 0 "register_operand" "=w")
5586 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5587 (match_operand:V4SI 2 "register_operand" "w")]
5589 "TARGET_SIMD && TARGET_CRYPTO"
5590 "sha256su0\\t%0.4s, %2.4s"
5591 [(set_attr "type" "crypto_sha256_fast")]
;; sha256su1: SHA256 schedule update 1; operand 1 tied to the destination.
;; Fix: condition string was "TARGET_SIMD &&TARGET_CRYPTO" (missing space
;; after &&) — normalized to match the other crypto patterns.
5594 (define_insn "aarch64_crypto_sha256su1v4si"
5595 [(set (match_operand:V4SI 0 "register_operand" "=w")
5596 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5597 (match_operand:V4SI 2 "register_operand" "w")
5598 (match_operand:V4SI 3 "register_operand" "w")]
5600 "TARGET_SIMD && TARGET_CRYPTO"
5601 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5602 [(set_attr "type" "crypto_sha256_slow")]
;; pmull: 64x64 -> 128-bit polynomial (carry-less) multiply of the low
;; D-register halves.
5607 (define_insn "aarch64_crypto_pmulldi"
5608 [(set (match_operand:TI 0 "register_operand" "=w")
5609 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5610 (match_operand:DI 2 "register_operand" "w")]
5612 "TARGET_SIMD && TARGET_CRYPTO"
5613 "pmull\\t%0.1q, %1.1d, %2.1d"
5614 [(set_attr "type" "neon_mul_d_long")]
5617 (define_insn "aarch64_crypto_pmullv2di"
5618 [(set (match_operand:TI 0 "register_operand" "=w")
5619 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5620 (match_operand:V2DI 2 "register_operand" "w")]
5622 "TARGET_SIMD && TARGET_CRYPTO"
5623 "pmull2\\t%0.1q, %1.2d, %2.2d"
5624 [(set_attr "type" "neon_mul_d_long")]