1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard vector move expander for all vector modes (incl. FP16 vectors).
;; If the destination is memory, force the source into a register first,
;; since memory-to-memory (or constant-to-memory) moves are not legal.
;; NOTE(review): some original lines (condition/closing) are elided here.
21 (define_expand "mov<mode>"
22   [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 	(match_operand:VALL_F16 1 "general_operand" ""))]
26     if (GET_CODE (operands[0]) == MEM)
27       operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander.  Must not FAIL: when neither operand
;; is a register (e.g. memory := constant from the auto-vectorizer),
;; force operand 1 into a register so the resulting move is representable.
31 (define_expand "movmisalign<mode>"
32   [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33         (match_operand:VALL 1 "general_operand" ""))]
36   /* This pattern is not permitted to fail during expansion: if both arguments
37      are non-registers (e.g. memory := constant, which can be created by the
38      auto-vectorizer), force operand 1 into a register.  */
39   if (!register_operand (operands[0], <MODE>mode)
40       && !register_operand (operands[1], <MODE>mode))
41     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar into every lane of an integer vector.  Two
;; alternatives: DUP from a general-purpose register, or DUP from
;; element 0 of a SIMD register.
44 (define_insn "aarch64_simd_dup<mode>"
45   [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
47         (match_operand:<VEL> 1 "register_operand" "r, w")))]
50    dup\\t%0.<Vtype>, %<vw>1
51    dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52   [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Floating-point variant: only the SIMD-register source form is
;; provided (DUP from element 0).
55 (define_insn "aarch64_simd_dup<mode>"
56   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
57 	(vec_duplicate:VDQF_F16
58 	  (match_operand:<VEL> 1 "register_operand" "w")))]
60   "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
61   [(set_attr "type" "neon_dup<q>")]
;; Duplicate a selected lane of a vector into every lane of the result.
;; The lane index (operand 2) is remapped for big-endian via
;; ENDIAN_LANE_N before printing.
64 (define_insn "aarch64_dup_lane<mode>"
65   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
66 	(vec_duplicate:VALL_F16
68 	    (match_operand:VALL_F16 1 "register_operand" "w")
69 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
74     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
76   [(set_attr "type" "neon_dup<q>")]
;; Same as above, but the source vector has the "swapped width" mode
;; (different number of lanes); the lane index is remapped in the
;; source vector's mode.
79 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
80   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
81 	(vec_duplicate:VALL_F16_NO_V2Q
83 	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
84 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
89 					  INTVAL (operands[2])));
90     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
92   [(set_attr "type" "neon_dup<q>")]
;; 64-bit (D-register) vector move.  Alternatives: load, store,
;; SIMD reg-reg (ORR), SIMD->GP (UMOV), GP->SIMD (INS), GP-GP (MOV),
;; and immediate (Dn).  At least one operand must be a register.
95 (define_insn "*aarch64_simd_mov<mode>"
96   [(set (match_operand:VD 0 "nonimmediate_operand"
97 		"=w, m,  w, ?r, ?w, ?r, w")
98 	(match_operand:VD 1 "general_operand"
99 		"m,  w,  w,  w,  r,  r, Dn"))]
101    && (register_operand (operands[0], <MODE>mode)
102        || register_operand (operands[1], <MODE>mode))"
104    switch (which_alternative)
106      case 0: return "ldr\\t%d0, %1";
107      case 1: return "str\\t%d1, %0";
108      case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
109      case 3: return "umov\t%0, %1.d[0]";
110      case 4: return "ins\t%0.d[0], %1";
111      case 5: return "mov\t%0, %1";
113        return aarch64_output_simd_mov_immediate (operands[1],
115      default: gcc_unreachable ();
118   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
119 		     neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
120                      mov_reg, neon_move<q>")]
;; 128-bit (Q-register) vector move.  GP-register alternatives need two
;; instructions (handled by the splitters below), hence "multiple" type
;; and length 8 for those alternatives.
123 (define_insn "*aarch64_simd_mov<mode>"
124   [(set (match_operand:VQ 0 "nonimmediate_operand"
125 		"=w, m,  w, ?r, ?w, ?r, w")
126 	(match_operand:VQ 1 "general_operand"
127 		"m,  w,  w,  w,  r,  r, Dn"))]
129    && (register_operand (operands[0], <MODE>mode)
130        || register_operand (operands[1], <MODE>mode))"
132   switch (which_alternative)
135 	return "ldr\\t%q0, %1";
137 	return "str\\t%q1, %0";
139 	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
145 	return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
150   [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
151 		     neon_logic<q>, multiple, multiple, multiple,\
153    (set_attr "length" "4,4,4,8,8,8,4")]
;; Load a pair of consecutive D registers with LDP.  The condition
;; requires the second address to be exactly the first plus the mode
;; size, i.e. the two loads are adjacent in memory.
156 (define_insn "load_pair<mode>"
157   [(set (match_operand:VD 0 "register_operand" "=w")
158 	(match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
159    (set (match_operand:VD 2 "register_operand" "=w")
160 	(match_operand:VD 3 "memory_operand" "m"))]
162    && rtx_equal_p (XEXP (operands[3], 0),
163 		   plus_constant (Pmode,
164 				  XEXP (operands[1], 0),
165 				  GET_MODE_SIZE (<MODE>mode)))"
167   [(set_attr "type" "neon_ldp")]
;; Store a pair of consecutive D registers with STP; mirror of
;; load_pair, with the same adjacency condition on the addresses.
170 (define_insn "store_pair<mode>"
171   [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
172 	(match_operand:VD 1 "register_operand" "w"))
173    (set (match_operand:VD 2 "memory_operand" "=m")
174 	(match_operand:VD 3 "register_operand" "w"))]
176    && rtx_equal_p (XEXP (operands[2], 0),
177 		   plus_constant (Pmode,
178 				  XEXP (operands[0], 0),
179 				  GET_MODE_SIZE (<MODE>mode)))"
181   [(set_attr "type" "neon_stp")]
;; NOTE(review): the (define_split ...) header lines appear to be elided
;; from this view; these are the split bodies for Q-register moves.
;; First split: GP <- GP 128-bit move, after reload, becomes two DImode
;; register-to-register moves.
185   [(set (match_operand:VQ 0 "register_operand" "")
186       (match_operand:VQ 1 "register_operand" ""))]
187   "TARGET_SIMD && reload_completed
188    && GP_REGNUM_P (REGNO (operands[0]))
189    && GP_REGNUM_P (REGNO (operands[1]))"
192   aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
;; Second split: cross-file (FP<->GP) 128-bit move, after reload, is
;; split by aarch64_split_simd_move.
197   [(set (match_operand:VQ 0 "register_operand" "")
198         (match_operand:VQ 1 "register_operand" ""))]
199   "TARGET_SIMD && reload_completed
200    && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
201        || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
204   aarch64_split_simd_move (operands[0], operands[1]);
;; Split a 128-bit move into two 64-bit half moves.  When the source is
;; a GP register pair, move each half into the low/high quad of the
;; destination; otherwise extract the low/high halves of the SIMD
;; source into the destination halves via vec_select parallels.
208 (define_expand "aarch64_split_simd_mov<mode>"
209   [(set (match_operand:VQ 0)
210         (match_operand:VQ 1))]
213     rtx dst = operands[0];
214     rtx src = operands[1];
216     if (GP_REGNUM_P (REGNO (src)))
218         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
219         rtx src_high_part = gen_highpart (<VHALF>mode, src);
222 	  (gen_move_lo_quad_<mode> (dst, src_low_part));
224 	  (gen_move_hi_quad_<mode> (dst, src_high_part));
229         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
230         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
231 	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
232 	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
235 	  (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
237 	  (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Move the low half of a Q register into a GP register (post-reload
;; only; lane selection via a vect_par_cnst_lo_half parallel).
243 (define_insn "aarch64_simd_mov_from_<mode>low"
244   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
246 	  (match_operand:VQ 1 "register_operand" "w")
247 	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
248   "TARGET_SIMD && reload_completed"
250   [(set_attr "type" "neon_to_gp<q>")
251    (set_attr "length" "4")
;; Move the high half of a Q register into a GP register; the parallel
;; selects the upper lanes.
254 (define_insn "aarch64_simd_mov_from_<mode>high"
255   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
257 	  (match_operand:VQ 1 "register_operand" "w")
258 	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
259   "TARGET_SIMD && reload_completed"
261   [(set_attr "type" "neon_to_gp<q>")
262    (set_attr "length" "4")
;; ORN: bitwise OR with complement of operand 1.  Note the operand
;; order in the output: ORN computes %2 | ~%1.
265 (define_insn "orn<mode>3"
266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
267        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
268 		(match_operand:VDQ_I 2 "register_operand" "w")))]
270   "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
271   [(set_attr "type" "neon_logic<q>")]
;; BIC: bitwise AND with complement of operand 1 (%2 & ~%1).
274 (define_insn "bic<mode>3"
275  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
276        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
277 		(match_operand:VDQ_I 2 "register_operand" "w")))]
279   "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
280   [(set_attr "type" "neon_logic<q>")]
;; Vector integer addition.
283 (define_insn "add<mode>3"
284   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
285         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
286 		  (match_operand:VDQ_I 2 "register_operand" "w")))]
288   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
289   [(set_attr "type" "neon_add<q>")]
;; Vector integer subtraction.
292 (define_insn "sub<mode>3"
293   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
294         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
295 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
297   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
298   [(set_attr "type" "neon_sub<q>")]
;; Vector integer multiplication (byte/half/single lanes only — there
;; is no V2DI MUL instruction).
301 (define_insn "mul<mode>3"
302   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
303         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
304 		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
306   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
307   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element using the appropriate REV variant.
310 (define_insn "bswap<mode>2"
311   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
312         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
314   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
315   [(set_attr "type" "neon_rev<q>")]
;; Reverse the bits within each byte (RBIT on byte vectors).
318 (define_insn "aarch64_rbit<mode>"
319   [(set (match_operand:VB 0 "register_operand" "=w")
320 	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
323   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
324   [(set_attr "type" "neon_rbit")]
;; Count trailing zeros, synthesized as: byte-swap, bit-reverse each
;; byte (viewing the vector as bytes via a subreg), then CLZ — the
;; combination reverses the whole element's bit order.
327 (define_expand "ctz<mode>2"
328   [(set (match_operand:VS 0 "register_operand")
329         (ctz:VS (match_operand:VS 1 "register_operand")))]
332      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
333      rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
335      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
336      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
;; copysign(x, y): build a sign-bit mask (all bits above the mantissa,
;; i.e. just the sign bit: -1 << (unit_bits - 1)) and use BSL to take
;; the sign from operand 2 and the magnitude from operand 1.
341 (define_expand "copysign<mode>3"
342   [(match_operand:VHSDF 0 "register_operand")
343    (match_operand:VHSDF 1 "register_operand")
344    (match_operand:VHSDF 2 "register_operand")]
345   "TARGET_FLOAT && TARGET_SIMD"
347   rtx v_bitmask = gen_reg_rtx (<V_cmp_result>mode);
348   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
350   emit_move_insn (v_bitmask,
351 		  aarch64_simd_gen_const_vector_dup (<V_cmp_result>mode,
352 						     HOST_WIDE_INT_M1U << bits));
353   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
354 					 operands[2], operands[1]));
;; Multiply a vector by a single selected lane of another vector
;; (MUL/FMUL by element).  Lane index is endian-remapped.
359 (define_insn "*aarch64_mul3_elt<mode>"
360  [(set (match_operand:VMUL 0 "register_operand" "=w")
364 	  (match_operand:VMUL 1 "register_operand" "<h_con>")
365 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
366       (match_operand:VMUL 3 "register_operand" "w")))]
369     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
370     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
372   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane source has the swapped-width mode; remap the
;; lane index in that mode.
375 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
376   [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
377      (mult:VMUL_CHANGE_NLANES
378        (vec_duplicate:VMUL_CHANGE_NLANES
380 	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
381 	  (parallel [(match_operand:SI 2 "immediate_operand")])))
382       (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
385     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
386 					  INTVAL (operands[2])));
387     return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
389   [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by a duplicated scalar register: always uses element 0 of
;; the scalar's SIMD register.
392 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
393  [(set (match_operand:VMUL 0 "register_operand" "=w")
396 	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
397       (match_operand:VMUL 2 "register_operand" "w")))]
399   "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
400   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Reciprocal square-root estimate (FRSQRTE), vector and scalar FP modes.
403 (define_insn "aarch64_rsqrte<mode>"
404   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
405 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
408   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
409   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Reciprocal square-root step (FRSQRTS) — one Newton-Raphson iteration
;; step used by the approximation sequence.
411 (define_insn "aarch64_rsqrts<mode>"
412   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
413 	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
414 			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
417   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
418   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand rsqrt via the approximate-sqrt helper (recip=true).
420 (define_expand "rsqrt<mode>2"
421   [(set (match_operand:VALLF 0 "register_operand" "=w")
422 	(unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
426   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
;; DF result = V2DF lane * DF scalar, emitted as FMUL by element in
;; V2DF mode; lane index endian-remapped in V2DFmode.
430 (define_insn "*aarch64_mul3_elt_to_64v2df"
431   [(set (match_operand:DF 0 "register_operand" "=w")
434 	 (match_operand:V2DF 1 "register_operand" "w")
435 	 (parallel [(match_operand:SI 2 "immediate_operand")]))
436        (match_operand:DF 3 "register_operand" "w")))]
439     operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
440     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
442   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Vector integer negate.
445 (define_insn "neg<mode>2"
446   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
447 	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
449   "neg\t%0.<Vtype>, %1.<Vtype>"
450   [(set_attr "type" "neon_neg<q>")]
;; Vector integer absolute value (standard pattern, may combine freely).
453 (define_insn "abs<mode>2"
454   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
455         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
457   "abs\t%0.<Vtype>, %1.<Vtype>"
458   [(set_attr "type" "neon_abs<q>")]
461 ;; The intrinsic version of integer ABS must not be allowed to
462 ;; combine with any operation with an integrated ABS step, such
;; as SABD/SABA — hence it is expressed as an opaque unspec rather
;; than an (abs ...) rtx.
464 (define_insn "aarch64_abs<mode>"
465   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
467 	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
470   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
471   [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: |op1 - op2| as SABD.
474 (define_insn "abd<mode>_3"
475   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
476 	(abs:VDQ_BHSI (minus:VDQ_BHSI
477 		       (match_operand:VDQ_BHSI 1 "register_operand" "w")
478 		       (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
480   "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
481   [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate: acc + |op1 - op2| as SABA.
;; Operand 3 is tied to the output ("0" constraint) as the accumulator.
484 (define_insn "aba<mode>_3"
485   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
486 	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
487 			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
488 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
489 		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
491   "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
492   [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference (FABD), vector and scalar modes.
495 (define_insn "fabd<mode>3"
496   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
499 	  (match_operand:VHSDF_HSDF 1 "register_operand" "w")
500 	  (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
502   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
503   [(set_attr "type" "neon_fp_abd_<stype><q>")]
;; Bitwise AND.
506 (define_insn "and<mode>3"
507   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
508         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
509 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
511   "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
512   [(set_attr "type" "neon_logic<q>")]
;; Bitwise OR.
515 (define_insn "ior<mode>3"
516   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
517         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
518 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
520   "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
521   [(set_attr "type" "neon_logic<q>")]
;; Bitwise XOR.
524 (define_insn "xor<mode>3"
525   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
526         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
527 		 (match_operand:VDQ_I 2 "register_operand" "w")))]
529   "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
530   [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT.
533 (define_insn "one_cmpl<mode>2"
534   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
535         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
537   "not\t%0.<Vbtype>, %1.<Vbtype>"
538   [(set_attr "type" "neon_logic<q>")]
;; Insert a scalar into one lane of an integer vector (vec_merge with a
;; one-hot mask in operand 2).  The mask bit is converted to a lane
;; number via exact_log2, endian-remapped, then re-encoded; %p2 prints
;; the lane.  Alternatives: INS from GP, INS from SIMD lane 0, LD1 from
;; memory.
541 (define_insn "aarch64_simd_vec_set<mode>"
542   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
544 	    (vec_duplicate:VDQ_BHSI
545 		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
546 	    (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
547 	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
550    int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
551    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
552    switch (which_alternative)
555 	return "ins\\t%0.<Vetype>[%p2], %w1";
557 	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
559 	return "ld1\\t{%0.<Vetype>}[%p2], %1";
564   [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
;; Copy one lane of vector 3 (lane 4) into one lane of vector 1
;; (selected by the one-hot mask in operand 2): INS element-to-element.
;; Both lane numbers are endian-remapped.
567 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
568   [(set (match_operand:VALL 0 "register_operand" "=w")
572 	    (match_operand:VALL 3 "register_operand" "w")
574 	      [(match_operand:SI 4 "immediate_operand" "i")])))
575 	  (match_operand:VALL 1 "register_operand" "0")
576 	  (match_operand:SI 2 "immediate_operand" "i")))]
579     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
580     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
581     operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
583     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
585   [(set_attr "type" "neon_ins<q>")]
;; As above, but the source vector has the swapped-width mode, so its
;; lane index is remapped in <VSWAP_WIDTH>mode instead.
588 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
589   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
590 	(vec_merge:VALL_F16_NO_V2Q
591 	    (vec_duplicate:VALL_F16_NO_V2Q
593 	      (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
595 		[(match_operand:SI 4 "immediate_operand" "i")])))
596 	  (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
597 	  (match_operand:SI 2 "immediate_operand" "i")))]
600     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
601     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
602     operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
603 			   INTVAL (operands[4])));
605     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
607   [(set_attr "type" "neon_ins<q>")]
;; Logical shift right by an immediate vector (USHR).
610 (define_insn "aarch64_simd_lshr<mode>"
611  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
612        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
613 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
615  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
616   [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right by an immediate vector (SSHR).
619 (define_insn "aarch64_simd_ashr<mode>"
620  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
621        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
622 		     (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
624  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
625   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by an immediate vector (SHL).
628 (define_insn "aarch64_simd_imm_shl<mode>"
629  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
630        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
631 		   (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
633   "shl\t%0.<Vtype>, %1.<Vtype>, %2"
634   [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by a per-lane register amount (SSHL; for ashift the
;; amounts are non-negative so the signed form is used).
637 (define_insn "aarch64_simd_reg_sshl<mode>"
638  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
639        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
640 		   (match_operand:VDQ_I 2 "register_operand" "w")))]
642   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
643   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL with a register amount: negative amounts shift right with zero
;; fill; kept as an unspec so it cannot be simplified as a plain shift.
646 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
647  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
648        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
649 		    (match_operand:VDQ_I 2 "register_operand" "w")]
650 		   UNSPEC_ASHIFT_UNSIGNED))]
652   "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
653   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL with a register amount: negative amounts shift right with sign
;; fill; likewise an unspec.
656 (define_insn "aarch64_simd_reg_shl<mode>_signed"
657  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
658        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
659 		    (match_operand:VDQ_I 2 "register_operand" "w")]
660 		  UNSPEC_ASHIFT_SIGNED))]
662   "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
663   [(set_attr "type" "neon_shift_reg<q>")]
;; Shift-left expander with a scalar SI amount.  In-range constants use
;; the immediate SHL pattern; out-of-range constants fall through to the
;; register path; memory amounts are forced into a register; register
;; amounts are duplicated across lanes and use SSHL.
666 (define_expand "ashl<mode>3"
667   [(match_operand:VDQ_I 0 "register_operand" "")
668    (match_operand:VDQ_I 1 "register_operand" "")
669    (match_operand:SI 2 "general_operand" "")]
672   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
675   if (CONST_INT_P (operands[2]))
677       shift_amount = INTVAL (operands[2]);
678       if (shift_amount >= 0 && shift_amount < bit_width)
680 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
682 	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
689 	  operands[2] = force_reg (SImode, operands[2]);
692   else if (MEM_P (operands[2]))
694       operands[2] = force_reg (SImode, operands[2]);
697   if (REG_P (operands[2]))
699       rtx tmp = gen_reg_rtx (<MODE>mode);
700       emit_insn (gen_aarch64_simd_dup<mode> (tmp,
701 					     convert_to_mode (<VEL>mode,
704       emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical shift-right expander.  In-range constant amounts (note the
;; right-shift immediate range is 1..bit_width) use USHR; otherwise the
;; amount is negated and broadcast, then USHL (which shifts right for
;; negative amounts) performs the variable shift.
713 (define_expand "lshr<mode>3"
714   [(match_operand:VDQ_I 0 "register_operand" "")
715    (match_operand:VDQ_I 1 "register_operand" "")
716    (match_operand:SI 2 "general_operand" "")]
719   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
722   if (CONST_INT_P (operands[2]))
724       shift_amount = INTVAL (operands[2]);
725       if (shift_amount > 0 && shift_amount <= bit_width)
727 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
729 	  emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
735           operands[2] = force_reg (SImode, operands[2]);
737   else if (MEM_P (operands[2]))
739       operands[2] = force_reg (SImode, operands[2]);
742   if (REG_P (operands[2]))
744       rtx tmp = gen_reg_rtx (SImode);
745       rtx tmp1 = gen_reg_rtx (<MODE>mode);
746       emit_insn (gen_negsi2 (tmp, operands[2]));
747       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
748 					     convert_to_mode (<VEL>mode,
750       emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic shift-right expander; identical structure to lshr<mode>3
;; but emits SSHR for constants and the signed SSHL unspec (negated,
;; broadcast amount) for the variable case.
760 (define_expand "ashr<mode>3"
761   [(match_operand:VDQ_I 0 "register_operand" "")
762    (match_operand:VDQ_I 1 "register_operand" "")
763    (match_operand:SI 2 "general_operand" "")]
766   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
769   if (CONST_INT_P (operands[2]))
771       shift_amount = INTVAL (operands[2]);
772       if (shift_amount > 0 && shift_amount <= bit_width)
774 	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
776 	  emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
782           operands[2] = force_reg (SImode, operands[2]);
784   else if (MEM_P (operands[2]))
786       operands[2] = force_reg (SImode, operands[2]);
789   if (REG_P (operands[2]))
791       rtx tmp = gen_reg_rtx (SImode);
792       rtx tmp1 = gen_reg_rtx (<MODE>mode);
793       emit_insn (gen_negsi2 (tmp, operands[2]));
794       emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
795 					     convert_to_mode (<VEL>mode,
797       emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector shift left: amounts are already per-lane, so emit
;; SSHL directly.
807 (define_expand "vashl<mode>3"
808  [(match_operand:VDQ_I 0 "register_operand" "")
809   (match_operand:VDQ_I 1 "register_operand" "")
810   (match_operand:VDQ_I 2 "register_operand" "")]
813   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
818 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
819 ;; Negating individual lanes most certainly offsets the
820 ;; gain from vectorization.
;; Vector-by-vector arithmetic shift right: negate the per-lane amounts
;; and use the signed SSHL unspec (negative amount = right shift).
821 (define_expand "vashr<mode>3"
822  [(match_operand:VDQ_BHSI 0 "register_operand" "")
823   (match_operand:VDQ_BHSI 1 "register_operand" "")
824   (match_operand:VDQ_BHSI 2 "register_operand" "")]
827   rtx neg = gen_reg_rtx (<MODE>mode);
828   emit (gen_neg<mode>2 (neg, operands[2]));
829   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; DImode ASR intrinsic expander: an asr by 64 is not representable by
;; the standard pattern, but it yields the same result as asr by 63
;; (all sign bits), so clamp it.
835 (define_expand "aarch64_ashr_simddi"
836   [(match_operand:DI 0 "register_operand" "=w")
837    (match_operand:DI 1 "register_operand" "w")
838    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
841     /* An arithmetic shift right by 64 fills the result with copies of the sign
842        bit, just like asr by 63 - however the standard pattern does not handle
844     if (INTVAL (operands[2]) == 64)
845       operands[2] = GEN_INT (63);
846     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
;; Vector-by-vector logical shift right: negate the amounts and use the
;; unsigned USHL unspec.
851 (define_expand "vlshr<mode>3"
852  [(match_operand:VDQ_BHSI 0 "register_operand" "")
853   (match_operand:VDQ_BHSI 1 "register_operand" "")
854   (match_operand:VDQ_BHSI 2 "register_operand" "")]
857   rtx neg = gen_reg_rtx (<MODE>mode);
858   emit (gen_neg<mode>2 (neg, operands[2]));
859   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; DImode LSR intrinsic expander: lsr by 64 yields zero, which the
;; standard pattern cannot express; emit a zero move for that case.
864 (define_expand "aarch64_lshr_simddi"
865   [(match_operand:DI 0 "register_operand" "=w")
866    (match_operand:DI 1 "register_operand" "w")
867    (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
870     if (INTVAL (operands[2]) == 64)
871       emit_move_insn (operands[0], const0_rtx);
873       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
;; Standard vec_set expander: convert the lane number into a one-hot
;; mask for the vec_merge-based insn above.
878 (define_expand "vec_set<mode>"
879   [(match_operand:VDQ_BHSI 0 "register_operand")
880    (match_operand:<VEL> 1 "register_operand")
881    (match_operand:SI 2 "immediate_operand")]
884     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
885     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
886 					    GEN_INT (elem), operands[0]));
891 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper).  Direction
;; depends on endianness: SHL on big-endian, USHR on little-endian.
892 (define_insn "vec_shr_<mode>"
893   [(set (match_operand:VD 0 "register_operand" "=w")
894         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
895 		    (match_operand:SI 2 "immediate_operand" "i")]
899   if (BYTES_BIG_ENDIAN)
900     return "shl %d0, %d1, %2";
902     return "ushr %d0, %d1, %2";
904   [(set_attr "type" "neon_shift_imm")]
;; V2DI lane insert: INS from a GP register or from lane 0 of a SIMD
;; register; one-hot mask decoded/endian-remapped as in the generic
;; vec_set insn.
907 (define_insn "aarch64_simd_vec_setv2di"
908   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
911 	    (match_operand:DI 1 "register_operand" "r,w"))
912 	  (match_operand:V2DI 3 "register_operand" "0,0")
913 	  (match_operand:SI 2 "immediate_operand" "i,i")))]
916     int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
917     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
918     switch (which_alternative)
921 	return "ins\\t%0.d[%p2], %1";
923 	return "ins\\t%0.d[%p2], %1.d[0]";
928   [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI: lane number -> one-hot mask.
931 (define_expand "vec_setv2di"
932   [(match_operand:V2DI 0 "register_operand")
933    (match_operand:DI 1 "register_operand")
934    (match_operand:SI 2 "immediate_operand")]
937     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
938     emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
939 					  GEN_INT (elem), operands[0]));
;; Floating-point lane insert: INS from lane 0 of a SIMD register only
;; (no GP alternative for FP element types here).
944 (define_insn "aarch64_simd_vec_set<mode>"
945   [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
947 	    (vec_duplicate:VDQF_F16
948 	      (match_operand:<VEL> 1 "register_operand" "w"))
949 	  (match_operand:VDQF_F16 3 "register_operand" "0")
950 	  (match_operand:SI 2 "immediate_operand" "i")))]
953     int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
955     operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
956     return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
958   [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for FP vectors: lane number -> one-hot mask.
961 (define_expand "vec_set<mode>"
962   [(match_operand:VDQF_F16 0 "register_operand" "+w")
963    (match_operand:<VEL> 1 "register_operand" "w")
964    (match_operand:SI 2 "immediate_operand" "")]
967     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
968     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
969 					  GEN_INT (elem), operands[0]));
;; Multiply-accumulate: acc (operand 1, tied to output) + op2 * op3.
975 (define_insn "aarch64_mla<mode>"
976  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
977        (plus:VDQ_BHSI (mult:VDQ_BHSI
978 			(match_operand:VDQ_BHSI 2 "register_operand" "w")
979 			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
980 		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
982  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
983   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by element: multiply by one (endian-remapped) lane of operand 1.
986 (define_insn "*aarch64_mla_elt<mode>"
987  [(set (match_operand:VDQHS 0 "register_operand" "=w")
992 	      (match_operand:VDQHS 1 "register_operand" "<h_con>")
993 	      (parallel [(match_operand:SI 2 "immediate_operand")])))
994 	   (match_operand:VDQHS 3 "register_operand" "w"))
995 	  (match_operand:VDQHS 4 "register_operand" "0")))]
998     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
999     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1001   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by element where the lane source has the swapped-width mode.
1004 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1005   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1008 	    (vec_duplicate:VDQHS
1010 		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1011 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1012 	    (match_operand:VDQHS 3 "register_operand" "w"))
1013 	  (match_operand:VDQHS 4 "register_operand" "0")))]
1016     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1017 					  INTVAL (operands[2])));
1018     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1020   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: acc (operand 1, tied to output) - op2 * op3.
1023 (define_insn "aarch64_mls<mode>"
1024  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1026 		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027 			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1029  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1030   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by element: multiply by one (endian-remapped) lane of operand 1.
1033 (define_insn "*aarch64_mls_elt<mode>"
1034  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 	  (match_operand:VDQHS 4 "register_operand" "0")
1038 	    (vec_duplicate:VDQHS
1040 		(match_operand:VDQHS 1 "register_operand" "<h_con>")
1041 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1042 	    (match_operand:VDQHS 3 "register_operand" "w"))))]
1045     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1046     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by element where the lane source has the swapped-width mode.
1051 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1052   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1054 	  (match_operand:VDQHS 4 "register_operand" "0")
1056 	    (vec_duplicate:VDQHS
1058 		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059 		(parallel [(match_operand:SI 2 "immediate_operand")])))
1060 	    (match_operand:VDQHS 3 "register_operand" "w"))))]
1063     operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1064 					  INTVAL (operands[2])));
1065     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1067   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1070 ;; Max/Min operations.
;; Signed/unsigned SMAX/SMIN/UMAX/UMIN for B/H/S element vectors.
1071 (define_insn "<su><maxmin><mode>3"
1072  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1073        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1074 		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1076  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1077   [(set_attr "type" "neon_minmax<q>")]
;; V2DI max/min: no direct instruction exists, so synthesize via a
;; compare (cmp_fmt) and a vcond selecting between the two operands.
;; NOTE(review): the code choosing cmp_operator is elided in this view.
1080 (define_expand "<su><maxmin>v2di3"
1081  [(set (match_operand:V2DI 0 "register_operand" "")
1082        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1083 		    (match_operand:V2DI 2 "register_operand" "")))]
1086   enum rtx_code cmp_operator;
1107   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1108   emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1109 			      operands[2], cmp_fmt, operands[1], operands[2]));
1113 ;; Pairwise Integer Max/Min operations.
;; Pairwise max/min of adjacent integer elements ([SU]MAXP/[SU]MINP).
1114 (define_insn "aarch64_<maxmin_uns>p<mode>"
1115  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1116        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1117 			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1120  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1121   [(set_attr "type" "neon_minmax<q>")]
1124 ;; Pairwise FP Max/Min operations.
;; Pairwise max/min of adjacent FP elements.
1125 (define_insn "aarch64_<maxmin_uns>p<mode>"
1126  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1127        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1128 		      (match_operand:VHSDF 2 "register_operand" "w")]
1131  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1132   [(set_attr "type" "neon_minmax<q>")]
1135 ;; vec_concat gives a new vector with the low elements from operand 1, and
1136 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1137 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1138 ;; What that means, is that the RTL descriptions of the below patterns
1139 ;; need to change depending on endianness.
1141 ;; Move to the low architectural bits of the register.
1142 ;; On little-endian this is { operand, zeroes }
1143 ;; On big-endian this is { zeroes, operand }
;; Little-endian variant for modes with more than two elements: the operand
;; forms the first (low) half of the vec_concat, zeros the second.
1145 (define_insn "move_lo_quad_internal_<mode>"
1146 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1148 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1149 (vec_duplicate:<VHALF> (const_int 0))))]
1150 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1155 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1156 (set_attr "simd" "yes,*,yes")
1157 (set_attr "fp" "*,yes,*")
1158 (set_attr "length" "4")]
;; As above, for the two-element (V2DI/V2DF) modes, where the zero half is
;; a single element rather than a vec_duplicate.
1161 (define_insn "move_lo_quad_internal_<mode>"
1162 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1164 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1166 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1171 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1172 (set_attr "simd" "yes,*,yes")
1173 (set_attr "fp" "*,yes,*")
1174 (set_attr "length" "4")]
;; Big-endian counterpart: zeros occupy the first half of the vec_concat,
;; so the operand still lands in the low architectural bits.
1177 (define_insn "move_lo_quad_internal_be_<mode>"
1178 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1180 (vec_duplicate:<VHALF> (const_int 0))
1181 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1182 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1187 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1188 (set_attr "simd" "yes,*,yes")
1189 (set_attr "fp" "*,yes,*")
1190 (set_attr "length" "4")]
;; Big-endian, two-element modes.
1193 (define_insn "move_lo_quad_internal_be_<mode>"
1194 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1197 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1198 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1203 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1204 (set_attr "simd" "yes,*,yes")
1205 (set_attr "fp" "*,yes,*")
1206 (set_attr "length" "4")]
;; Entry point: pick the LE or BE internal pattern at expand time so the
;; RTL vec_concat order matches the target's lane numbering.
1209 (define_expand "move_lo_quad_<mode>"
1210 [(match_operand:VQ 0 "register_operand")
1211 (match_operand:VQ 1 "register_operand")]
1214 if (BYTES_BIG_ENDIAN)
1215 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1217 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1222 ;; Move operand1 to the high architectural bits of the register, keeping
1223 ;; the low architectural bits of operand2.
1224 ;; For little-endian this is { operand2, operand1 }
1225 ;; For big-endian this is { operand1, operand2 }
;; Little-endian: keep the lo half (selected via a lo-half PARALLEL,
;; operand 2) and insert operand 1 into lane d[1].
1227 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1228 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1232 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1233 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1234 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1236 ins\\t%0.d[1], %1.d[0]
1238 [(set_attr "type" "neon_ins")]
;; Big-endian: same insertion, with the vec_concat halves swapped in RTL.
1241 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1242 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1244 (match_operand:<VHALF> 1 "register_operand" "w,r")
1247 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1248 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1250 ins\\t%0.d[1], %1.d[0]
1252 [(set_attr "type" "neon_ins")]
;; Entry point: build the lo-half PARALLEL (hence 'false') required by both
;; insns above, then dispatch on endianness.
1255 (define_expand "move_hi_quad_<mode>"
1256 [(match_operand:VQ 0 "register_operand" "")
1257 (match_operand:<VHALF> 1 "register_operand" "")]
1260 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1261 if (BYTES_BIG_ENDIAN)
1262 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1265 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1270 ;; Narrowing operations.
;; Truncate each wide lane to half width: a single XTN.
1273 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1274 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1275 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1277 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1278 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; D-register sources: concatenate the two inputs into one Q register
;; (endian-aware lo/hi placement), then narrow it with the XTN insn above.
1281 (define_expand "vec_pack_trunc_<mode>"
1282 [(match_operand:<VNARROWD> 0 "register_operand" "")
1283 (match_operand:VDN 1 "register_operand" "")
1284 (match_operand:VDN 2 "register_operand" "")]
1287 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1288 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1289 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1291 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1292 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1293 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Q-register sources: XTN into the low half, XTN2 into the high half;
;; operand order swaps on big-endian.  Earlyclobber output ("=&w") because
;; the first XTN would otherwise clobber a still-needed input.
1299 (define_insn "vec_pack_trunc_<mode>"
1300 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1301 (vec_concat:<VNARROWQ2>
1302 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1303 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1306 if (BYTES_BIG_ENDIAN)
1307 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1309 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1311 [(set_attr "type" "multiple")
1312 (set_attr "length" "8")]
1315 ;; Widening operations.
;; Sign/zero-extend the low half of a Q register via (U)SHLL #0.
1317 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1319 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1320 (match_operand:VQW 1 "register_operand" "w")
1321 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1324 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1325 [(set_attr "type" "neon_shift_imm_long")]
;; Likewise for the high half via (U)SHLL2 #0.
1328 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1329 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1330 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1331 (match_operand:VQW 1 "register_operand" "w")
1332 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1335 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1336 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expander: build the hi-half lane PARALLEL ('true') and
;; emit the _hi_ insn above.
1339 (define_expand "vec_unpack<su>_hi_<mode>"
1340 [(match_operand:<VWIDE> 0 "register_operand" "")
1341 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1344 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1345 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
;; Standard-name expander for the low half ('false' selects lo lanes).
1351 (define_expand "vec_unpack<su>_lo_<mode>"
1352 [(match_operand:<VWIDE> 0 "register_operand" "")
1353 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1356 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1357 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1363 ;; Widening arithmetic.
;; Combiner pattern: acc += extend(lo half of op2) * extend(lo half of op4)
;; as a single (S/U)MLAL.  Operand 1 is the tied accumulator ("0").
1365 (define_insn "*aarch64_<su>mlal_lo<mode>"
1366 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1369 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1370 (match_operand:VQW 2 "register_operand" "w")
1371 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1372 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1373 (match_operand:VQW 4 "register_operand" "w")
1375 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1377 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1378 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half variant: (S/U)MLAL2.
1381 (define_insn "*aarch64_<su>mlal_hi<mode>"
1382 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1385 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1386 (match_operand:VQW 2 "register_operand" "w")
1387 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1388 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1389 (match_operand:VQW 4 "register_operand" "w")
1391 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1393 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1394 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Multiply-subtract forms: acc -= extend(...) * extend(...), (S/U)MLSL.
1397 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1398 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1400 (match_operand:<VWIDE> 1 "register_operand" "0")
1402 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403 (match_operand:VQW 2 "register_operand" "w")
1404 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1405 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1406 (match_operand:VQW 4 "register_operand" "w")
1409 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1410 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; High-half multiply-subtract: (S/U)MLSL2.
1413 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1414 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1416 (match_operand:<VWIDE> 1 "register_operand" "0")
1418 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1419 (match_operand:VQW 2 "register_operand" "w")
1420 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1421 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1422 (match_operand:VQW 4 "register_operand" "w")
1425 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1426 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register (64-bit) source forms: no half selection needed; operand 3
;; is the tied accumulator.
1429 (define_insn "*aarch64_<su>mlal<mode>"
1430 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1434 (match_operand:VD_BHSI 1 "register_operand" "w"))
1436 (match_operand:VD_BHSI 2 "register_operand" "w")))
1437 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1439 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1440 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register multiply-subtract; operand 1 is the tied accumulator.
1443 (define_insn "*aarch64_<su>mlsl<mode>"
1444 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1446 (match_operand:<VWIDE> 1 "register_operand" "0")
1449 (match_operand:VD_BHSI 2 "register_operand" "w"))
1451 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1453 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1454 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: (S/U)MULL.
1457 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1458 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1459 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1460 (match_operand:VQW 1 "register_operand" "w")
1461 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1462 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1463 (match_operand:VQW 2 "register_operand" "w")
1466 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1467 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: lo-half PARALLEL ('false') -> _lo_ insn above.
1470 (define_expand "vec_widen_<su>mult_lo_<mode>"
1471 [(match_operand:<VWIDE> 0 "register_operand" "")
1472 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1473 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1476 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1477 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves: (S/U)MULL2.
1484 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1485 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1486 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1487 (match_operand:VQW 1 "register_operand" "w")
1488 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1489 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1490 (match_operand:VQW 2 "register_operand" "w")
1493 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1494 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: hi-half PARALLEL ('true') -> _hi_ insn above.
1497 (define_expand "vec_widen_<su>mult_hi_<mode>"
1498 [(match_operand:<VWIDE> 0 "register_operand" "")
1499 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1500 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1503 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1504 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1512 ;; FP vector operations.
1513 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1514 ;; double-precision (64-bit) floating-point data types and arithmetic as
1515 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1516 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1518 ;; Floating-point operations can raise an exception. Vectorizing such
1519 ;; operations is safe because of the reasons explained below.
1521 ;; ARMv8 permits an extension to enable trapped floating-point
1522 ;; exception handling, however this is an optional feature. In the
1523 ;; event of a floating-point exception being raised by vectorised
1525 ;; 1. If trapped floating-point exceptions are available, then a trap
1526 ;; will be taken when any lane raises an enabled exception. A trap
1527 ;; handler may determine which lane raised the exception.
1528 ;; 2. Alternatively a sticky exception flag is set in the
1529 ;; floating-point status register (FPSR). Software may explicitly
1530 ;; test the exception flags, in which case the tests will either
1531 ;; prevent vectorisation, allowing precise identification of the
1532 ;; failing operation, or if tested outside of vectorisable regions
1533 ;; then the specific operation and lane are not of interest.
1535 ;; FP arithmetic operations.
;; Vector FP addition: FADD on f16/f32/f64 lanes.
1537 (define_insn "add<mode>3"
1538 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1539 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1540 (match_operand:VHSDF 2 "register_operand" "w")))]
1542 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1543 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction: FSUB.
1546 (define_insn "sub<mode>3"
1547 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1548 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1549 (match_operand:VHSDF 2 "register_operand" "w")))]
1551 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1552 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication: FMUL.
1555 (define_insn "mul<mode>3"
1556 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1557 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1558 (match_operand:VHSDF 2 "register_operand" "w")))]
1560 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1561 [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Division expander: first try the Newton-Raphson approximation sequence
;; (aarch64_emit_approx_div); if it declines, fall through to the FDIV insn.
1564 (define_expand "div<mode>3"
1565 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1566 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1567 (match_operand:VHSDF 2 "register_operand" "w")))]
1570 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1573 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Plain vector FDIV, used when no approximation is emitted.
1576 (define_insn "*div<mode>3"
1577 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1578 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1579 (match_operand:VHSDF 2 "register_operand" "w")))]
1581 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1582 [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation: FNEG.
1585 (define_insn "neg<mode>2"
1586 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1587 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1589 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1590 [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; Vector FP absolute value: FABS.
1593 (define_insn "abs<mode>2"
1594 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1595 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1597 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1598 [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Fused multiply-add (standard name): FMLA with the addend tied ("0").
1601 (define_insn "fma<mode>4"
1602 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1603 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1604 (match_operand:VHSDF 2 "register_operand" "w")
1605 (match_operand:VHSDF 3 "register_operand" "0")))]
1607 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1608 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMA with one factor broadcast from lane [%2] of operand 1; the lane
;; index is remapped for endianness via ENDIAN_LANE_N.
1611 (define_insn "*aarch64_fma4_elt<mode>"
1612 [(set (match_operand:VDQF 0 "register_operand" "=w")
1616 (match_operand:VDQF 1 "register_operand" "<h_con>")
1617 (parallel [(match_operand:SI 2 "immediate_operand")])))
1618 (match_operand:VDQF 3 "register_operand" "w")
1619 (match_operand:VDQF 4 "register_operand" "0")))]
1622 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1623 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1625 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Like the above, with the lane taken from a vector of the other width
;; (64-bit vs 128-bit), hence the <VSWAP_WIDTH> index mode.
1628 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1629 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1631 (vec_duplicate:VDQSF
1633 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1634 (parallel [(match_operand:SI 2 "immediate_operand")])))
1635 (match_operand:VDQSF 3 "register_operand" "w")
1636 (match_operand:VDQSF 4 "register_operand" "0")))]
1639 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1640 INTVAL (operands[2])));
1641 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1643 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Factor duplicated from a scalar register: FMLA with lane [0].
1646 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1647 [(set (match_operand:VMUL 0 "register_operand" "=w")
1650 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1651 (match_operand:VMUL 2 "register_operand" "w")
1652 (match_operand:VMUL 3 "register_operand" "0")))]
1654 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1655 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result computed with the by-element V2DF form of FMLA.
1658 (define_insn "*aarch64_fma4_elt_to_64v2df"
1659 [(set (match_operand:DF 0 "register_operand" "=w")
1662 (match_operand:V2DF 1 "register_operand" "w")
1663 (parallel [(match_operand:SI 2 "immediate_operand")]))
1664 (match_operand:DF 3 "register_operand" "w")
1665 (match_operand:DF 4 "register_operand" "0")))]
1668 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1669 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1671 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused multiply-subtract (fnma standard name): FMLS, addend tied ("0").
;; Mirrors the fma<mode>4 pattern family above with a negated factor.
1674 (define_insn "fnma<mode>4"
1675 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1677 (match_operand:VHSDF 1 "register_operand" "w")
1679 (match_operand:VHSDF 2 "register_operand" "w"))
1680 (match_operand:VHSDF 3 "register_operand" "0")))]
1682 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1683 [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS by element; lane index remapped for endianness.
1686 (define_insn "*aarch64_fnma4_elt<mode>"
1687 [(set (match_operand:VDQF 0 "register_operand" "=w")
1690 (match_operand:VDQF 3 "register_operand" "w"))
1693 (match_operand:VDQF 1 "register_operand" "<h_con>")
1694 (parallel [(match_operand:SI 2 "immediate_operand")])))
1695 (match_operand:VDQF 4 "register_operand" "0")))]
1698 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1699 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1701 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by element with the lane taken from the other-width vector mode.
1704 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1705 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1708 (match_operand:VDQSF 3 "register_operand" "w"))
1709 (vec_duplicate:VDQSF
1711 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1712 (parallel [(match_operand:SI 2 "immediate_operand")])))
1713 (match_operand:VDQSF 4 "register_operand" "0")))]
1716 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1717 INTVAL (operands[2])));
1718 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1720 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; Factor duplicated from a scalar register: FMLS with lane [0].
1723 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1724 [(set (match_operand:VMUL 0 "register_operand" "=w")
1727 (match_operand:VMUL 2 "register_operand" "w"))
1729 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1730 (match_operand:VMUL 3 "register_operand" "0")))]
1732 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1733 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF result via the by-element V2DF form of FMLS.
1736 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1737 [(set (match_operand:DF 0 "register_operand" "=w")
1740 (match_operand:V2DF 1 "register_operand" "w")
1741 (parallel [(match_operand:SI 2 "immediate_operand")]))
1743 (match_operand:DF 3 "register_operand" "w"))
1744 (match_operand:DF 4 "register_operand" "0")))]
1747 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1748 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1750 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1753 ;; Vector versions of the floating-point frint patterns.
1754 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1755 (define_insn "<frint_pattern><mode>2"
1756 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1757 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1760 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_fp_round_<stype><q>")]
1764 ;; Vector versions of the fcvt standard patterns.
1765 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Round-then-convert to signed/unsigned integer: FCVT<rm><s/u>.
1766 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1767 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1768 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1769 [(match_operand:VHSDF 1 "register_operand" "w")]
1772 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1773 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1776 ;; HF Scalar variants of related SIMD instructions.
;; Scalar half-float round-and-convert to 16-bit integer (FP16 extension).
1777 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1778 [(set (match_operand:HI 0 "register_operand" "=w")
1779 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1781 "TARGET_SIMD_F16INST"
1782 "fcvt<frint_suffix><su>\t%h0, %h1"
1783 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar half-float truncating convert to 16-bit integer: FCVTZ<s/u>.
1786 (define_insn "<optab>_trunchfhi2"
1787 [(set (match_operand:HI 0 "register_operand" "=w")
1788 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1789 "TARGET_SIMD_F16INST"
1790 "fcvtz<su>\t%h0, %h1"
1791 [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar 16-bit integer to half-float: SCVTF/UCVTF.
1794 (define_insn "<optab>hihf2"
1795 [(set (match_operand:HF 0 "register_operand" "=w")
1796 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1797 "TARGET_SIMD_F16INST"
1798 "<su_optab>cvtf\t%h0, %h1"
1799 [(set_attr "type" "neon_int_to_fp_s")]
;; Combiner pattern: convert-to-int of (x * 2^n) becomes a single
;; FCVTZ<s/u> with an immediate fbits count, when n is in range for the
;; element size.
1802 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1803 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1804 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1806 (match_operand:VDQF 1 "register_operand" "w")
1807 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1810 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1811 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1813 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1815 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1816 output_asm_insn (buf, operands);
1819 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expanders mapping fix/fixuns (and the _trunc variants)
;; onto the unspec-based conversion insns.
1822 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1823 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1824 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1825 [(match_operand:VHSDF 1 "register_operand")]
1830 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1831 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1832 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1833 [(match_operand:VHSDF 1 "register_operand")]
;; ftrunc standard name: round toward zero, result stays floating-point.
1838 (define_expand "ftrunc<VHSDF:mode>2"
1839 [(set (match_operand:VHSDF 0 "register_operand")
1840 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
;; Integer-to-FP conversion: SCVTF/UCVTF on each lane.
1845 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1846 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1848 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1850 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1851 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1854 ;; Conversions between vectors of floats and doubles.
1855 ;; Contains a mix of patterns to match standard pattern names
1856 ;; and those for intrinsics.
1858 ;; Float widening operations.
;; Widen the low half of a f16/f32 vector to f32/f64: FCVTL.
1860 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1861 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1862 (float_extend:<VWIDE> (vec_select:<VHALF>
1863 (match_operand:VQ_HSF 1 "register_operand" "w")
1864 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1867 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1868 [(set_attr "type" "neon_fp_cvt_widen_s")]
1871 ;; Convert between fixed-point and floating-point (vector modes)
;; FP -> fixed-point with #fbits immediate (FCVTZS/FCVTZU #n).
1873 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1874 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1875 (unspec:<VHSDF:FCVT_TARGET>
1876 [(match_operand:VHSDF 1 "register_operand" "w")
1877 (match_operand:SI 2 "immediate_operand" "i")]
1880 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1881 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> FP with #fbits immediate (SCVTF/UCVTF #n).
1884 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1885 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1886 (unspec:<VDQ_HSDI:FCVT_TARGET>
1887 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1888 (match_operand:SI 2 "immediate_operand" "i")]
1891 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1892 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1895 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1896 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1897 ;; the meaning of HI and LO changes depending on the target endianness.
1898 ;; While elsewhere we map the higher numbered elements of a vector to
1899 ;; the lower architectural lanes of the vector, for these patterns we want
1900 ;; to always treat "hi" as referring to the higher architectural lanes.
1901 ;; Consequently, while the patterns below look inconsistent with our
1902 ;; other big-endian patterns their behavior is as required.
;; Standard-name expander: lo-half lane PARALLEL ('false') feeding the
;; FCVTL insn above.  See the endianness note preceding these patterns.
1904 (define_expand "vec_unpacks_lo_<mode>"
1905 [(match_operand:<VWIDE> 0 "register_operand" "")
1906 (match_operand:VQ_HSF 1 "register_operand" "")]
1909 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1910 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
;; Widen the high half of a f16/f32 vector: FCVTL2.
1916 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1918 (float_extend:<VWIDE> (vec_select:<VHALF>
1919 (match_operand:VQ_HSF 1 "register_operand" "w")
1920 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1923 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1924 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard-name expander for widening the high half of a float vector:
;; build a hi-half lane PARALLEL ('true') and emit the _hi_ insn (FCVTL2).
;; Fixed: this previously called gen_aarch64_simd_vec_unpacks_lo_<mode>,
;; whose operand-2 predicate is vect_par_cnst_lo_half and can never match
;; a hi-half PARALLEL, so emit_insn would fail.  The integer analogue
;; vec_unpack<su>_hi_<mode> correctly uses the _hi_ generator.
1927 (define_expand "vec_unpacks_hi_<mode>"
1928 [(match_operand:<VWIDE> 0 "register_operand" "")
1929 (match_operand:VQ_HSF 1 "register_operand" "")]
1932 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1933 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
;; Widen a whole 64-bit float vector (V2SF/V4HF) to the wide mode: FCVTL.
1938 (define_insn "aarch64_float_extend_lo_<Vwide>"
1939 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1940 (float_extend:<VWIDE>
1941 (match_operand:VDF 1 "register_operand" "w")))]
1943 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
1944 [(set_attr "type" "neon_fp_cvt_widen_s")]
1947 ;; Float narrowing operations.
;; Narrow a wide vector into a D register: FCVTN.
1949 (define_insn "aarch64_float_truncate_lo_<mode>"
1950 [(set (match_operand:VDF 0 "register_operand" "=w")
1952 (match_operand:<VWIDE> 1 "register_operand" "w")))]
1954 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
1955 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Narrow into the high half while keeping operand 1 (tied, "0") in the
;; low half: FCVTN2.  Little-endian vec_concat order.
1958 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
1959 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1961 (match_operand:VDF 1 "register_operand" "0")
1963 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
1964 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1965 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1966 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: vec_concat halves swapped, same FCVTN2.
1969 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
1970 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
1973 (match_operand:<VWIDE> 2 "register_operand" "w"))
1974 (match_operand:VDF 1 "register_operand" "0")))]
1975 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1976 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
1977 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Intrinsics entry point: select the LE/BE insn at expand time.
1980 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
1981 [(match_operand:<VDBL> 0 "register_operand" "=w")
1982 (match_operand:VDF 1 "register_operand" "0")
1983 (match_operand:<VWIDE> 2 "register_operand" "w")]
1986 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
1987 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
1988 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
1989 emit_insn (gen (operands[0], operands[1], operands[2]));
;; Pack two V2DF vectors into one V4SF: FCVTN into the low half (temp is
;; the narrow V2SF result), then FCVTN2 into the high half.  lo/hi operand
;; choice flips on big-endian.
1994 (define_expand "vec_pack_trunc_v2df"
1995 [(set (match_operand:V4SF 0 "register_operand")
1997 (float_truncate:V2SF
1998 (match_operand:V2DF 1 "register_operand"))
1999 (float_truncate:V2SF
2000 (match_operand:V2DF 2 "register_operand"))
2004 rtx tmp = gen_reg_rtx (V2SFmode);
2005 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2006 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2008 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2009 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2010 tmp, operands[hi]));
;; Pack two DF scalars into one V2SF: place both doubles into a V2DF
;; temporary (endian-aware lo/hi), then narrow it with FCVTN.
;; Fixed: the temporary must be V2DFmode, not V2SFmode -- it is the VQ
;; operand of gen_move_lo_quad_v2df/gen_move_hi_quad_v2df and the V2DF
;; input of gen_aarch64_float_truncate_lo_v2sf; a V2SF register there is a
;; mode mismatch.  (Contrast vec_pack_trunc_v2df above, whose temporary
;; really is the narrow V2SF result.)
2015 (define_expand "vec_pack_trunc_df"
2016 [(set (match_operand:V2SF 0 "register_operand")
2019 (match_operand:DF 1 "register_operand"))
2021 (match_operand:DF 2 "register_operand"))
2025 rtx tmp = gen_reg_rtx (V2DFmode);
2026 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2027 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2029 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2030 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2031 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2037 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2039 ;; a = (b < c) ? b : c;
2040 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2041 ;; either explicitly or indirectly via -ffast-math.
2043 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2044 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2045 ;; operand will be returned when both operands are zero (i.e. they may not
2046 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2047 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; smax/smin standard names for FP vectors: implemented with FMAXNM/FMINNM,
;; which is valid given the relaxed NaN/signed-zero requirements above.
2050 (define_insn "<su><maxmin><mode>3"
2051 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2052 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2053 (match_operand:VHSDF 2 "register_operand" "w")))]
2055 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2056 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2059 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2060 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2061 ;; which implement the IEEE fmax ()/fmin () functions.
2062 (define_insn "<maxmin_uns><mode>3"
2063 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2064 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2065 (match_operand:VHSDF 2 "register_operand" "w")]
2068 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2069 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2072 ;; 'across lanes' add.
;; Integer add-reduction to a scalar: reduce into a vector scratch, then
;; extract architectural lane 0 (endian-remapped via ENDIAN_LANE_N).
2074 (define_expand "reduc_plus_scal_<mode>"
2075 [(match_operand:<VEL> 0 "register_operand" "=w")
2076 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2080 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2081 rtx scratch = gen_reg_rtx (<MODE>mode);
2082 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2083 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; FADDP: pairwise FP add, also the building block for v4sf reduction.
2088 (define_insn "aarch64_faddp<mode>"
2089 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2090 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2091 (match_operand:VHSDF 2 "register_operand" "w")]
2094 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2095 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; ADDV/ADDP across-lanes integer add into element 0 of the destination.
2098 (define_insn "aarch64_reduc_plus_internal<mode>"
2099 [(set (match_operand:VDQV 0 "register_operand" "=w")
2100 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2103 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2104 [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP does the 2-lane reduction.
2107 (define_insn "aarch64_reduc_plus_internalv2si"
2108 [(set (match_operand:V2SI 0 "register_operand" "=w")
2109 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2112 "addp\\t%0.2s, %1.2s, %1.2s"
2113 [(set_attr "type" "neon_reduc_add")]
;; Two-element FP vectors reduce with the scalar-destination FADDP form.
2116 (define_insn "reduc_plus_scal_<mode>"
2117 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2118 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2121 "faddp\\t%<Vetype>0, %1.<Vtype>"
2122 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two pairwise FADDPs, then extract lane 0.
2125 (define_expand "reduc_plus_scal_v4sf"
2126 [(set (match_operand:SF 0 "register_operand")
2127 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2131 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2132 rtx scratch = gen_reg_rtx (V4SFmode);
2133 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2134 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2135 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
;; Count leading sign bits per lane: CLS.
2139 (define_insn "clrsb<mode>2"
2140 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2141 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2143 "cls\\t%0.<Vtype>, %1.<Vtype>"
2144 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros per lane: CLZ.
2147 (define_insn "clz<mode>2"
2148 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2149 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2151 "clz\\t%0.<Vtype>, %1.<Vtype>"
2152 [(set_attr "type" "neon_cls<q>")]
;; Population count per byte lane: CNT (byte vectors only).
2155 (define_insn "popcount<mode>2"
2156 [(set (match_operand:VB 0 "register_operand" "=w")
2157 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2159 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2160 [(set_attr "type" "neon_cnt<q>")]
2163 ;; 'across lanes' max and min ops.
2165 ;; Template for outputting a scalar, so we can create __builtins which can be
2166 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
;; FP max/min reduction to a scalar: reduce into a vector scratch, then
;; extract architectural lane 0 (endian-remapped via ENDIAN_LANE_N).
2167 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2168 [(match_operand:<VEL> 0 "register_operand")
2169 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2173 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2174 rtx scratch = gen_reg_rtx (<MODE>mode);
2175 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2177 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2182 ;; Likewise for integer cases, signed and unsigned.
2183 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2184 [(match_operand:<VEL> 0 "register_operand")
2185 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2189 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2190 rtx scratch = gen_reg_rtx (<MODE>mode);
2191 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2193 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
;; Across-lanes integer max/min: SMAXV/SMINV/UMAXV/UMINV.
2198 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2199 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2200 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2203 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2204 [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op reduces the two lanes.
2207 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2208 [(set (match_operand:V2SI 0 "register_operand" "=w")
2209 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2212 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2213 [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (<vp> selects the V/P instruction form).
2216 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2217 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2218 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2221 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2222 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2225 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2227 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2230 ;; Thus our BSL is of the form:
2231 ;; op0 = bsl (mask, op2, op3)
2232 ;; We can use any of:
2235 ;; bsl mask, op1, op2
2236 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2237 ;; bit op0, op2, mask
2238 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2239 ;; bif op0, op1, mask
2241 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2242 ;; Some forms of straight-line code may generate the equivalent form
2243 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical XOR/AND/XOR form of bit-select; the three alternatives tie the
;; destination to operand 1, 3 or 2 and choose BSL/BIT/BIF accordingly.
2245 (define_insn "aarch64_simd_bsl<mode>_internal"
2246 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2250 (match_operand:<V_cmp_result> 3 "register_operand" "w,0,w")
2251 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2252 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2253 (match_dup:<V_cmp_result> 3)
2257 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2258 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2259 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2260 [(set_attr "type" "neon_bsl<q>")]
2263 ;; We need this form in addition to the above pattern to match the case
2264 ;; when combine tries merging three insns such that the second operand of
2265 ;; the outer XOR matches the second operand of the inner XOR rather than
2266 ;; the first. The two are equivalent but since recog doesn't try all
2267 ;; permutations of commutative operations, we have to have a separate pattern.
;; Same semantics as the _internal pattern with the XOR operands commuted,
;; hence operands 2 and 3 swap roles in the output templates.
2269 (define_insn "*aarch64_simd_bsl<mode>_alt"
2270 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2274 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2275 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2276 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2277 (match_dup:VSDQ_I_DI 2)))]
2280 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2281 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2282 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2283 [(set_attr "type" "neon_bsl<q>")]
;; Expander: for FP modes the select is done in the equivalent integer mode
;; (<V_cmp_result>) via lowparts, writing through a scratch and moving the
;; result back only when a mode change was needed.
2286 (define_expand "aarch64_simd_bsl<mode>"
2287 [(match_operand:VALLDIF 0 "register_operand")
2288 (match_operand:<V_cmp_result> 1 "register_operand")
2289 (match_operand:VALLDIF 2 "register_operand")
2290 (match_operand:VALLDIF 3 "register_operand")]
2293 /* We can't alias operands together if they have different modes. */
2294 rtx tmp = operands[0];
2295 if (FLOAT_MODE_P (<MODE>mode))
2297 operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2298 operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2299 tmp = gen_reg_rtx (<V_cmp_result>mode);
2301 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2302 emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2306 if (tmp != operands[0])
2307 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
;; Mask-driven select: op0 = op3 ? op1 : op2.  The all-ones/all-zeros constant
;; cases collapse to a move or a bitwise-not of the mask; otherwise force the
;; value operands into registers and emit a BSL.
2312 (define_expand "vcond_mask_<mode><v_cmp_result>"
2313 [(match_operand:VALLDI 0 "register_operand")
2314 (match_operand:VALLDI 1 "nonmemory_operand")
2315 (match_operand:VALLDI 2 "nonmemory_operand")
2316 (match_operand:<V_cmp_result> 3 "register_operand")]
2319 /* If we have (a = (P) ? -1 : 0);
2320 Then we can simply move the generated mask (result must be int). */
2321 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2322 && operands[2] == CONST0_RTX (<MODE>mode))
2323 emit_move_insn (operands[0], operands[3]);
2324 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2325 else if (operands[1] == CONST0_RTX (<MODE>mode)
2326 && operands[2] == CONSTM1_RTX (<MODE>mode))
2327 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[3]));
2330 if (!REG_P (operands[1]))
2331 operands[1] = force_reg (<MODE>mode, operands[1]);
2332 if (!REG_P (operands[2]))
2333 operands[2] = force_reg (<MODE>mode, operands[2]);
2334 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2335 operands[1], operands[2]));
2341 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare: dispatch on the rtx code to the matching CM*
;; instruction, swapping operands where only one direction exists
;; (e.g. LTU via CMGTU with reversed operands) and synthesizing NE as
;; NOT (EQ).  The visible dispatch arms are elided switch cases.
2343 (define_expand "vec_cmp<mode><mode>"
2344 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2345 (match_operator 1 "comparison_operator"
2346 [(match_operand:VSDQ_I_DI 2 "register_operand")
2347 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2350 rtx mask = operands[0];
2351 enum rtx_code code = GET_CODE (operands[1]);
2361 if (operands[3] == CONST0_RTX (<MODE>mode))
2366 if (!REG_P (operands[3]))
2367 operands[3] = force_reg (<MODE>mode, operands[3]);
2375 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2379 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2383 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2387 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2391 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2395 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2399 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2403 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2407 /* Handle NE as !EQ. */
2408 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2409 emit_insn (gen_one_cmpl<v_cmp_result>2 (mask, mask));
2413 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
;; FP vector compare producing an integer mask.  Ordered comparisons map to
;; FCMGE/FCMGT/FCMEQ (swapping operands for LT/LE); unordered-tolerant codes
;; are built from the inverse comparison plus a NOT, and UNEQ/ORDERED/
;; UNORDERED combine two compares with OR (and NOT where needed).
2423 (define_expand "vec_cmp<mode><v_cmp_result>"
2424 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2425 (match_operator 1 "comparison_operator"
2426 [(match_operand:VDQF 2 "register_operand")
2427 (match_operand:VDQF 3 "nonmemory_operand")]))]
2430 int use_zero_form = 0;
2431 enum rtx_code code = GET_CODE (operands[1]);
2432 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
2434 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2443 if (operands[3] == CONST0_RTX (<MODE>mode))
2450 if (!REG_P (operands[3]))
2451 operands[3] = force_reg (<MODE>mode, operands[3]);
2461 comparison = gen_aarch64_cmlt<mode>;
2466 std::swap (operands[2], operands[3]);
2470 comparison = gen_aarch64_cmgt<mode>;
2475 comparison = gen_aarch64_cmle<mode>;
2480 std::swap (operands[2], operands[3]);
2484 comparison = gen_aarch64_cmge<mode>;
2488 comparison = gen_aarch64_cmeq<mode>;
2505 /* FCM returns false for lanes which are unordered, so if we use
2506 the inverse of the comparison we actually want to emit, then
2507 invert the result, we will end up with the correct result.
2508 Note that a NE NaN and NaN NE b are true for all a, b.
2510 Our transformations are:
2511 a UNGE b -> !(b GT a)
2512 a UNGT b -> !(b GE a)
2513 a UNLE b -> !(a GT b)
2514 a UNLT b -> !(a GE b)
2515 a NE b -> !(a EQ b) */
2516 gcc_assert (comparison != NULL);
2517 emit_insn (comparison (operands[0], operands[2], operands[3]));
2518 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2526 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2527 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2533 gcc_assert (comparison != NULL);
2534 emit_insn (comparison (operands[0], operands[2], operands[3]));
2538 /* We first check (a > b || b > a) which is !UNEQ, inverting
2539 this result will then give us (a == b || a UNORDERED b). */
2540 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2541 operands[2], operands[3]));
2542 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2543 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2544 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2548 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2549 UNORDERED as !ORDERED. */
2550 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2551 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2552 operands[3], operands[2]));
2553 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
2554 emit_insn (gen_one_cmpl<v_cmp_result>2 (operands[0], operands[0]));
2558 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2559 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2560 operands[3], operands[2]));
2561 emit_insn (gen_ior<v_cmp_result>3 (operands[0], operands[0], tmp));
;; Unsigned integer compare simply defers to the signed-named expander,
;; which already dispatches on the (unsigned) rtx code itself.
2571 (define_expand "vec_cmpu<mode><mode>"
2572 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2573 (match_operator 1 "comparison_operator"
2574 [(match_operand:VSDQ_I_DI 2 "register_operand")
2575 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2578 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2579 operands[2], operands[3]));
;; vcond: compute a comparison mask then select via vcond_mask.  NE is
;; rewritten as EQ with the two value operands swapped, saving the extra
;; inversion the NE path of vec_cmp would emit.
2583 (define_expand "vcond<mode><mode>"
2584 [(set (match_operand:VALLDI 0 "register_operand")
2585 (if_then_else:VALLDI
2586 (match_operator 3 "comparison_operator"
2587 [(match_operand:VALLDI 4 "register_operand")
2588 (match_operand:VALLDI 5 "nonmemory_operand")])
2589 (match_operand:VALLDI 1 "nonmemory_operand")
2590 (match_operand:VALLDI 2 "nonmemory_operand")))]
2593 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2594 enum rtx_code code = GET_CODE (operands[3]);
2596 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2597 it as well as switch operands 1/2 in order to avoid the additional
2601 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2602 operands[4], operands[5]);
2603 std::swap (operands[1], operands[2]);
2605 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2606 operands[4], operands[5]));
2607 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2608 operands[2], mask));
;; Mixed-mode vcond: FP comparison selecting between integer vectors of the
;; corresponding width (<V_cmp_mixed>); same NE-as-swapped-EQ trick.
2613 (define_expand "vcond<v_cmp_mixed><mode>"
2614 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2615 (if_then_else:<V_cmp_mixed>
2616 (match_operator 3 "comparison_operator"
2617 [(match_operand:VDQF_COND 4 "register_operand")
2618 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2619 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2620 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2623 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2624 enum rtx_code code = GET_CODE (operands[3]);
2626 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2627 it as well as switch operands 1/2 in order to avoid the additional
2631 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2632 operands[4], operands[5]);
2633 std::swap (operands[1], operands[2]);
2635 emit_insn (gen_vec_cmp<mode><v_cmp_result> (mask, operands[3],
2636 operands[4], operands[5]));
2637 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_cmp_result> (
2638 operands[0], operands[1],
2639 operands[2], mask));
;; Unsigned-condition vcond over integer vectors; the mask is computed in the
;; same integer mode.
2644 (define_expand "vcondu<mode><mode>"
2645 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2646 (if_then_else:VSDQ_I_DI
2647 (match_operator 3 "comparison_operator"
2648 [(match_operand:VSDQ_I_DI 4 "register_operand")
2649 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2650 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2651 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2654 rtx mask = gen_reg_rtx (<MODE>mode);
2655 enum rtx_code code = GET_CODE (operands[3]);
2657 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2658 it as well as switch operands 1/2 in order to avoid the additional
2662 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2663 operands[4], operands[5]);
2664 std::swap (operands[1], operands[2]);
2666 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2667 operands[4], operands[5]));
2668 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2669 operands[2], mask));
;; Unsigned comparison of integer vectors selecting between FP vectors
;; (the mixed mode supplies the comparison operands).
2673 (define_expand "vcondu<mode><v_cmp_mixed>"
2674 [(set (match_operand:VDQF 0 "register_operand")
2676 (match_operator 3 "comparison_operator"
2677 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2678 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2679 (match_operand:VDQF 1 "nonmemory_operand")
2680 (match_operand:VDQF 2 "nonmemory_operand")))]
2683 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
2684 enum rtx_code code = GET_CODE (operands[3]);
2686 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2687 it as well as switch operands 1/2 in order to avoid the additional
2691 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2692 operands[4], operands[5]);
2693 std::swap (operands[1], operands[2]);
2695 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2697 operands[4], operands[5]));
2698 emit_insn (gen_vcond_mask_<mode><v_cmp_result> (operands[0], operands[1],
2699 operands[2], mask));
2703 ;; Patterns for AArch64 SIMD Intrinsics.
2705 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract a lane and sign-extend into a W/X register; the lane index
;; is flipped for big-endian via ENDIAN_LANE_N at output time.
2706 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2707 [(set (match_operand:GPI 0 "register_operand" "=r")
2710 (match_operand:VDQQH 1 "register_operand" "w")
2711 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2714 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2715 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2717 [(set_attr "type" "neon_to_gp<q>")]
;; UMOV: lane extraction zero-extended into a 32-bit GP register.
2720 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2721 [(set (match_operand:SI 0 "register_operand" "=r")
2724 (match_operand:VDQQH 1 "register_operand" "w")
2725 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2728 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2729 return "umov\\t%w0, %1.<Vetype>[%2]";
2731 [(set_attr "type" "neon_to_gp<q>")]
2734 ;; Lane extraction of a value, neither sign nor zero extension
2735 ;; is guaranteed so upper bits should be considered undefined.
2736 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives: UMOV to a GP register, DUP to a SIMD scalar, or ST1 of
;; a single lane straight to memory.
2737 (define_insn "aarch64_get_lane<mode>"
2738 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2740 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2741 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2744 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2745 switch (which_alternative)
2748 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2750 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2752 return "st1\\t{%1.<Vetype>}[%2], %0";
2757 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2760 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Little-endian concatenation of a 64-bit value with zero; the zero half
;; costs nothing because writing a D-register clears the upper half.
2763 (define_insn "*aarch64_combinez<mode>"
2764 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2766 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2767 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2768 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2773 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2774 (set_attr "simd" "yes,*,yes")
2775 (set_attr "fp" "*,yes,*")]
;; Big-endian twin of the above: the zero half appears first in the
;; vec_concat because memory lane order is reversed.
2778 (define_insn "*aarch64_combinez_be<mode>"
2779 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2781 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2782 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2783 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2788 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2789 (set_attr "simd" "yes,*,yes")
2790 (set_attr "fp" "*,yes,*")]
;; Combine two D-registers into a Q-register, routing through the internal
;; pattern; operand order adjustment for big-endian is elided here.
2793 (define_expand "aarch64_combine<mode>"
2794 [(match_operand:<VDBL> 0 "register_operand")
2795 (match_operand:VDC 1 "register_operand")
2796 (match_operand:VDC 2 "register_operand")]
2800 if (BYTES_BIG_ENDIAN)
2810 emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
;; Kept as one insn until reload, then split into the two half moves; the
;; halves are swapped when splitting for big-endian.
2815 (define_insn_and_split "aarch64_combine_internal<mode>"
2816 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2817 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2818 (match_operand:VDC 2 "register_operand" "w")))]
2821 "&& reload_completed"
2824 if (BYTES_BIG_ENDIAN)
2825 aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2827 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2830 [(set_attr "type" "multiple")]
;; Builtin-facing combine: explicit low-half then high-half moves.
2833 (define_expand "aarch64_simd_combine<mode>"
2834 [(match_operand:<VDBL> 0 "register_operand")
2835 (match_operand:VDC 1 "register_operand")
2836 (match_operand:VDC 2 "register_operand")]
2839 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2840 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2843 [(set_attr "type" "multiple")]
2846 ;; <su><addsub>l<q>.
;; Widening add/sub on the HIGH halves of two Q-registers:
;; [SU]ADDL2 / [SU]SUBL2.
2848 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2849 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2850 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2851 (match_operand:VQW 1 "register_operand" "w")
2852 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2853 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2854 (match_operand:VQW 2 "register_operand" "w")
2857 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2858 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub on the LOW halves: [SU]ADDL / [SU]SUBL.
2861 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2862 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2863 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2864 (match_operand:VQW 1 "register_operand" "w")
2865 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2866 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2867 (match_operand:VQW 2 "register_operand" "w")
2870 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2871 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Builtin expanders for the "2" (high-half) variants: build the hi-half
;; lane-selection parallel and defer to the _hi_internal insns above.
2875 (define_expand "aarch64_saddl2<mode>"
2876 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2877 (match_operand:VQW 1 "register_operand" "w")
2878 (match_operand:VQW 2 "register_operand" "w")]
2881 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2882 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2887 (define_expand "aarch64_uaddl2<mode>"
2888 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2889 (match_operand:VQW 1 "register_operand" "w")
2890 (match_operand:VQW 2 "register_operand" "w")]
2893 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2894 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2899 (define_expand "aarch64_ssubl2<mode>"
2900 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2901 (match_operand:VQW 1 "register_operand" "w")
2902 (match_operand:VQW 2 "register_operand" "w")]
2905 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2906 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2911 (define_expand "aarch64_usubl2<mode>"
2912 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2913 (match_operand:VQW 1 "register_operand" "w")
2914 (match_operand:VQW 2 "register_operand" "w")]
2917 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2918 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening add/sub of two full D-register vectors: [SU]ADDL / [SU]SUBL.
2923 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2925 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2926 (match_operand:VD_BHSI 1 "register_operand" "w"))
2928 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2930 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2931 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2934 ;; <su><addsub>w<q>.
;; widen_ssum for Q-register inputs: accumulate the sign-extended low half
;; with SADDW, then fold in the high half with SADDW2.
2936 (define_expand "widen_ssum<mode>3"
2937 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2938 (plus:<VDBLW> (sign_extend:<VDBLW>
2939 (match_operand:VQW 1 "register_operand" ""))
2940 (match_operand:<VDBLW> 2 "register_operand" "")))]
2943 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2944 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2946 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2948 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
;; D-register input needs only a single SADDW.
2953 (define_expand "widen_ssum<mode>3"
2954 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2955 (plus:<VWIDE> (sign_extend:<VWIDE>
2956 (match_operand:VD_BHSI 1 "register_operand" ""))
2957 (match_operand:<VWIDE> 2 "register_operand" "")))]
2960 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
;; Unsigned counterparts of the two widen sums above (UADDW / UADDW2).
2964 (define_expand "widen_usum<mode>3"
2965 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2966 (plus:<VDBLW> (zero_extend:<VDBLW>
2967 (match_operand:VQW 1 "register_operand" ""))
2968 (match_operand:<VDBLW> 2 "register_operand" "")))]
2971 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2972 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2974 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
2976 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
2981 (define_expand "widen_usum<mode>3"
2982 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2983 (plus:<VWIDE> (zero_extend:<VWIDE>
2984 (match_operand:VD_BHSI 1 "register_operand" ""))
2985 (match_operand:<VWIDE> 2 "register_operand" "")))]
2988 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
;; Wide-plus-narrow add/sub of a full D-register: [SU]ADDW / [SU]SUBW.
2992 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2993 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2994 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2996 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2998 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2999 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Low-half variant of the wide add/sub (operand 3 selects the low lanes).
3002 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3003 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3004 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3007 (match_operand:VQW 2 "register_operand" "w")
3008 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3010 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3011 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; High-half variant: [SU]ADDW2 / [SU]SUBW2.
3014 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3015 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3016 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3019 (match_operand:VQW 2 "register_operand" "w")
3020 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3022 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3023 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Builtin expanders that construct the hi-half parallel and emit the
;; *w2_internal insns above.
3026 (define_expand "aarch64_saddw2<mode>"
3027 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3028 (match_operand:<VWIDE> 1 "register_operand" "w")
3029 (match_operand:VQW 2 "register_operand" "w")]
3032 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3033 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3038 (define_expand "aarch64_uaddw2<mode>"
3039 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3040 (match_operand:<VWIDE> 1 "register_operand" "w")
3041 (match_operand:VQW 2 "register_operand" "w")]
3044 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3045 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3051 (define_expand "aarch64_ssubw2<mode>"
3052 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3053 (match_operand:<VWIDE> 1 "register_operand" "w")
3054 (match_operand:VQW 2 "register_operand" "w")]
3057 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3058 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3063 (define_expand "aarch64_usubw2<mode>"
3064 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3065 (match_operand:<VWIDE> 1 "register_operand" "w")
3066 (match_operand:VQW 2 "register_operand" "w")]
3069 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3070 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3075 ;; <su><r>h<addsub>.
;; Halving add/sub, signed/unsigned, optionally rounding:
;; [SU]HADD, [SU]RHADD, [SU]HSUB, [SU]RHSUB.
3077 (define_insn "aarch64_<sur>h<addsub><mode>"
3078 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3079 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3080 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3083 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3084 [(set_attr "type" "neon_<addsub>_halve<q>")]
3087 ;; <r><addsub>hn<q>.
;; Add/sub returning the (optionally rounded) high half of each result
;; lane, narrowed: ADDHN / SUBHN / RADDHN / RSUBHN.
3089 (define_insn "aarch64_<sur><addsub>hn<mode>"
3090 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3091 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3092 (match_operand:VQN 2 "register_operand" "w")]
3095 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3096 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" form writes the upper half of the destination; operand 1 ("0")
;; carries the already-written lower half, hence the tied constraint.
3099 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3100 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3101 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3102 (match_operand:VQN 2 "register_operand" "w")
3103 (match_operand:VQN 3 "register_operand" "w")]
3106 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3107 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; Polynomial (carry-less) multiply on byte vectors: PMUL.
3112 (define_insn "aarch64_pmul<mode>"
3113 [(set (match_operand:VB 0 "register_operand" "=w")
3114 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3115 (match_operand:VB 2 "register_operand" "w")]
3118 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3119 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; FMULX: FP multiply-extended (vector and scalar forms via VHSDF_HSDF).
3124 (define_insn "aarch64_fmulx<mode>"
3125 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3127 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3128 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3131 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3132 [(set_attr "type" "neon_fp_mul_<stype>")]
3135 ;; vmulxq_lane_f32, and vmulx_laneq_f32
;; FMULX by a lane taken from a vector of the opposite width; lane index
;; endian-corrected at output time.
3137 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3138 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3140 [(match_operand:VDQSF 1 "register_operand" "w")
3141 (vec_duplicate:VDQSF
3143 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3144 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3148 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3149 INTVAL (operands[3])));
3150 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3152 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3155 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
;; FMULX by a lane of a same-width vector.
3157 (define_insn "*aarch64_mulx_elt<mode>"
3158 [(set (match_operand:VDQF 0 "register_operand" "=w")
3160 [(match_operand:VDQF 1 "register_operand" "w")
3163 (match_operand:VDQF 2 "register_operand" "w")
3164 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3168 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3169 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3171 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; FMULX where the scalar multiplier is broadcast (dup of a scalar reg),
;; emitted as a by-element multiply using lane 0.
3176 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3177 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3179 [(match_operand:VHSDF 1 "register_operand" "w")
3180 (vec_duplicate:VHSDF
3181 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3184 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3185 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3188 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3189 ;; vmulxd_lane_f64 == vmulx_lane_f64
3190 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
;; Scalar FMULX against an extracted vector lane.
3192 (define_insn "*aarch64_vgetfmulx<mode>"
3193 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3195 [(match_operand:<VEL> 1 "register_operand" "w")
3197 (match_operand:VDQF 2 "register_operand" "w")
3198 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3202 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3203 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3205 [(set_attr "type" "fmul<Vetype>")]
;; Saturating add/sub, signed and unsigned: [SU]QADD / [SU]QSUB
;; (BINQOPS iterator over the ss/us plus/minus codes).
3209 (define_insn "aarch64_<su_optab><optab><mode>"
3210 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3211 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3212 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3214 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3215 [(set_attr "type" "neon_<optab><q>")]
3218 ;; suqadd and usqadd
;; Mixed-signedness saturating accumulate; operand 1 is tied to the
;; destination because SUQADD/USQADD read and write the same register.
3220 (define_insn "aarch64_<sur>qadd<mode>"
3221 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3222 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3223 (match_operand:VSDQ_I 2 "register_operand" "w")]
3226 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3227 [(set_attr "type" "neon_qadd<q>")]
;; Saturating extract-narrow of signed input to unsigned result: SQXTUN.
3232 (define_insn "aarch64_sqmovun<mode>"
3233 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3234 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3237 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3238 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3241 ;; sqmovn and uqmovn
;; Saturating extract-narrow keeping signedness: SQXTN / UQXTN.
3243 (define_insn "aarch64_<sur>qmovn<mode>"
3244 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3245 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3248 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3249 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3254 (define_insn "aarch64_s<optab><mode>"
3255 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3257 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3259 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3260 [(set_attr "type" "neon_<optab><q>")]
3265 (define_insn "aarch64_sq<r>dmulh<mode>"
3266 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3268 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3269 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3272 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3273 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; sq<r>dmulh by-lane variants.  Operand 2 is the lane-holding vector
;; (<VCOND> for _lane / 64-bit index space, <VCONQ> for _laneq /
;; 128-bit index space); operand 3 is the immediate lane number,
;; remapped through ENDIAN_LANE_N so GCC's lane numbering matches the
;; architectural one on big-endian.
;; Vector output, _lane (lane index taken from a <VCOND>-mode vector).
3278 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3279 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3281 [(match_operand:VDQHS 1 "register_operand" "w")
3283 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3284 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3288 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3289 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3290 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Vector output, _laneq (lane index into a 128-bit <VCONQ> vector).
3293 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3294 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3296 [(match_operand:VDQHS 1 "register_operand" "w")
3298 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3299 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3303 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3304 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) output, _lane.
3308 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3309 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3311 [(match_operand:SD_HSI 1 "register_operand" "w")
3313 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3314 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3318 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3319 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3320 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) output, _laneq.
3323 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3324 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3326 [(match_operand:SD_HSI 1 "register_operand" "w")
3328 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3333 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3334 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3335 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqrdmlah / sqrdmlsh (selected by SQRDMLH_AS:rdma_as).  Operand 1 is
;; the accumulator and is tied to the output via the "0" constraint,
;; so only operands 2 and 3 appear in the emitted template.
3340 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3341 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3343 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3344 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3345 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3348 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3349 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3352 ;; sqrdml[as]h_lane.
;; By-lane form, vector (VDQHS) output; lane index in operand 4 is
;; flipped for big-endian with ENDIAN_LANE_N (<VCOND> = 64-bit space).
3354 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3355 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3357 [(match_operand:VDQHS 1 "register_operand" "0")
3358 (match_operand:VDQHS 2 "register_operand" "w")
3360 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3361 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3365 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3367 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3369 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-lane form, scalar (SD_HSI) output.
3372 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3373 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3375 [(match_operand:SD_HSI 1 "register_operand" "0")
3376 (match_operand:SD_HSI 2 "register_operand" "w")
3378 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3379 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3383 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3385 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3387 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3390 ;; sqrdml[as]h_laneq.
;; _laneq forms: lane index into a full 128-bit (<VCONQ>) vector.
3392 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3393 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3395 [(match_operand:VDQHS 1 "register_operand" "0")
3396 (match_operand:VDQHS 2 "register_operand" "w")
3398 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3399 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3403 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3405 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3407 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar _laneq form.
3410 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3411 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3413 [(match_operand:SD_HSI 1 "register_operand" "0")
3414 (match_operand:SD_HSI 2 "register_operand" "w")
3416 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3417 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3421 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3423 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3425 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal / sqdmlsl (selected by SBINQOPS:as): widening saturating
;; multiply-accumulate.  Both multiplicands are sign_extend-ed to the
;; wide mode <VWIDE>; operand 1 is the wide accumulator tied to the
;; output ("0" constraint).
3430 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3431 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3433 (match_operand:<VWIDE> 1 "register_operand" "0")
3436 (sign_extend:<VWIDE>
3437 (match_operand:VSD_HSI 2 "register_operand" "w"))
3438 (sign_extend:<VWIDE>
3439 (match_operand:VSD_HSI 3 "register_operand" "w")))
3442 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3443 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane variants: second multiplicand is a lane of operand 3,
;; vec_duplicate-d across the vector; lane number (operand 4) is
;; remapped with ENDIAN_LANE_N for big-endian.
3448 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3449 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3451 (match_operand:<VWIDE> 1 "register_operand" "0")
3454 (sign_extend:<VWIDE>
3455 (match_operand:VD_HSI 2 "register_operand" "w"))
3456 (sign_extend:<VWIDE>
3457 (vec_duplicate:VD_HSI
3459 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3460 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3465 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3467 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3469 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _laneq: lane taken from a 128-bit (<VCONQ>) vector.
3472 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3473 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3475 (match_operand:<VWIDE> 1 "register_operand" "0")
3478 (sign_extend:<VWIDE>
3479 (match_operand:VD_HSI 2 "register_operand" "w"))
3480 (sign_extend:<VWIDE>
3481 (vec_duplicate:VD_HSI
3483 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3484 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3489 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3491 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3493 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form — no vec_duplicate needed.
3496 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3497 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3499 (match_operand:<VWIDE> 1 "register_operand" "0")
3502 (sign_extend:<VWIDE>
3503 (match_operand:SD_HSI 2 "register_operand" "w"))
3504 (sign_extend:<VWIDE>
3506 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3507 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3512 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3514 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3516 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _laneq form.
3519 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3520 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3522 (match_operand:<VWIDE> 1 "register_operand" "0")
3525 (sign_extend:<VWIDE>
3526 (match_operand:SD_HSI 2 "register_operand" "w"))
3527 (sign_extend:<VWIDE>
3529 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3530 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3535 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3537 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3539 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; _n form: scalar operand 3 broadcast, always lane 0 in the template.
3544 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3545 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3547 (match_operand:<VWIDE> 1 "register_operand" "0")
3550 (sign_extend:<VWIDE>
3551 (match_operand:VD_HSI 2 "register_operand" "w"))
3552 (sign_extend:<VWIDE>
3553 (vec_duplicate:VD_HSI
3554 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3557 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3558 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdml[as]l2: same as sqdml[as]l but operating on the HIGH half of
;; 128-bit inputs, selected with a vect_par_cnst_hi_half vec_select.
;; The *_internal insns take the hi-half parallel as an extra operand;
;; the plain-named define_expands build that parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and forward to
;; the internal pattern.
3563 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3566 (match_operand:<VWIDE> 1 "register_operand" "0")
3569 (sign_extend:<VWIDE>
3571 (match_operand:VQ_HSI 2 "register_operand" "w")
3572 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3573 (sign_extend:<VWIDE>
3575 (match_operand:VQ_HSI 3 "register_operand" "w")
3579 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3580 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: builds the hi-half selector and emits the internal insn.
3583 (define_expand "aarch64_sqdmlal2<mode>"
3584 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3585 (match_operand:<VWIDE> 1 "register_operand" "w")
3586 (match_operand:VQ_HSI 2 "register_operand" "w")
3587 (match_operand:VQ_HSI 3 "register_operand" "w")]
3590 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3591 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3592 operands[2], operands[3], p));
;; Same shape for the subtracting (sqdmlsl2) variant.
3596 (define_expand "aarch64_sqdmlsl2<mode>"
3597 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3598 (match_operand:<VWIDE> 1 "register_operand" "w")
3599 (match_operand:VQ_HSI 2 "register_operand" "w")
3600 (match_operand:VQ_HSI 3 "register_operand" "w")]
3603 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3604 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3605 operands[2], operands[3], p));
;; Hi-half by-lane internal; lane (operand 4) endian-remapped.
3611 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3612 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3614 (match_operand:<VWIDE> 1 "register_operand" "0")
3617 (sign_extend:<VWIDE>
3619 (match_operand:VQ_HSI 2 "register_operand" "w")
3620 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3621 (sign_extend:<VWIDE>
3622 (vec_duplicate:<VHALF>
3624 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3625 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3630 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3632 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3634 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Hi-half by-laneq internal (<VCONQ> index space).
3637 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3638 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3640 (match_operand:<VWIDE> 1 "register_operand" "0")
3643 (sign_extend:<VWIDE>
3645 (match_operand:VQ_HSI 2 "register_operand" "w")
3646 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3647 (sign_extend:<VWIDE>
3648 (vec_duplicate:<VHALF>
3650 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3651 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3656 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3658 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3660 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; The four lane/laneq expanders below are identical in shape:
;; build the hi-half parallel, forward all operands to *_internal.
3663 (define_expand "aarch64_sqdmlal2_lane<mode>"
3664 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3665 (match_operand:<VWIDE> 1 "register_operand" "w")
3666 (match_operand:VQ_HSI 2 "register_operand" "w")
3667 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3668 (match_operand:SI 4 "immediate_operand" "i")]
3671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3672 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3673 operands[2], operands[3],
3678 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3679 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3680 (match_operand:<VWIDE> 1 "register_operand" "w")
3681 (match_operand:VQ_HSI 2 "register_operand" "w")
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (match_operand:SI 4 "immediate_operand" "i")]
3686 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3687 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3688 operands[2], operands[3],
3693 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3694 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3695 (match_operand:<VWIDE> 1 "register_operand" "w")
3696 (match_operand:VQ_HSI 2 "register_operand" "w")
3697 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3698 (match_operand:SI 4 "immediate_operand" "i")]
3701 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3702 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3703 operands[2], operands[3],
3708 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3709 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3710 (match_operand:<VWIDE> 1 "register_operand" "w")
3711 (match_operand:VQ_HSI 2 "register_operand" "w")
3712 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3713 (match_operand:SI 4 "immediate_operand" "i")]
3716 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3717 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3718 operands[2], operands[3],
;; _n internal: broadcast scalar operand 3, emitted as lane [0].
3723 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3724 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3726 (match_operand:<VWIDE> 1 "register_operand" "0")
3729 (sign_extend:<VWIDE>
3731 (match_operand:VQ_HSI 2 "register_operand" "w")
3732 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3733 (sign_extend:<VWIDE>
3734 (vec_duplicate:<VHALF>
3735 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3738 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3739 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3742 (define_expand "aarch64_sqdmlal2_n<mode>"
3743 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3744 (match_operand:<VWIDE> 1 "register_operand" "w")
3745 (match_operand:VQ_HSI 2 "register_operand" "w")
3746 (match_operand:<VEL> 3 "register_operand" "w")]
3749 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3750 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3751 operands[2], operands[3],
3756 (define_expand "aarch64_sqdmlsl2_n<mode>"
3757 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3758 (match_operand:<VWIDE> 1 "register_operand" "w")
3759 (match_operand:VQ_HSI 2 "register_operand" "w")
3760 (match_operand:<VEL> 3 "register_operand" "w")]
3763 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3764 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3765 operands[2], operands[3],
;; sqdmull: widening multiply, both operands sign_extend-ed to <VWIDE>;
;; no accumulator (contrast with the sqdml[as]l patterns above).
3772 (define_insn "aarch64_sqdmull<mode>"
3773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3776 (sign_extend:<VWIDE>
3777 (match_operand:VSD_HSI 1 "register_operand" "w"))
3778 (sign_extend:<VWIDE>
3779 (match_operand:VSD_HSI 2 "register_operand" "w")))
3782 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3783 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; By-lane variant: lane of operand 2 broadcast via vec_duplicate;
;; lane index (operand 3) endian-remapped with ENDIAN_LANE_N.
3788 (define_insn "aarch64_sqdmull_lane<mode>"
3789 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3792 (sign_extend:<VWIDE>
3793 (match_operand:VD_HSI 1 "register_operand" "w"))
3794 (sign_extend:<VWIDE>
3795 (vec_duplicate:VD_HSI
3797 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3798 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3803 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3804 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3806 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _laneq: 128-bit (<VCONQ>) lane index space.
3809 (define_insn "aarch64_sqdmull_laneq<mode>"
3810 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3813 (sign_extend:<VWIDE>
3814 (match_operand:VD_HSI 1 "register_operand" "w"))
3815 (sign_extend:<VWIDE>
3816 (vec_duplicate:VD_HSI
3818 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3819 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3824 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3825 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3827 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _lane form — no vec_duplicate.
3830 (define_insn "aarch64_sqdmull_lane<mode>"
3831 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834 (sign_extend:<VWIDE>
3835 (match_operand:SD_HSI 1 "register_operand" "w"))
3836 (sign_extend:<VWIDE>
3838 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3839 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3844 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3845 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3847 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) _laneq form.
3850 (define_insn "aarch64_sqdmull_laneq<mode>"
3851 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3854 (sign_extend:<VWIDE>
3855 (match_operand:SD_HSI 1 "register_operand" "w"))
3856 (sign_extend:<VWIDE>
3858 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3859 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3864 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3865 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3867 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; _n form: broadcast scalar operand 2, emitted as lane [0].
3872 (define_insn "aarch64_sqdmull_n<mode>"
3873 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876 (sign_extend:<VWIDE>
3877 (match_operand:VD_HSI 1 "register_operand" "w"))
3878 (sign_extend:<VWIDE>
3879 (vec_duplicate:VD_HSI
3880 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3884 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3885 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: hi-half form of sqdmull.  Internals take a
;; vect_par_cnst_hi_half selector operand; expanders build it with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and forward.
3892 (define_insn "aarch64_sqdmull2<mode>_internal"
3893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3896 (sign_extend:<VWIDE>
3898 (match_operand:VQ_HSI 1 "register_operand" "w")
3899 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3900 (sign_extend:<VWIDE>
3902 (match_operand:VQ_HSI 2 "register_operand" "w")
3907 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3908 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3911 (define_expand "aarch64_sqdmull2<mode>"
3912 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3913 (match_operand:VQ_HSI 1 "register_operand" "w")
3914 (match_operand:VQ_HSI 2 "register_operand" "w")]
3917 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3918 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; Hi-half by-lane internal; lane (operand 3) endian-remapped.
3925 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3926 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3929 (sign_extend:<VWIDE>
3931 (match_operand:VQ_HSI 1 "register_operand" "w")
3932 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3933 (sign_extend:<VWIDE>
3934 (vec_duplicate:<VHALF>
3936 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3937 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3942 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3943 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3945 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Hi-half by-laneq internal (<VCONQ> index space).
3948 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3949 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3952 (sign_extend:<VWIDE>
3954 (match_operand:VQ_HSI 1 "register_operand" "w")
3955 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3956 (sign_extend:<VWIDE>
3957 (vec_duplicate:<VHALF>
3959 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3960 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3965 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3966 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3968 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Lane/laneq expanders — build hi-half parallel, emit internal.
3971 (define_expand "aarch64_sqdmull2_lane<mode>"
3972 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3973 (match_operand:VQ_HSI 1 "register_operand" "w")
3974 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3975 (match_operand:SI 3 "immediate_operand" "i")]
3978 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3979 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3980 operands[2], operands[3],
3985 (define_expand "aarch64_sqdmull2_laneq<mode>"
3986 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3987 (match_operand:VQ_HSI 1 "register_operand" "w")
3988 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3989 (match_operand:SI 3 "immediate_operand" "i")]
3992 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3993 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3994 operands[2], operands[3],
;; _n internal: broadcast scalar operand 2, emitted as lane [0].
4001 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4002 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4005 (sign_extend:<VWIDE>
4007 (match_operand:VQ_HSI 1 "register_operand" "w")
4008 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4009 (sign_extend:<VWIDE>
4010 (vec_duplicate:<VHALF>
4011 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4015 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4016 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4019 (define_expand "aarch64_sqdmull2_n<mode>"
4020 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4021 (match_operand:VQ_HSI 1 "register_operand" "w")
4022 (match_operand:<VEL> 2 "register_operand" "w")]
4025 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4026 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Vector shifts.  <sur> selects the signedness/rounding prefix
;; (s/u/sr/ur per the iterator — confirm members in the full file).
;; Register-controlled shift: s/u[r]shl.
4033 (define_insn "aarch64_<sur>shl<mode>"
4034 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4036 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4037 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4040 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4041 [(set_attr "type" "neon_shift_reg<q>")]
;; Saturating register-controlled shift, optionally rounding (<r>).
4047 (define_insn "aarch64_<sur>q<r>shl<mode>"
4048 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4050 [(match_operand:VSDQ_I 1 "register_operand" "w")
4051 (match_operand:VSDQ_I 2 "register_operand" "w")]
4054 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4055 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left by immediate; when the shift equals the element
;; bit-size the plain SHLL form is used (the <sur>-prefixed form does
;; not encode that amount).
4060 (define_insn "aarch64_<sur>shll_n<mode>"
4061 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4062 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4064 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4068 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4069 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4071 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4073 [(set_attr "type" "neon_shift_imm_long")]
;; Hi-half widening shift-left by immediate, same SHLL2 special case.
4078 (define_insn "aarch64_<sur>shll2_n<mode>"
4079 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4080 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4081 (match_operand:SI 2 "immediate_operand" "i")]
4085 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4086 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4088 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4090 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right by immediate.
4095 (define_insn "aarch64_<sur>shr_n<mode>"
4096 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4097 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4099 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4102 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4103 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate: operand 1 is the accumulator, tied to
;; the output ("0"); the shifted source is operand 2.
4108 (define_insn "aarch64_<sur>sra_n<mode>"
4109 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4110 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4111 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4113 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4116 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4117 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-and-insert (SLI/SRI): destination bits outside the shifted
;; field come from operand 1, tied to the output.
4122 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4123 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4124 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4125 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4127 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4130 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4131 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift-left by immediate (optionally unsigned-saturating <u>).
4136 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4137 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4138 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4140 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4143 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4144 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating (rounding) shift-right-narrow by immediate.
4150 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4151 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4152 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4154 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4157 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4158 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4162 ;; cm(eq|ge|gt|lt|le)
4163 ;; Note, we have constraints for Dz and Z as different expanders
4164 ;; have different ideas of what should be passed to this pattern.
;; Signed integer compares: alternative 1 is reg-reg (cm<n_optab> with
;; operands possibly swapped via <cmp_1>/<cmp_2>); alternative 2 is
;; compare against zero (#0) when operand 2 matches ZDz.
4166 (define_insn "aarch64_cm<optab><mode>"
4167 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4169 (COMPARISONS:<V_cmp_result>
4170 (match_operand:VDQ_I 1 "register_operand" "w,w")
4171 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4175 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4176 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4177 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DI compare: clobbers CC because the GP-register alternative ("r")
;; is split into a compare + cstoredi_neg sequence after reload; the
;; FP-register alternatives re-emit the no-clobber *aarch64_cm<optab>di.
4180 (define_insn_and_split "aarch64_cm<optab>di"
4181 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4184 (match_operand:DI 1 "register_operand" "w,w,r")
4185 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4187 (clobber (reg:CC CC_REGNUM))]
4191 [(set (match_operand:DI 0 "register_operand")
4194 (match_operand:DI 1 "register_operand")
4195 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4198 /* If we are in the general purpose register file,
4199 we split to a sequence of comparison and store. */
4200 if (GP_REGNUM_P (REGNO (operands[0]))
4201 && GP_REGNUM_P (REGNO (operands[1])))
4203 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4204 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4205 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4206 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4209 /* Otherwise, we expand to a similar pattern which does not
4210 clobber CC_REGNUM. */
4212 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI compare in FP registers, no CC clobber.
4215 (define_insn "*aarch64_cm<optab>di"
4216 [(set (match_operand:DI 0 "register_operand" "=w,w")
4219 (match_operand:DI 1 "register_operand" "w,w")
4220 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4222 "TARGET_SIMD && reload_completed"
4224 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4225 cm<optab>\t%d0, %d1, #0"
4226 [(set_attr "type" "neon_compare, neon_compare_zero")]
;; Unsigned compares (UCOMPARISONS) — no compare-against-zero
;; alternative here, both operands must be registers.
4231 (define_insn "aarch64_cm<optab><mode>"
4232 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4234 (UCOMPARISONS:<V_cmp_result>
4235 (match_operand:VDQ_I 1 "register_operand" "w")
4236 (match_operand:VDQ_I 2 "register_operand" "w")
4239 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4240 [(set_attr "type" "neon_compare<q>")]
;; Unsigned DI compare; same GP-vs-FP split strategy as the signed
;; version above, except the CC mode is plain CCmode.
4243 (define_insn_and_split "aarch64_cm<optab>di"
4244 [(set (match_operand:DI 0 "register_operand" "=w,r")
4247 (match_operand:DI 1 "register_operand" "w,r")
4248 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4250 (clobber (reg:CC CC_REGNUM))]
4254 [(set (match_operand:DI 0 "register_operand")
4257 (match_operand:DI 1 "register_operand")
4258 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4261 /* If we are in the general purpose register file,
4262 we split to a sequence of comparison and store. */
4263 if (GP_REGNUM_P (REGNO (operands[0]))
4264 && GP_REGNUM_P (REGNO (operands[1])))
4266 machine_mode mode = CCmode;
4267 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4268 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4269 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4272 /* Otherwise, we expand to a similar pattern which does not
4273 clobber CC_REGNUM. */
4275 [(set_attr "type" "neon_compare,multiple")]
;; Post-reload unsigned DI compare in FP registers.
4278 (define_insn "*aarch64_cm<optab>di"
4279 [(set (match_operand:DI 0 "register_operand" "=w")
4282 (match_operand:DI 1 "register_operand" "w")
4283 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4285 "TARGET_SIMD && reload_completed"
4286 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4287 [(set_attr "type" "neon_compare")]
4292 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4293 ;; we don't have any insns using ne, and aarch64_vcond outputs
4294 ;; not (neg (eq (and x y) 0))
4295 ;; which is rewritten by simplify_rtx as
4296 ;; plus (eq (and x y) 0) -1.
;; Hence this pattern matches the plus/eq/-1 form rather than neg/ne.
4298 (define_insn "aarch64_cmtst<mode>"
4299 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4300 (plus:<V_cmp_result>
4303 (match_operand:VDQ_I 1 "register_operand" "w")
4304 (match_operand:VDQ_I 2 "register_operand" "w"))
4305 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4306 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
4309 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4310 [(set_attr "type" "neon_tst<q>")]
;; DI cmtst: GP-register alternative splits to AND + compare + store
;; (clobbering CC); FP alternative re-emits the no-clobber *cmtstdi.
4313 (define_insn_and_split "aarch64_cmtstdi"
4314 [(set (match_operand:DI 0 "register_operand" "=w,r")
4318 (match_operand:DI 1 "register_operand" "w,r")
4319 (match_operand:DI 2 "register_operand" "w,r"))
4321 (clobber (reg:CC CC_REGNUM))]
4325 [(set (match_operand:DI 0 "register_operand")
4329 (match_operand:DI 1 "register_operand")
4330 (match_operand:DI 2 "register_operand"))
4333 /* If we are in the general purpose register file,
4334 we split to a sequence of comparison and store. */
4335 if (GP_REGNUM_P (REGNO (operands[0]))
4336 && GP_REGNUM_P (REGNO (operands[1])))
4338 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4339 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4340 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4341 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4342 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4345 /* Otherwise, we expand to a similar pattern which does not
4346 clobber CC_REGNUM. */
4348 [(set_attr "type" "neon_tst,multiple")]
;; Post-reload DI cmtst in FP registers.
4351 (define_insn "*aarch64_cmtstdi"
4352 [(set (match_operand:DI 0 "register_operand" "=w")
4356 (match_operand:DI 1 "register_operand" "w")
4357 (match_operand:DI 2 "register_operand" "w"))
4360 "cmtst\t%d0, %d1, %d2"
4361 [(set_attr "type" "neon_tst")]
4364 ;; fcm(eq|ge|gt|le|lt)
;; FP compares: reg-reg alternative plus compare-against-zero (YDz).
;; Note the zero form emits ", 0" with no '#' — matches the
;; upstream pattern; confirm assembler accepts both spellings.
4366 (define_insn "aarch64_cm<optab><mode>"
4367 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
4369 (COMPARISONS:<V_cmp_result>
4370 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4371 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4375 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4376 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4377 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4381 ;; Note we can also handle what would be fac(le|lt) by
4382 ;; generating fac(ge|gt).
;; FP absolute compares (FACGE/FACGT family).
4384 (define_insn "aarch64_fac<optab><mode>"
4385 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
4387 (FAC_COMPARISONS:<V_cmp_result>
4389 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4391 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4394 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4395 [(set_attr "type" "neon_fp_compare_<stype><q>")]
;; Pairwise add, 64-bit vector modes.
4400 (define_insn "aarch64_addp<mode>"
4401 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4403 [(match_operand:VD_BHSI 1 "register_operand" "w")
4404 (match_operand:VD_BHSI 2 "register_operand" "w")]
4407 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4408 [(set_attr "type" "neon_reduc_add<q>")]
;; Pairwise add of a V2DI down to a scalar DI.
4411 (define_insn "aarch64_addpdi"
4412 [(set (match_operand:DI 0 "register_operand" "=w")
4414 [(match_operand:V2DI 1 "register_operand" "w")]
4418 [(set_attr "type" "neon_reduc_add")]
;; sqrt expander: tries the Newton-Raphson approximation first
;; (aarch64_emit_approx_sqrt); falls through to the insn below when
;; the approximation is not emitted.
4423 (define_expand "sqrt<mode>2"
4424 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4425 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4428 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
;; Hardware FSQRT instruction.
4432 (define_insn "*sqrt<mode>2"
4433 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4434 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4436 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4437 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4440 ;; Patterns for vector struct loads and stores.
;; LD2 of two Q registers (OImode = 2x128-bit register tuple); the
;; inner VQ unspec only carries the element mode for the iterator.
4442 (define_insn "aarch64_simd_ld2<mode>"
4443 [(set (match_operand:OI 0 "register_operand" "=w")
4444 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4445 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4448 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4449 [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one 2-element struct and replicate to all lanes; the
;; memory operand is BLK since the access size is sub-register.
4452 (define_insn "aarch64_simd_ld2r<mode>"
4453 [(set (match_operand:OI 0 "register_operand" "=w")
4454 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4455 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4458 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4459 [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 into a single lane: operand 2 holds the existing tuple (tied
;; via "0"); lane number endian-remapped with ENDIAN_LANE_N.
4462 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4463 [(set (match_operand:OI 0 "register_operand" "=w")
4464 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4465 (match_operand:OI 2 "register_operand" "0")
4466 (match_operand:SI 3 "immediate_operand" "i")
4467 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4471 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4472 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4474 [(set_attr "type" "neon_load2_one_lane")]
;; Standard-name expander: on big-endian, load then reverse the
;; register list through aarch64_rev_reglistoi so GCC's lane order
;; matches the architectural order.
4477 (define_expand "vec_load_lanesoi<mode>"
4478 [(set (match_operand:OI 0 "register_operand" "=w")
4479 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4480 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4484 if (BYTES_BIG_ENDIAN)
4486 rtx tmp = gen_reg_rtx (OImode);
4487 rtx mask = aarch64_reverse_mask (<MODE>mode);
4488 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4489 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4492 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
;; ST2 of two Q registers.
4496 (define_insn "aarch64_simd_st2<mode>"
4497 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4498 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4499 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4502 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4503 [(set_attr "type" "neon_store2_2reg<q>")]
4506 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4507 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4508 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4509 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4510 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4511 (match_operand:SI 2 "immediate_operand" "i")]
4515 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4516 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4518 [(set_attr "type" "neon_store2_one_lane<q>")]
4521 (define_expand "vec_store_lanesoi<mode>"
4522 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4523 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4528 if (BYTES_BIG_ENDIAN)
4530 rtx tmp = gen_reg_rtx (OImode);
4531 rtx mask = aarch64_reverse_mask (<MODE>mode);
4532 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4533 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4536 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
;; LD3/ST3 structure load/store patterns for Q-register modes.  Same
;; scheme as the LD2/ST2 family above, with CImode holding the
;; three-vector tuple and aarch64_rev_reglistci for big-endian.
;;
;; Load three whole vectors, de-interleaving.
4540 (define_insn "aarch64_simd_ld3<mode>"
4541 [(set (match_operand:CI 0 "register_operand" "=w")
4542 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4543 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4546 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4547 [(set_attr "type" "neon_load3_3reg<q>")]
;; Load one structure and replicate to all lanes (ld3r).
4550 (define_insn "aarch64_simd_ld3r<mode>"
4551 [(set (match_operand:CI 0 "register_operand" "=w")
4552 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4553 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4556 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4557 [(set_attr "type" "neon_load3_all_lanes<q>")]
;; Load a single lane; lane number remapped via ENDIAN_LANE_N.
4560 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4561 [(set (match_operand:CI 0 "register_operand" "=w")
4562 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4563 (match_operand:CI 2 "register_operand" "0")
4564 (match_operand:SI 3 "immediate_operand" "i")
4565 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4569 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4570 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4572 [(set_attr "type" "neon_load3_one_lane")]
;; Standard-name expander; reverses the register list on big-endian.
4575 (define_expand "vec_load_lanesci<mode>"
4576 [(set (match_operand:CI 0 "register_operand" "=w")
4577 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4578 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4582 if (BYTES_BIG_ENDIAN)
4584 rtx tmp = gen_reg_rtx (CImode);
4585 rtx mask = aarch64_reverse_mask (<MODE>mode);
4586 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4587 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4590 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
;; Store three whole vectors, interleaving.
4594 (define_insn "aarch64_simd_st3<mode>"
4595 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4596 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4597 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4600 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4601 [(set_attr "type" "neon_store3_3reg<q>")]
4604 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store a single lane of a three-vector tuple.
4605 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4606 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4607 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4608 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4609 (match_operand:SI 2 "immediate_operand" "i")]
4613 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4614 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4616 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard-name expander; reverses the register list before the store
;; on big-endian.
4619 (define_expand "vec_store_lanesci<mode>"
4620 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4621 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4626 if (BYTES_BIG_ENDIAN)
4628 rtx tmp = gen_reg_rtx (CImode);
4629 rtx mask = aarch64_reverse_mask (<MODE>mode);
4630 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4631 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4634 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
;; LD4/ST4 structure load/store patterns for Q-register modes.  Same
;; scheme as the LD2/LD3 families above, with XImode holding the
;; four-vector tuple and aarch64_rev_reglistxi for big-endian.
;;
;; Load four whole vectors, de-interleaving.
4638 (define_insn "aarch64_simd_ld4<mode>"
4639 [(set (match_operand:XI 0 "register_operand" "=w")
4640 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4641 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4644 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4645 [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one structure and replicate to all lanes (ld4r).
4648 (define_insn "aarch64_simd_ld4r<mode>"
4649 [(set (match_operand:XI 0 "register_operand" "=w")
4650 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4651 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4654 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4655 [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load a single lane; lane number remapped via ENDIAN_LANE_N.
4658 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4659 [(set (match_operand:XI 0 "register_operand" "=w")
4660 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4661 (match_operand:XI 2 "register_operand" "0")
4662 (match_operand:SI 3 "immediate_operand" "i")
4663 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4667 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4668 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4670 [(set_attr "type" "neon_load4_one_lane")]
;; Standard-name expander; reverses the register list on big-endian.
4673 (define_expand "vec_load_lanesxi<mode>"
4674 [(set (match_operand:XI 0 "register_operand" "=w")
4675 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4676 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4680 if (BYTES_BIG_ENDIAN)
4682 rtx tmp = gen_reg_rtx (XImode);
4683 rtx mask = aarch64_reverse_mask (<MODE>mode);
4684 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4685 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4688 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
;; Store four whole vectors, interleaving.
4692 (define_insn "aarch64_simd_st4<mode>"
4693 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4694 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4695 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4698 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4699 [(set_attr "type" "neon_store4_4reg<q>")]
4702 ;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store a single lane of a four-vector tuple.
4703 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4704 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4705 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4706 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4707 (match_operand:SI 2 "immediate_operand" "i")]
4711 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4712 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4714 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard-name expander; reverses the register list before the store
;; on big-endian.
4717 (define_expand "vec_store_lanesxi<mode>"
4718 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4719 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4720 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4724 if (BYTES_BIG_ENDIAN)
4726 rtx tmp = gen_reg_rtx (XImode);
4727 rtx mask = aarch64_reverse_mask (<MODE>mode);
4728 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4729 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4732 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
;; Element-reverse every vector of a VSTRUCT register list, used by the
;; big-endian vec_load/store_lanes expanders above.  Operand 2 is the
;; byte-permute mask from aarch64_reverse_mask.  After reload this
;; splits into one tbl per constituent 128-bit register; the earlyclobber
;; "=&w" keeps the output list from overlapping the input while the
;; per-register tbls are emitted.
4736 (define_insn_and_split "aarch64_rev_reglist<mode>"
4737 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4739 [(match_operand:VSTRUCT 1 "register_operand" "w")
4740 (match_operand:V16QI 2 "register_operand" "w")]
4741 UNSPEC_REV_REGLIST))]
4744 "&& reload_completed"
4748 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4749 for (i = 0; i < nregs; i++)
4751 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4752 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4753 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4757 [(set_attr "type" "neon_tbl1_q")
4758 (set_attr "length" "<insn_count>")]
4761 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the opaque VSTRUCT (register-list) modes: before
;; reload, force the source into a register when the destination is not
;; one, so the insn below always sees at least one register operand.
4763 (define_expand "mov<mode>"
4764 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4765 (match_operand:VSTRUCT 1 "general_operand" ""))]
4768 if (can_create_pseudo_p ())
4770 if (GET_CODE (operands[0]) != REG)
4771 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Little-endian VSTRUCT moves: reg-reg (split elsewhere, hence type
;; "multiple"), or single st1/ld1 over the whole register list.
4775 (define_insn "*aarch64_mov<mode>"
4776 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4777 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4778 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4779 && (register_operand (operands[0], <MODE>mode)
4780 || register_operand (operands[1], <MODE>mode))"
4783 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4784 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4785 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4786 neon_load<nregs>_<nregs>reg_q")
4787 (set_attr "length" "<insn_count>,4,4")]
;; Big-endian single-vector load/store as ld1/st1, wrapped in an unspec
;; so they are not rewritten into plain (element-order-sensitive) moves.
4790 (define_insn "aarch64_be_ld1<mode>"
4791 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4792 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4793 "aarch64_simd_struct_operand" "Utv")]
4796 "ld1\\t{%0<Vmtype>}, %1"
4797 [(set_attr "type" "neon_load1_1reg<q>")]
;; Store counterpart of aarch64_be_ld1.
4800 (define_insn "aarch64_be_st1<mode>"
4801 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4802 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4805 "st1\\t{%1<Vmtype>}, %0"
4806 [(set_attr "type" "neon_store1_1reg<q>")]
;; Big-endian moves of the opaque tuple modes (OI = 2, CI = 3, XI = 4
;; vectors).  Register-register copies are type "multiple" and are split
;; post-reload (see the define_splits below); memory forms use lengths
;; of 4 bytes per instruction.
4809 (define_insn "*aarch64_be_movoi"
4810 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4811 (match_operand:OI 1 "general_operand" " w,w,m"))]
4812 "TARGET_SIMD && BYTES_BIG_ENDIAN
4813 && (register_operand (operands[0], OImode)
4814 || register_operand (operands[1], OImode))"
4819 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4820 (set_attr "length" "8,4,4")]
;; Three-vector (CImode) big-endian move; reg-reg form is 3 moves.
4823 (define_insn "*aarch64_be_movci"
4824 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4825 (match_operand:CI 1 "general_operand" " w,w,o"))]
4826 "TARGET_SIMD && BYTES_BIG_ENDIAN
4827 && (register_operand (operands[0], CImode)
4828 || register_operand (operands[1], CImode))"
4830 [(set_attr "type" "multiple")
4831 (set_attr "length" "12,4,4")]
;; Four-vector (XImode) big-endian move; reg-reg form is 4 moves.
4834 (define_insn "*aarch64_be_movxi"
4835 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4836 (match_operand:XI 1 "general_operand" " w,w,o"))]
4837 "TARGET_SIMD && BYTES_BIG_ENDIAN
4838 && (register_operand (operands[0], XImode)
4839 || register_operand (operands[1], XImode))"
4841 [(set_attr "type" "multiple")
4842 (set_attr "length" "16,4,4")]
;; Post-reload splits for the tuple-mode moves above.
;; OImode reg-reg move -> two TImode register moves.
4846 [(set (match_operand:OI 0 "register_operand")
4847 (match_operand:OI 1 "register_operand"))]
4848 "TARGET_SIMD && reload_completed"
4851 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
;; CImode move: reg-reg -> three TImode moves; otherwise, on big-endian,
;; an OImode move for the first two vectors plus a V16QI move (via a
;; TImode subreg) for the third.
4856 [(set (match_operand:CI 0 "nonimmediate_operand")
4857 (match_operand:CI 1 "general_operand"))]
4858 "TARGET_SIMD && reload_completed"
4861 if (register_operand (operands[0], CImode)
4862 && register_operand (operands[1], CImode))
4864 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4867 else if (BYTES_BIG_ENDIAN)
4869 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4870 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4871 emit_move_insn (gen_lowpart (V16QImode,
4872 simplify_gen_subreg (TImode, operands[0],
4874 gen_lowpart (V16QImode,
4875 simplify_gen_subreg (TImode, operands[1],
;; XImode move: reg-reg -> four TImode moves; otherwise, on big-endian,
;; two OImode moves at byte offsets 0 and 32.
4884 [(set (match_operand:XI 0 "nonimmediate_operand")
4885 (match_operand:XI 1 "general_operand"))]
4886 "TARGET_SIMD && reload_completed"
4889 if (register_operand (operands[0], XImode)
4890 && register_operand (operands[1], XImode))
4892 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4895 else if (BYTES_BIG_ENDIAN)
4897 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4898 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4899 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4900 simplify_gen_subreg (OImode, operands[1], XImode, 32));
;; Builtin expander for ld<n>r (load-and-replicate): wrap the pointer in
;; a BLKmode MEM with an explicit size, then dispatch to the matching
;; aarch64_simd_ld<n>r insn.
4907 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4908 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4909 (match_operand:DI 1 "register_operand" "w")
4910 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4913 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4914 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4917 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
;; LD2 into D-registers: the OImode result models each 64-bit vector
;; padded to 128 bits (concatenated with zeros).  The _le/_be variants
;; differ only in which side of the concatenation holds the loaded
;; value, reflecting endian lane layout.
4922 (define_insn "aarch64_ld2<mode>_dreg_le"
4923 [(set (match_operand:OI 0 "register_operand" "=w")
4928 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4930 (vec_duplicate:VD (const_int 0)))
4932 (unspec:VD [(match_dup 1)]
4934 (vec_duplicate:VD (const_int 0)))) 0))]
4935 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4936 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4937 [(set_attr "type" "neon_load2_2reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
4940 (define_insn "aarch64_ld2<mode>_dreg_be"
4941 [(set (match_operand:OI 0 "register_operand" "=w")
4945 (vec_duplicate:VD (const_int 0))
4947 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4950 (vec_duplicate:VD (const_int 0))
4951 (unspec:VD [(match_dup 1)]
4953 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4954 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4955 [(set_attr "type" "neon_load2_2reg<q>")]
;; 64-bit scalar (DX) element modes: ld2 degenerates to ld1 of two
;; .1d registers.
4958 (define_insn "aarch64_ld2<mode>_dreg_le"
4959 [(set (match_operand:OI 0 "register_operand" "=w")
4964 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4968 (unspec:DX [(match_dup 1)]
4970 (const_int 0))) 0))]
4971 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4972 "ld1\\t{%S0.1d - %T0.1d}, %1"
4973 [(set_attr "type" "neon_load1_2reg<q>")]
;; Big-endian DX counterpart.
4976 (define_insn "aarch64_ld2<mode>_dreg_be"
4977 [(set (match_operand:OI 0 "register_operand" "=w")
4983 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
4987 (unspec:DX [(match_dup 1)]
4989 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4990 "ld1\\t{%S0.1d - %T0.1d}, %1"
4991 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into D-registers (CImode result); same zero-padding scheme as the
;; ld2 dreg patterns above, with little/big-endian variants.
4994 (define_insn "aarch64_ld3<mode>_dreg_le"
4995 [(set (match_operand:CI 0 "register_operand" "=w")
5001 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5003 (vec_duplicate:VD (const_int 0)))
5005 (unspec:VD [(match_dup 1)]
5007 (vec_duplicate:VD (const_int 0))))
5009 (unspec:VD [(match_dup 1)]
5011 (vec_duplicate:VD (const_int 0)))) 0))]
5012 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5013 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5014 [(set_attr "type" "neon_load3_3reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
5017 (define_insn "aarch64_ld3<mode>_dreg_be"
5018 [(set (match_operand:CI 0 "register_operand" "=w")
5023 (vec_duplicate:VD (const_int 0))
5025 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5028 (vec_duplicate:VD (const_int 0))
5029 (unspec:VD [(match_dup 1)]
5032 (vec_duplicate:VD (const_int 0))
5033 (unspec:VD [(match_dup 1)]
5035 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5036 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5037 [(set_attr "type" "neon_load3_3reg<q>")]
;; 64-bit scalar (DX) element modes: ld3 degenerates to ld1 of three
;; .1d registers.
5040 (define_insn "aarch64_ld3<mode>_dreg_le"
5041 [(set (match_operand:CI 0 "register_operand" "=w")
5047 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5051 (unspec:DX [(match_dup 1)]
5055 (unspec:DX [(match_dup 1)]
5057 (const_int 0))) 0))]
5058 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5059 "ld1\\t{%S0.1d - %U0.1d}, %1"
5060 [(set_attr "type" "neon_load1_3reg<q>")]
;; Big-endian DX counterpart.
5063 (define_insn "aarch64_ld3<mode>_dreg_be"
5064 [(set (match_operand:CI 0 "register_operand" "=w")
5071 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5075 (unspec:DX [(match_dup 1)]
5079 (unspec:DX [(match_dup 1)]
5081 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5082 "ld1\\t{%S0.1d - %U0.1d}, %1"
5083 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into D-registers (XImode result); same zero-padding scheme as the
;; ld2/ld3 dreg patterns above, with little/big-endian variants.
5086 (define_insn "aarch64_ld4<mode>_dreg_le"
5087 [(set (match_operand:XI 0 "register_operand" "=w")
5093 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5095 (vec_duplicate:VD (const_int 0)))
5097 (unspec:VD [(match_dup 1)]
5099 (vec_duplicate:VD (const_int 0))))
5102 (unspec:VD [(match_dup 1)]
5104 (vec_duplicate:VD (const_int 0)))
5106 (unspec:VD [(match_dup 1)]
5108 (vec_duplicate:VD (const_int 0))))) 0))]
5109 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5110 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5111 [(set_attr "type" "neon_load4_4reg<q>")]
;; Big-endian counterpart: zero padding on the other side.
5114 (define_insn "aarch64_ld4<mode>_dreg_be"
5115 [(set (match_operand:XI 0 "register_operand" "=w")
5120 (vec_duplicate:VD (const_int 0))
5122 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5125 (vec_duplicate:VD (const_int 0))
5126 (unspec:VD [(match_dup 1)]
5130 (vec_duplicate:VD (const_int 0))
5131 (unspec:VD [(match_dup 1)]
5134 (vec_duplicate:VD (const_int 0))
5135 (unspec:VD [(match_dup 1)]
5137 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5138 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5139 [(set_attr "type" "neon_load4_4reg<q>")]
;; 64-bit scalar (DX) element modes: ld4 degenerates to ld1 of four
;; .1d registers.
5142 (define_insn "aarch64_ld4<mode>_dreg_le"
5143 [(set (match_operand:XI 0 "register_operand" "=w")
5149 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5153 (unspec:DX [(match_dup 1)]
5158 (unspec:DX [(match_dup 1)]
5162 (unspec:DX [(match_dup 1)]
5164 (const_int 0)))) 0))]
5165 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5166 "ld1\\t{%S0.1d - %V0.1d}, %1"
5167 [(set_attr "type" "neon_load1_4reg<q>")]
;; Big-endian DX counterpart.
5170 (define_insn "aarch64_ld4<mode>_dreg_be"
5171 [(set (match_operand:XI 0 "register_operand" "=w")
5178 [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
5182 (unspec:DX [(match_dup 1)]
5187 (unspec:DX [(match_dup 1)]
5191 (unspec:DX [(match_dup 1)]
5193 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5194 "ld1\\t{%S0.1d - %V0.1d}, %1"
5195 [(set_attr "type" "neon_load1_4reg<q>")]
;; Builtin load expanders.
;; D-register tuple load: build a sized BLKmode MEM from the pointer in
;; operand 1 and dispatch to the endian-specific _dreg pattern.
5198 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5199 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5200 (match_operand:DI 1 "register_operand" "r")
5201 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5204 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5205 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5207 if (BYTES_BIG_ENDIAN)
5208 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_be (operands[0],
5211 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg_le (operands[0],
;; Single-vector load: plain move on little-endian, aarch64_be_ld1 on
;; big-endian to preserve lane numbering.
5216 (define_expand "aarch64_ld1<VALL_F16:mode>"
5217 [(match_operand:VALL_F16 0 "register_operand")
5218 (match_operand:DI 1 "register_operand")]
5221 machine_mode mode = <VALL_F16:MODE>mode;
5222 rtx mem = gen_rtx_MEM (mode, operands[1]);
5224 if (BYTES_BIG_ENDIAN)
5225 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5227 emit_move_insn (operands[0], mem);
;; Q-register tuple load: dispatch straight to aarch64_simd_ld<n>.
5231 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5232 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5233 (match_operand:DI 1 "register_operand" "r")
5234 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5237 machine_mode mode = <VSTRUCT:MODE>mode;
5238 rtx mem = gen_rtx_MEM (mode, operands[1]);
5240 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
;; Lane load: bounds-check the lane index, then use the _lane insn.
5244 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5245 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5246 (match_operand:DI 1 "register_operand" "w")
5247 (match_operand:VSTRUCT 2 "register_operand" "0")
5248 (match_operand:SI 3 "immediate_operand" "i")
5249 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5252 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5253 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5256 aarch64_simd_lane_bounds (operands[3], 0,
5257 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5259 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5260 operands[0], mem, operands[2], operands[3]));
5264 ;; Expanders for builtins to extract vector registers from large
5265 ;; opaque integer modes.
;; Extract vector <part> from a tuple register.  Each constituent vector
;; occupies a 16-byte slot, hence offset = part * 16; the D-reg variant
;; reads the doubled (<VDC:VDBL>) mode and takes its low half.
5269 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5270 [(match_operand:VDC 0 "register_operand" "=w")
5271 (match_operand:VSTRUCT 1 "register_operand" "w")
5272 (match_operand:SI 2 "immediate_operand" "i")]
5275 int part = INTVAL (operands[2]);
5276 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5277 int offset = part * 16;
5279 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5280 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Q-register variant: the subreg extraction is a direct move.
5286 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5287 [(match_operand:VQ 0 "register_operand" "=w")
5288 (match_operand:VSTRUCT 1 "register_operand" "w")
5289 (match_operand:SI 2 "immediate_operand" "i")]
5292 int part = INTVAL (operands[2]);
5293 int offset = part * 16;
5295 emit_move_insn (operands[0],
5296 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5300 ;; Permuted-store expanders for neon intrinsics.
5302 ;; Permute instructions
;; Constant permute: defer entirely to aarch64_expand_vec_perm_const,
;; which picks the best instruction sequence for the mask.
5306 (define_expand "vec_perm_const<mode>"
5307 [(match_operand:VALL_F16 0 "register_operand")
5308 (match_operand:VALL_F16 1 "register_operand")
5309 (match_operand:VALL_F16 2 "register_operand")
5310 (match_operand:<V_cmp_result> 3)]
5313 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5314 operands[2], operands[3]))
;; Variable permute (byte vectors only): aarch64_expand_vec_perm emits
;; the tbl-based sequence.
5320 (define_expand "vec_perm<mode>"
5321 [(match_operand:VB 0 "register_operand")
5322 (match_operand:VB 1 "register_operand")
5323 (match_operand:VB 2 "register_operand")
5324 (match_operand:VB 3 "register_operand")]
5327 aarch64_expand_vec_perm (operands[0], operands[1],
5328 operands[2], operands[3]);
;; Table lookup (tbl/tbx) patterns.  One source register:
5332 (define_insn "aarch64_tbl1<mode>"
5333 [(set (match_operand:VB 0 "register_operand" "=w")
5334 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5335 (match_operand:VB 2 "register_operand" "w")]
5338 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5339 [(set_attr "type" "neon_tbl1<q>")]
5342 ;; Two source registers.
;; tbl over an OImode pair of table registers.
5344 (define_insn "aarch64_tbl2v16qi"
5345 [(set (match_operand:V16QI 0 "register_operand" "=w")
5346 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5347 (match_operand:V16QI 2 "register_operand" "w")]
5350 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5351 [(set_attr "type" "neon_tbl2_q")]
;; Two-register tbl with VB-mode result/index.
5354 (define_insn "aarch64_tbl3<mode>"
5355 [(set (match_operand:VB 0 "register_operand" "=w")
5356 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5357 (match_operand:VB 2 "register_operand" "w")]
5360 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5361 [(set_attr "type" "neon_tbl3")]
;; Two-register tbx: operand 1 supplies the fall-through values, so it
;; is tied to the output ("0").
5364 (define_insn "aarch64_tbx4<mode>"
5365 [(set (match_operand:VB 0 "register_operand" "=w")
5366 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5367 (match_operand:OI 2 "register_operand" "w")
5368 (match_operand:VB 3 "register_operand" "w")]
5371 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5372 [(set_attr "type" "neon_tbl4")]
5375 ;; Three source registers.
;; Three-register tbl (table in a CImode triple).
5377 (define_insn "aarch64_qtbl3<mode>"
5378 [(set (match_operand:VB 0 "register_operand" "=w")
5379 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5380 (match_operand:VB 2 "register_operand" "w")]
5383 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5384 [(set_attr "type" "neon_tbl3")]
;; Three-register tbx; operand 1 (fall-through values) tied to output.
5387 (define_insn "aarch64_qtbx3<mode>"
5388 [(set (match_operand:VB 0 "register_operand" "=w")
5389 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5390 (match_operand:CI 2 "register_operand" "w")
5391 (match_operand:VB 3 "register_operand" "w")]
5394 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5395 [(set_attr "type" "neon_tbl3")]
5398 ;; Four source registers.
;; Four-register tbl (table in an XImode quadruple).
5400 (define_insn "aarch64_qtbl4<mode>"
5401 [(set (match_operand:VB 0 "register_operand" "=w")
5402 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5403 (match_operand:VB 2 "register_operand" "w")]
5406 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5407 [(set_attr "type" "neon_tbl4")]
;; Four-register tbx; operand 1 (fall-through values) tied to output.
5410 (define_insn "aarch64_qtbx4<mode>"
5411 [(set (match_operand:VB 0 "register_operand" "=w")
5412 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5413 (match_operand:XI 2 "register_operand" "w")
5414 (match_operand:VB 3 "register_operand" "w")]
5417 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5418 [(set_attr "type" "neon_tbl4")]
;; Combine two V16QI registers into an OImode register pair (for the
;; two-register tbl); split after reload by aarch64_split_combinev16qi.
5421 (define_insn_and_split "aarch64_combinev16qi"
5422 [(set (match_operand:OI 0 "register_operand" "=w")
5423 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5424 (match_operand:V16QI 2 "register_operand" "w")]
5428 "&& reload_completed"
5431 aarch64_split_combinev16qi (operands);
5434 [(set_attr "type" "multiple")]
;; Permute instructions (zip/uzp/trn via the PERMUTE iterators).
5437 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5438 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5439 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5440 (match_operand:VALL_F16 2 "register_operand" "w")]
5443 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5444 [(set_attr "type" "neon_permute<q>")]
5447 ;; Note immediate (third) operand is lane index not byte index.
;; ext: concatenate-and-extract; the lane index is scaled to a byte
;; index here before printing.
5448 (define_insn "aarch64_ext<mode>"
5449 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5450 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5451 (match_operand:VALL_F16 2 "register_operand" "w")
5452 (match_operand:SI 3 "immediate_operand" "i")]
5456 operands[3] = GEN_INT (INTVAL (operands[3])
5457 * GET_MODE_UNIT_SIZE (<MODE>mode));
5458 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5460 [(set_attr "type" "neon_ext<q>")]
;; rev16/rev32/rev64 element reversal (REVERSE iterator).
5463 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5464 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5465 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5468 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5469 [(set_attr "type" "neon_rev<q>")]
;; D-register tuple stores.  The VD variants emit real st2/st3/st4; the
;; DX (64-bit scalar element) variants degenerate to st1 of .1d regs,
;; mirroring the ld<n> dreg patterns above.
5472 (define_insn "aarch64_st2<mode>_dreg"
5473 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5474 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5475 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5478 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5479 [(set_attr "type" "neon_store2_2reg")]
;; DX variant: st1 of two .1d registers.
5482 (define_insn "aarch64_st2<mode>_dreg"
5483 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5484 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5485 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5488 "st1\\t{%S1.1d - %T1.1d}, %0"
5489 [(set_attr "type" "neon_store1_2reg")]
;; st3 of three D registers.
5492 (define_insn "aarch64_st3<mode>_dreg"
5493 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5494 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5495 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5498 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5499 [(set_attr "type" "neon_store3_3reg")]
;; DX variant: st1 of three .1d registers.
5502 (define_insn "aarch64_st3<mode>_dreg"
5503 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5504 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5505 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5508 "st1\\t{%S1.1d - %U1.1d}, %0"
5509 [(set_attr "type" "neon_store1_3reg")]
;; st4 of four D registers.
5512 (define_insn "aarch64_st4<mode>_dreg"
5513 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5514 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5515 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5518 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5519 [(set_attr "type" "neon_store4_4reg")]
;; DX variant: st1 of four .1d registers.
5522 (define_insn "aarch64_st4<mode>_dreg"
5523 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5524 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5525 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5528 "st1\\t{%S1.1d - %V1.1d}, %0"
5529 [(set_attr "type" "neon_store1_4reg")]
;; Builtin store expanders (mirror the load expanders above).
;; D-register tuple store: sized BLKmode MEM, dispatch to _dreg pattern.
5532 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5533 [(match_operand:DI 0 "register_operand" "r")
5534 (match_operand:VSTRUCT 1 "register_operand" "w")
5535 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5538 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5539 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5541 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Q-register tuple store: dispatch to aarch64_simd_st<n>.
5545 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5546 [(match_operand:DI 0 "register_operand" "r")
5547 (match_operand:VSTRUCT 1 "register_operand" "w")
5548 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5551 machine_mode mode = <VSTRUCT:MODE>mode;
5552 rtx mem = gen_rtx_MEM (mode, operands[0]);
5554 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
;; Lane store: sized BLKmode MEM, then the _lane insn.
5558 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5559 [(match_operand:DI 0 "register_operand" "r")
5560 (match_operand:VSTRUCT 1 "register_operand" "w")
5561 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5562 (match_operand:SI 2 "immediate_operand")]
5565 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5566 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5569 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5570 mem, operands[1], operands[2]));
;; Single-vector store: plain move on little-endian, aarch64_be_st1 on
;; big-endian to preserve lane numbering.
5574 (define_expand "aarch64_st1<VALL_F16:mode>"
5575 [(match_operand:DI 0 "register_operand")
5576 (match_operand:VALL_F16 1 "register_operand")]
5579 machine_mode mode = <VALL_F16:MODE>mode;
5580 rtx mem = gen_rtx_MEM (mode, operands[0]);
5582 if (BYTES_BIG_ENDIAN)
5583 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5585 emit_move_insn (mem, operands[1]);
5589 ;; Expander for builtins to insert vector registers into large
5590 ;; opaque integer modes.
5592 ;; Q-register list. We don't need a D-reg inserter as we zero
5593 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert Q-register operand 2 as vector <part> of a tuple: copy the
;; whole tuple (operand 1 -> operand 0), then overwrite the 16-byte
;; slot at offset part * 16 via a subreg store.
5595 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5596 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5597 (match_operand:VSTRUCT 1 "register_operand" "0")
5598 (match_operand:VQ 2 "register_operand" "w")
5599 (match_operand:SI 3 "immediate_operand" "i")]
5602 int part = INTVAL (operands[3]);
5603 int offset = part * 16;
5605 emit_move_insn (operands[0], operands[1]);
5606 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5611 ;; Standard pattern name vec_init<mode>.
;; Standard-name vector initializer: all work done in
;; aarch64_expand_vector_init.
5613 (define_expand "vec_init<mode>"
5614 [(match_operand:VALL_F16 0 "register_operand" "")
5615 (match_operand 1 "" "")]
5618 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load one element from memory and replicate it to every lane (ld1r);
;; matches a vec_duplicate of a memory scalar directly.
5622 (define_insn "*aarch64_simd_ld1r<mode>"
5623 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5624 (vec_duplicate:VALL_F16
5625 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5627 "ld1r\\t{%0.<Vtype>}, %1"
5628 [(set_attr "type" "neon_load1_all_lanes")]
;; Vector floating-point reciprocal estimate (FRECPE) for the VHSDF
;; (half/single/double float vector) modes.
5631 (define_insn "aarch64_frecpe<mode>"
5632 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5633 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5636 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5637 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
;; Scalar floating-point reciprocal operation on GPF_F16 modes; the exact
;; instruction is selected by the FRECP iterator's frecp_suffix
;; (frecp<suffix>).
5640 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5641 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5642 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5645 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5646 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
;; Floating-point reciprocal step (FRECPS) covering both vector and
;; scalar modes (VHSDF_HSDF).
5649 (define_insn "aarch64_frecps<mode>"
5650 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5652 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5653 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5656 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5657 [(set_attr "type" "neon_fp_recps_<stype><q>")]
;; Unsigned integer reciprocal estimate (URECPE) on vectors of SImode
;; elements.
5660 (define_insn "aarch64_urecpe<mode>"
5661 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5662 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5665 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5666 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5668 ;; Standard pattern name vec_extract<mode>.
;; Standard vec_extract<mode> pattern: extract the lane selected by
;; immediate operand 2 from vector operand 1, delegating to the
;; aarch64_get_lane pattern.
5670 (define_expand "vec_extract<mode>"
5671 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5672 (match_operand:VALL_F16 1 "register_operand" "")
5673 (match_operand:SI 2 "immediate_operand" "")]
5677 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES single-round instruction (aese/aesd per the aes_op iterator).
;; Operand 1 is tied to the output register ("0"); only operand 2 is
;; named explicitly in the output template.
5683 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5684 [(set (match_operand:V16QI 0 "register_operand" "=w")
5685 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5686 (match_operand:V16QI 2 "register_operand" "w")]
5688 "TARGET_SIMD && TARGET_CRYPTO"
5689 "aes<aes_op>\\t%0.16b, %2.16b"
5690 [(set_attr "type" "crypto_aese")]
5693 ;; When AES/AESMC fusion is enabled we want the register allocation to
5697 ;; So prefer to tie operand 1 to operand 0 when fusing.
;; AES mix-columns instruction (aesmc/aesimc per the aesmc_op iterator).
;; Two alternatives: the first ties operand 1 to operand 0 and is enabled
;; only when AES/AESMC fusion is active (so the register allocator keeps
;; the fused pair on the same register); the untied second alternative is
;; always enabled.
5699 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5700 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5701 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5703 "TARGET_SIMD && TARGET_CRYPTO"
5704 "aes<aesmc_op>\\t%0.16b, %1.16b"
5705 [(set_attr "type" "crypto_aesmc")
5706 (set_attr_alternative "enabled"
5707 [(if_then_else (match_test
5708 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5709 (const_string "yes" )
5710 (const_string "no"))
5711 (const_string "yes")])]
;; SHA1 hash-update operation on an SImode value held in a SIMD
;; register ("=w" / "w" constraints).
5716 (define_insn "aarch64_crypto_sha1hsi"
5717 [(set (match_operand:SI 0 "register_operand" "=w")
5718 (unspec:SI [(match_operand:SI 1
5719 "register_operand" "w")]
5721 "TARGET_SIMD && TARGET_CRYPTO"
5723 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash-update taking element 0 of a V4SI input; restricted to
;; little-endian (a separate big-endian pattern selects element 3 to
;; match GCC's lane numbering there).
5726 (define_insn "aarch64_crypto_sha1hv4si"
5727 [(set (match_operand:SI 0 "register_operand" "=w")
5728 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5729 (parallel [(const_int 0)]))]
5731 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5733 [(set_attr "type" "crypto_sha1_fast")]
;; Big-endian counterpart of the V4SI SHA1 hash-update pattern: selects
;; element 3, which corresponds to architectural lane 0 under GCC's
;; big-endian lane numbering.
5736 (define_insn "aarch64_be_crypto_sha1hv4si"
5737 [(set (match_operand:SI 0 "register_operand" "=w")
5738 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5739 (parallel [(const_int 3)]))]
5741 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5743 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 schedule update 1 (sha1su1); operand 1 is tied to the output
;; register, so only operand 2 appears in the template.
5746 (define_insn "aarch64_crypto_sha1su1v4si"
5747 [(set (match_operand:V4SI 0 "register_operand" "=w")
5748 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5749 (match_operand:V4SI 2 "register_operand" "w")]
5751 "TARGET_SIMD && TARGET_CRYPTO"
5752 "sha1su1\\t%0.4s, %2.4s"
5753 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 hash-update instructions (sha1<sha1_op>): 128-bit state in the
;; tied operands 0/1, scalar hash value in operand 2, message schedule
;; in operand 3.
5756 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5757 [(set (match_operand:V4SI 0 "register_operand" "=w")
5758 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5759 (match_operand:SI 2 "register_operand" "w")
5760 (match_operand:V4SI 3 "register_operand" "w")]
5762 "TARGET_SIMD && TARGET_CRYPTO"
5763 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5764 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1 schedule update 0 (sha1su0): three V4SI operands with operand 1
;; tied to the output register.
5767 (define_insn "aarch64_crypto_sha1su0v4si"
5768 [(set (match_operand:V4SI 0 "register_operand" "=w")
5769 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5770 (match_operand:V4SI 2 "register_operand" "w")
5771 (match_operand:V4SI 3 "register_operand" "w")]
5773 "TARGET_SIMD && TARGET_CRYPTO"
5774 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5775 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256 hash-update instructions (sha256h/sha256h2 per the sha256_op
;; iterator); operand 1 is tied to the output register.
5780 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5781 [(set (match_operand:V4SI 0 "register_operand" "=w")
5782 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5783 (match_operand:V4SI 2 "register_operand" "w")
5784 (match_operand:V4SI 3 "register_operand" "w")]
5786 "TARGET_SIMD && TARGET_CRYPTO"
5787 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5788 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256 schedule update 0 (sha256su0); operand 1 is tied to the
;; output register, so only operand 2 appears in the template.
5791 (define_insn "aarch64_crypto_sha256su0v4si"
5792 [(set (match_operand:V4SI 0 "register_operand" "=w")
5793 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5794 (match_operand:V4SI 2 "register_operand" "w")]
5796 "TARGET_SIMD && TARGET_CRYPTO"
5797 "sha256su0\\t%0.4s, %2.4s"
5798 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256 schedule update 1 (sha256su1): three V4SI operands with
;; operand 1 tied to the output register.
5801 (define_insn "aarch64_crypto_sha256su1v4si"
5802 [(set (match_operand:V4SI 0 "register_operand" "=w")
5803 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5804 (match_operand:V4SI 2 "register_operand" "w")
5805 (match_operand:V4SI 3 "register_operand" "w")]
5807 "TARGET_SIMD && TARGET_CRYPTO"
5808 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5809 [(set_attr "type" "crypto_sha256_slow")]
;; Polynomial multiply long: 64x64 -> 128-bit (pmull) on the low
;; D-register halves; the TImode result occupies a full Q register.
5814 (define_insn "aarch64_crypto_pmulldi"
5815 [(set (match_operand:TI 0 "register_operand" "=w")
5816 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5817 (match_operand:DI 2 "register_operand" "w")]
5819 "TARGET_SIMD && TARGET_CRYPTO"
5820 "pmull\\t%0.1q, %1.1d, %2.1d"
5821 [(set_attr "type" "neon_mul_d_long")]
5824 (define_insn "aarch64_crypto_pmullv2di"
5825 [(set (match_operand:TI 0 "register_operand" "=w")
5826 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5827 (match_operand:V2DI 2 "register_operand" "w")]
5829 "TARGET_SIMD && TARGET_CRYPTO"
5830 "pmull2\\t%0.1q, %1.2d, %2.2d"
5831 [(set_attr "type" "neon_mul_d_long")]