1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Standard "mov<mode>" expander for the modes of the VALL iterator.
;; If the destination is a MEM, the source is first forced into a register,
;; so a store always takes its value from a register.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
23 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; "movmisalign<mode>" expander for the modes of the VALL iterator.
;; Uses the same operand predicates as "mov<mode>"; when neither operand is a
;; register, the source is forced into a register so expansion cannot fail.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
33 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast scalar operand 1 into every element of vector operand 0 (VDQ).
;; Alternative 0 takes the scalar from a general register ("r"); alternative 1
;; takes it from lane 0 of a SIMD register ("w").
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ 0 "register_operand" "=w, w")
46 (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
49 dup\\t%0.<Vtype>, %<vw>1
50 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
51 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; Broadcast for the VDQF modes: the scalar operand 1 is always read from
;; lane 0 of a SIMD register ("w") and copied to every element of operand 0.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQF 0 "register_operand" "=w")
56 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
58 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
59 [(set_attr "type" "neon_dup<q>")]
;; DUP (element): copy the lane of vector operand 1 selected by immediate
;; operand 2 into every element of operand 0.  Source and destination have
;; the same mode.
62 (define_insn "aarch64_dup_lane<mode>"
63 [(set (match_operand:VALL 0 "register_operand" "=w")
66 (match_operand:VALL 1 "register_operand" "w")
67 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
70 "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
71 [(set_attr "type" "neon_dup<q>")]
;; DUP (element) where the source vector (operand 1) is in the <VSWAP_WIDTH>
;; mode of the destination: the lane selected by immediate operand 2 is
;; copied into every element of operand 0.
74 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
75 [(set (match_operand:VALL 0 "register_operand" "=w")
78 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
82 "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
83 [(set_attr "type" "neon_dup<q>")]
;; Move pattern for the VD modes.  The insn condition requires at least one
;; operand to be a register.  Alternatives, in order:
;;   0: load from memory (ldr)        1: store to memory (str)
;;   2: SIMD -> SIMD copy (orr)       3: SIMD -> general register (umov)
;;   4: general -> SIMD (ins)         5: general -> general (mov)
;;   "Dn" alternative: immediate, printed by aarch64_output_simd_mov_immediate.
86 (define_insn "*aarch64_simd_mov<mode>"
87 [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
88 "=w, m, w, ?r, ?w, ?r, w")
89 (match_operand:VD 1 "aarch64_simd_general_operand"
90 "m, w, w, w, r, r, Dn"))]
92 && (register_operand (operands[0], <MODE>mode)
93 || register_operand (operands[1], <MODE>mode))"
95 switch (which_alternative)
97 case 0: return "ldr\\t%d0, %1";
98 case 1: return "str\\t%d1, %0";
99 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
100 case 3: return "umov\t%0, %1.d[0]";
101 case 4: return "ins\t%0.d[0], %1";
102 case 5: return "mov\t%0, %1";
104 return aarch64_output_simd_mov_immediate (operands[1],
106 default: gcc_unreachable ();
109 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
110 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
111 mov_reg, neon_move<q>")]
;; Move pattern for the VQ modes.  The insn condition requires at least one
;; operand to be a register.  Load, store and SIMD register copy use
;; ldr/str/orr; the "Dn" immediate alternative is printed by
;; aarch64_output_simd_mov_immediate with a width of 128.  The three
;; alternatives that involve general registers ("?r"/"?w"/"?r") have type
;; "multiple" and length 8, whereas the others have length 4.
114 (define_insn "*aarch64_simd_mov<mode>"
115 [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
116 "=w, m, w, ?r, ?w, ?r, w")
117 (match_operand:VQ 1 "aarch64_simd_general_operand"
118 "m, w, w, w, r, r, Dn"))]
120 && (register_operand (operands[0], <MODE>mode)
121 || register_operand (operands[1], <MODE>mode))"
123 switch (which_alternative)
126 return "ldr\\t%q0, %1";
128 return "str\\t%q1, %0";
130 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
136 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
141 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
142 neon_logic<q>, multiple, multiple, multiple,\
144 (set_attr "length" "4,4,4,8,8,8,4")]
;; Post-reload split of a VQ register move whose source and destination are
;; both general registers.  The move becomes two DImode moves between the
;; register pairs (rdest, rdest + 1) and (rsrc, rsrc + 1).  The helper
;; aarch64_simd_disambiguate_copy fills in the replacement operands;
;; presumably it orders the two moves so that an overlapping source is not
;; overwritten first -- confirm against its definition.
148 [(set (match_operand:VQ 0 "register_operand" "")
149 (match_operand:VQ 1 "register_operand" ""))]
150 "TARGET_SIMD && reload_completed
151 && GP_REGNUM_P (REGNO (operands[0]))
152 && GP_REGNUM_P (REGNO (operands[1]))"
153 [(set (match_dup 0) (match_dup 1))
154 (set (match_dup 2) (match_dup 3))]
156 int rdest = REGNO (operands[0]);
157 int rsrc = REGNO (operands[1]);
160 dest[0] = gen_rtx_REG (DImode, rdest);
161 src[0] = gen_rtx_REG (DImode, rsrc);
162 dest[1] = gen_rtx_REG (DImode, rdest + 1);
163 src[1] = gen_rtx_REG (DImode, rsrc + 1);
165 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
;; Post-reload split of a VQ register move that crosses register files
;; (general -> FP/SIMD or FP/SIMD -> general).  The work is delegated to
;; aarch64_split_simd_move.
169 [(set (match_operand:VQ 0 "register_operand" "")
170 (match_operand:VQ 1 "register_operand" ""))]
171 "TARGET_SIMD && reload_completed
172 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
173 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
176 aarch64_split_simd_move (operands[0], operands[1]);
;; Expand a VQ move as two <VHALF>-sized moves.
;; - Source in a general register: the low and high halves of the source are
;;   written with move_lo_quad_<mode> and move_hi_quad_<mode>.
;; - Otherwise: the low and high halves of the destination are read from the
;;   source with aarch64_simd_mov_from_<mode>low / ...high, using the
;;   lane-selection parallels built by aarch64_simd_vect_par_cnst_half.
180 (define_expand "aarch64_split_simd_mov<mode>"
181 [(set (match_operand:VQ 0)
182 (match_operand:VQ 1))]
185 rtx dst = operands[0];
186 rtx src = operands[1];
188 if (GP_REGNUM_P (REGNO (src)))
190 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
191 rtx src_high_part = gen_highpart (<VHALF>mode, src);
194 (gen_move_lo_quad_<mode> (dst, src_low_part));
196 (gen_move_hi_quad_<mode> (dst, src_high_part));
201 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
202 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
203 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
204 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
207 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
209 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Copy the low half of SIMD register operand 1 into general register
;; operand 0.  Operand 2 must satisfy vect_par_cnst_lo_half.  Only available
;; after reload.
215 (define_insn "aarch64_simd_mov_from_<mode>low"
216 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
218 (match_operand:VQ 1 "register_operand" "w")
219 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
220 "TARGET_SIMD && reload_completed"
222 [(set_attr "type" "neon_to_gp<q>")
223 (set_attr "length" "4")
;; Copy the high half of SIMD register operand 1 into general register
;; operand 0.  Operand 2 must satisfy vect_par_cnst_hi_half.  Only available
;; after reload.
226 (define_insn "aarch64_simd_mov_from_<mode>high"
227 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
229 (match_operand:VQ 1 "register_operand" "w")
230 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
231 "TARGET_SIMD && reload_completed"
233 [(set_attr "type" "neon_to_gp<q>")
234 (set_attr "length" "4")
;; Bitwise OR-NOT: operand 0 = (~operand 1) | operand 2.
;; The template prints operand 2 before operand 1, so the complemented
;; input ends up as the last assembler operand.
237 (define_insn "orn<mode>3"
238 [(set (match_operand:VDQ 0 "register_operand" "=w")
239 (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
240 (match_operand:VDQ 2 "register_operand" "w")))]
242 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
243 [(set_attr "type" "neon_logic<q>")]
;; Bit clear: operand 0 = (~operand 1) & operand 2.
;; The template prints operand 2 before operand 1, so the complemented
;; input ends up as the last assembler operand.
246 (define_insn "bic<mode>3"
247 [(set (match_operand:VDQ 0 "register_operand" "=w")
248 (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
249 (match_operand:VDQ 2 "register_operand" "w")))]
251 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
252 [(set_attr "type" "neon_logic<q>")]
;; Element-wise vector addition for the VDQ modes:
;; operand 0 = operand 1 + operand 2.
255 (define_insn "add<mode>3"
256 [(set (match_operand:VDQ 0 "register_operand" "=w")
257 (plus:VDQ (match_operand:VDQ 1 "register_operand" "w")
258 (match_operand:VDQ 2 "register_operand" "w")))]
260 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
261 [(set_attr "type" "neon_add<q>")]
;; Element-wise vector subtraction for the VDQ modes:
;; operand 0 = operand 1 - operand 2.
264 (define_insn "sub<mode>3"
265 [(set (match_operand:VDQ 0 "register_operand" "=w")
266 (minus:VDQ (match_operand:VDQ 1 "register_operand" "w")
267 (match_operand:VDQ 2 "register_operand" "w")))]
269 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
270 [(set_attr "type" "neon_sub<q>")]
;; Element-wise vector multiplication for the VDQM modes:
;; operand 0 = operand 1 * operand 2.
273 (define_insn "mul<mode>3"
274 [(set (match_operand:VDQM 0 "register_operand" "=w")
275 (mult:VDQM (match_operand:VDQM 1 "register_operand" "w")
276 (match_operand:VDQM 2 "register_operand" "w")))]
278 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
279 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Multiply by element: vector operand 3 is multiplied by the lane of
;; operand 1 selected by immediate operand 2.  The <f> attribute supplies the
;; mnemonic prefix, and operand 1 uses the mode-dependent <h_con> constraint.
282 (define_insn "*aarch64_mul3_elt<mode>"
283 [(set (match_operand:VMUL 0 "register_operand" "=w")
287 (match_operand:VMUL 1 "register_operand" "<h_con>")
288 (parallel [(match_operand:SI 2 "immediate_operand")])))
289 (match_operand:VMUL 3 "register_operand" "w")))]
291 "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
292 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by element where the lane source (operand 1) is in the
;; <VSWAP_WIDTH> mode of the result: the lane selected by immediate operand 2
;; is duplicated and multiplied with vector operand 3.
295 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
296 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
297 (mult:VMUL_CHANGE_NLANES
298 (vec_duplicate:VMUL_CHANGE_NLANES
300 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
301 (parallel [(match_operand:SI 2 "immediate_operand")])))
302 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
304 "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
305 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply V2DF operand 1 by DF scalar operand 2.  The scalar is referenced
;; as lane 0 of its register in the FMUL (by element) template.
308 (define_insn "*aarch64_mul3_elt_to_128df"
309 [(set (match_operand:V2DF 0 "register_operand" "=w")
312 (match_operand:DF 2 "register_operand" "w"))
313 (match_operand:V2DF 1 "register_operand" "w")))]
315 "fmul\\t%0.2d, %1.2d, %2.d[0]"
316 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; DF result: the lane of V2DF operand 1 selected by immediate operand 2,
;; multiplied by DF operand 3.
;; NOTE(review): the template uses the .2d arrangement for the DF operands 0
;; and 3, so the emitted instruction also computes on the upper lane of those
;; registers -- confirm that this is intended.
319 (define_insn "*aarch64_mul3_elt_to_64v2df"
320 [(set (match_operand:DF 0 "register_operand" "=w")
323 (match_operand:V2DF 1 "register_operand" "w")
324 (parallel [(match_operand:SI 2 "immediate_operand")]))
325 (match_operand:DF 3 "register_operand" "w")))]
327 "fmul\\t%0.2d, %3.2d, %1.d[%2]"
328 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Element-wise negation for the VDQ modes: operand 0 = -operand 1.
331 (define_insn "neg<mode>2"
332 [(set (match_operand:VDQ 0 "register_operand" "=w")
333 (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
335 "neg\t%0.<Vtype>, %1.<Vtype>"
336 [(set_attr "type" "neon_neg<q>")]
;; Element-wise absolute value for the VDQ modes: operand 0 = |operand 1|.
339 (define_insn "abs<mode>2"
340 [(set (match_operand:VDQ 0 "register_operand" "=w")
341 (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
343 "abs\t%0.<Vtype>, %1.<Vtype>"
344 [(set_attr "type" "neon_abs<q>")]
;; Signed absolute difference: operand 0 = |operand 1 - operand 2|, emitted
;; as SABD.
;; NOTE(review): the RTL is abs (minus ...), whose subtraction wraps when the
;; difference does not fit the element, while SABD presumably does not wrap
;; -- confirm that the two agree wherever this pattern can be matched.
347 (define_insn "abd<mode>_3"
348 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
349 (abs:VDQ_BHSI (minus:VDQ_BHSI
350 (match_operand:VDQ_BHSI 1 "register_operand" "w")
351 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
353 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
354 [(set_attr "type" "neon_abd<q>")]
;; Signed absolute difference and accumulate:
;; operand 0 = |operand 1 - operand 2| + operand 3.
;; Operand 3 is tied to operand 0 (constraint "0"), so the accumulator is
;; updated in place.
357 (define_insn "aba<mode>_3"
358 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
359 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
360 (match_operand:VDQ_BHSI 1 "register_operand" "w")
361 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
362 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
364 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
365 [(set_attr "type" "neon_arith_acc<q>")]
;; Floating-point absolute difference for the VDQF modes:
;; operand 0 = |operand 1 - operand 2|.
368 (define_insn "fabd<mode>_3"
369 [(set (match_operand:VDQF 0 "register_operand" "=w")
370 (abs:VDQF (minus:VDQF
371 (match_operand:VDQF 1 "register_operand" "w")
372 (match_operand:VDQF 2 "register_operand" "w"))))]
374 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
375 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Scalar floating-point absolute difference for the GPF modes, using the
;; scalar register names selected by the %<s> operand modifier.
378 (define_insn "*fabd_scalar<mode>3"
379 [(set (match_operand:GPF 0 "register_operand" "=w")
381 (match_operand:GPF 1 "register_operand" "w")
382 (match_operand:GPF 2 "register_operand" "w"))))]
384 "fabd\t%<s>0, %<s>1, %<s>2"
385 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Bitwise AND for the VDQ modes: operand 0 = operand 1 & operand 2.
388 (define_insn "and<mode>3"
389 [(set (match_operand:VDQ 0 "register_operand" "=w")
390 (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
391 (match_operand:VDQ 2 "register_operand" "w")))]
393 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
394 [(set_attr "type" "neon_logic<q>")]
;; Bitwise inclusive OR for the VDQ modes: operand 0 = operand 1 | operand 2.
397 (define_insn "ior<mode>3"
398 [(set (match_operand:VDQ 0 "register_operand" "=w")
399 (ior:VDQ (match_operand:VDQ 1 "register_operand" "w")
400 (match_operand:VDQ 2 "register_operand" "w")))]
402 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
403 [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive OR for the VDQ modes: operand 0 = operand 1 ^ operand 2.
406 (define_insn "xor<mode>3"
407 [(set (match_operand:VDQ 0 "register_operand" "=w")
408 (xor:VDQ (match_operand:VDQ 1 "register_operand" "w")
409 (match_operand:VDQ 2 "register_operand" "w")))]
411 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
412 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes: operand 0 = ~operand 1.
415 (define_insn "one_cmpl<mode>2"
416 [(set (match_operand:VDQ 0 "register_operand" "=w")
417 (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
419 "not\t%0.<Vbtype>, %1.<Vbtype>"
420 [(set_attr "type" "neon_logic<q>")]
;; Insert scalar operand 1 into one lane of a VQ_S vector.  Operand 3 is
;; tied to operand 0, so the vector is updated in place.  Operand 2 arrives
;; as a power of two (1 << lane): the C code takes its exact_log2, maps the
;; lane through ENDIAN_LANE_N and stores the remapped 1 << elt back for the
;; %p2 modifier.  Alternative 0 inserts from a general register,
;; alternative 1 from lane 0 of a SIMD register.
423 (define_insn "aarch64_simd_vec_set<mode>"
424 [(set (match_operand:VQ_S 0 "register_operand" "=w,w")
427 (match_operand:<VEL> 1 "register_operand" "r,w"))
428 (match_operand:VQ_S 3 "register_operand" "0,0")
429 (match_operand:SI 2 "immediate_operand" "i,i")))]
432 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
433 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
434 switch (which_alternative)
437 return "ins\\t%0.<Vetype>[%p2], %w1";
439 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
444 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")]
;; Logical shift right of every element by an immediate (USHR).  Operand 2
;; is a constant vector accepted by aarch64_simd_rshift_imm ("Dr").
447 (define_insn "aarch64_simd_lshr<mode>"
448 [(set (match_operand:VDQ 0 "register_operand" "=w")
449 (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
450 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
452 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
453 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right of every element by an immediate (SSHR).  Operand 2
;; is a constant vector accepted by aarch64_simd_rshift_imm ("Dr").
456 (define_insn "aarch64_simd_ashr<mode>"
457 [(set (match_operand:VDQ 0 "register_operand" "=w")
458 (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
459 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
461 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
462 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left of every element by an immediate (SHL).  Operand 2 is a
;; constant vector accepted by aarch64_simd_lshift_imm ("Dl").
465 (define_insn "aarch64_simd_imm_shl<mode>"
466 [(set (match_operand:VDQ 0 "register_operand" "=w")
467 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
468 (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))]
470 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
471 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left by per-element amounts held in vector register operand 2,
;; expressed as a plain ashift and emitted as SSHL.
474 (define_insn "aarch64_simd_reg_sshl<mode>"
475 [(set (match_operand:VDQ 0 "register_operand" "=w")
476 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
477 (match_operand:VDQ 2 "register_operand" "w")))]
479 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
480 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL with register shift amounts, modelled as UNSPEC_ASHIFT_UNSIGNED
;; rather than ashift.  The right-shift expanders in this file pass negated
;; shift counts to this pattern.
483 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
484 [(set (match_operand:VDQ 0 "register_operand" "=w")
485 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
486 (match_operand:VDQ 2 "register_operand" "w")]
487 UNSPEC_ASHIFT_UNSIGNED))]
489 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
490 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL with register shift amounts, modelled as UNSPEC_ASHIFT_SIGNED
;; rather than ashift.  The right-shift expanders in this file pass negated
;; shift counts to this pattern.
493 (define_insn "aarch64_simd_reg_shl<mode>_signed"
494 [(set (match_operand:VDQ 0 "register_operand" "=w")
495 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
496 (match_operand:VDQ 2 "register_operand" "w")]
497 UNSPEC_ASHIFT_SIGNED))]
499 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
500 [(set_attr "type" "neon_shift_reg<q>")]
;; Vector shift left by a scalar SImode amount (operand 2).
;; - Constant amount in [0, element bit width): emit the immediate form
;;   (aarch64_simd_imm_shl<mode>) with the amount duplicated into a constant
;;   vector.
;; - Any other constant, or an amount in memory: force it into an SImode
;;   register.
;; - Register amount: broadcast it with aarch64_simd_dup<mode> and emit the
;;   register form (aarch64_simd_reg_sshl<mode>).
503 (define_expand "ashl<mode>3"
504 [(match_operand:VDQ 0 "register_operand" "")
505 (match_operand:VDQ 1 "register_operand" "")
506 (match_operand:SI 2 "general_operand" "")]
509 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
512 if (CONST_INT_P (operands[2]))
514 shift_amount = INTVAL (operands[2]);
515 if (shift_amount >= 0 && shift_amount < bit_width)
517 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
519 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
526 operands[2] = force_reg (SImode, operands[2]);
529 else if (MEM_P (operands[2]))
531 operands[2] = force_reg (SImode, operands[2]);
534 if (REG_P (operands[2]))
536 rtx tmp = gen_reg_rtx (<MODE>mode);
537 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
538 convert_to_mode (<VEL>mode,
541 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Vector logical shift right by a scalar SImode amount (operand 2).
;; - Constant amount in (0, element bit width]: emit the immediate form
;;   (aarch64_simd_lshr<mode>) with the amount duplicated into a constant
;;   vector.
;; - Any other constant, or an amount in memory: force it into an SImode
;;   register.
;; - Register amount: negate it (negsi2), broadcast the negated value and
;;   emit aarch64_simd_reg_shl<mode>_unsigned, i.e. a left shift by a
;;   negative count.
550 (define_expand "lshr<mode>3"
551 [(match_operand:VDQ 0 "register_operand" "")
552 (match_operand:VDQ 1 "register_operand" "")
553 (match_operand:SI 2 "general_operand" "")]
556 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
559 if (CONST_INT_P (operands[2]))
561 shift_amount = INTVAL (operands[2]);
562 if (shift_amount > 0 && shift_amount <= bit_width)
564 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
566 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
572 operands[2] = force_reg (SImode, operands[2]);
574 else if (MEM_P (operands[2]))
576 operands[2] = force_reg (SImode, operands[2]);
579 if (REG_P (operands[2]))
581 rtx tmp = gen_reg_rtx (SImode);
582 rtx tmp1 = gen_reg_rtx (<MODE>mode);
583 emit_insn (gen_negsi2 (tmp, operands[2]));
584 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
585 convert_to_mode (<VEL>mode,
587 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Vector arithmetic shift right by a scalar SImode amount (operand 2).
;; - Constant amount in (0, element bit width]: emit the immediate form
;;   (aarch64_simd_ashr<mode>) with the amount duplicated into a constant
;;   vector.
;; - Any other constant, or an amount in memory: force it into an SImode
;;   register.
;; - Register amount: negate it (negsi2), broadcast the negated value and
;;   emit aarch64_simd_reg_shl<mode>_signed, i.e. a left shift by a
;;   negative count.
597 (define_expand "ashr<mode>3"
598 [(match_operand:VDQ 0 "register_operand" "")
599 (match_operand:VDQ 1 "register_operand" "")
600 (match_operand:SI 2 "general_operand" "")]
603 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
606 if (CONST_INT_P (operands[2]))
608 shift_amount = INTVAL (operands[2]);
609 if (shift_amount > 0 && shift_amount <= bit_width)
611 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
613 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
619 operands[2] = force_reg (SImode, operands[2]);
621 else if (MEM_P (operands[2]))
623 operands[2] = force_reg (SImode, operands[2]);
626 if (REG_P (operands[2]))
628 rtx tmp = gen_reg_rtx (SImode);
629 rtx tmp1 = gen_reg_rtx (<MODE>mode);
630 emit_insn (gen_negsi2 (tmp, operands[2]));
631 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
632 convert_to_mode (<VEL>mode,
634 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector shift left by per-element amounts in vector operand 2; expands
;; directly to aarch64_simd_reg_sshl<mode>.
644 (define_expand "vashl<mode>3"
645 [(match_operand:VDQ 0 "register_operand" "")
646 (match_operand:VDQ 1 "register_operand" "")
647 (match_operand:VDQ 2 "register_operand" "")]
650 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
655 ;; Using mode VQ_S as there is no V2DImode neg!
656 ;; Negating individual lanes most certainly offsets the
657 ;; gain from vectorization.
;; Vector arithmetic shift right by per-element amounts: operand 2 is negated
;; into a fresh register with neg<mode>2, and the result is passed on to
;; aarch64_simd_reg_shl<mode>_signed.
658 (define_expand "vashr<mode>3"
659 [(match_operand:VQ_S 0 "register_operand" "")
660 (match_operand:VQ_S 1 "register_operand" "")
661 (match_operand:VQ_S 2 "register_operand" "")]
664 rtx neg = gen_reg_rtx (<MODE>mode);
665 emit (gen_neg<mode>2 (neg, operands[2]));
666 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; Vector logical shift right by per-element amounts: operand 2 is negated
;; into a fresh register with neg<mode>2, and the result is passed on to
;; aarch64_simd_reg_shl<mode>_unsigned.
671 (define_expand "vlshr<mode>3"
672 [(match_operand:VQ_S 0 "register_operand" "")
673 (match_operand:VQ_S 1 "register_operand" "")
674 (match_operand:VQ_S 2 "register_operand" "")]
677 rtx neg = gen_reg_rtx (<MODE>mode);
678 emit (gen_neg<mode>2 (neg, operands[2]));
679 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; Standard vec_set expander for the VQ_S modes.  The lane index in operand 2
;; is converted to the power-of-two form (1 << index) that
;; aarch64_simd_vec_set<mode> expects.  Operand 0 is passed both as the
;; destination and as the vector supplying the other lanes.
684 (define_expand "vec_set<mode>"
685 [(match_operand:VQ_S 0 "register_operand")
686 (match_operand:<VEL> 1 "register_operand")
687 (match_operand:SI 2 "immediate_operand")]
690 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
691 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
692 GEN_INT (elem), operands[0]));
;; Insert DI scalar operand 1 into one lane of a V2DI vector.  Operand 3 is
;; tied to operand 0.  Operand 2 arrives as a power of two (1 << lane): the
;; C code takes its exact_log2, maps the lane through ENDIAN_LANE_N and
;; stores the remapped 1 << elt back for the %p2 modifier.  Alternative 0
;; inserts from a general register, alternative 1 from lane 0 of a SIMD
;; register.
697 (define_insn "aarch64_simd_vec_setv2di"
698 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
701 (match_operand:DI 1 "register_operand" "r,w"))
702 (match_operand:V2DI 3 "register_operand" "0,0")
703 (match_operand:SI 2 "immediate_operand" "i,i")))]
706 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
707 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
708 switch (which_alternative)
711 return "ins\\t%0.d[%p2], %1";
713 return "ins\\t%0.d[%p2], %1.d[0]";
718 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; Standard vec_set expander for V2DI.  The lane index in operand 2 is
;; converted to the power-of-two form (1 << index) that
;; aarch64_simd_vec_setv2di expects.  Operand 0 is passed both as the
;; destination and as the vector supplying the other lane.
721 (define_expand "vec_setv2di"
722 [(match_operand:V2DI 0 "register_operand")
723 (match_operand:DI 1 "register_operand")
724 (match_operand:SI 2 "immediate_operand")]
727 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
728 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
729 GEN_INT (elem), operands[0]));
;; Insert floating-point scalar operand 1 (lane 0 of a SIMD register) into
;; one lane of a VDQF vector.  Operand 3 is tied to operand 0.  Operand 2
;; arrives as a power of two (1 << lane): the C code takes its exact_log2,
;; maps the lane through ENDIAN_LANE_N and stores the remapped 1 << elt back
;; for the %p2 modifier.
734 (define_insn "aarch64_simd_vec_set<mode>"
735 [(set (match_operand:VDQF 0 "register_operand" "=w")
738 (match_operand:<VEL> 1 "register_operand" "w"))
739 (match_operand:VDQF 3 "register_operand" "0")
740 (match_operand:SI 2 "immediate_operand" "i")))]
743 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
745 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
746 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
748 [(set_attr "type" "neon_ins<q>")]
;; Standard vec_set expander for the VDQF modes.  The lane index in operand 2
;; is converted to the power-of-two form (1 << index) that
;; aarch64_simd_vec_set<mode> expects.  Operand 0 is passed both as the
;; destination and as the vector supplying the other lanes.
751 (define_expand "vec_set<mode>"
752 [(match_operand:VDQF 0 "register_operand" "+w")
753 (match_operand:<VEL> 1 "register_operand" "w")
754 (match_operand:SI 2 "immediate_operand" "")]
757 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
758 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
759 GEN_INT (elem), operands[0]));
;; Multiply-accumulate: operand 0 = operand 2 * operand 3 + operand 1.
;; Operand 1 is tied to operand 0 (constraint "0"), so the accumulator is
;; updated in place.
765 (define_insn "aarch64_mla<mode>"
766 [(set (match_operand:VQ_S 0 "register_operand" "=w")
767 (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
768 (match_operand:VQ_S 3 "register_operand" "w"))
769 (match_operand:VQ_S 1 "register_operand" "0")))]
771 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
772 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Multiply-accumulate by element: vector operand 3 is multiplied by the lane
;; of operand 1 selected by immediate operand 2 and added to operand 4, which
;; is tied to operand 0.  The lane is printed with the element-size suffix
;; <Vetype> (e.g. "v1.s[2]"), as in the by-element multiply patterns of this
;; file and the architectural MLA (by element) syntax.
775 (define_insn "*aarch64_mla_elt<mode>"
776 [(set (match_operand:VDQHS 0 "register_operand" "=w")
781 (match_operand:VDQHS 1 "register_operand" "<h_con>")
782 (parallel [(match_operand:SI 2 "immediate_operand")])))
783 (match_operand:VDQHS 3 "register_operand" "w"))
784 (match_operand:VDQHS 4 "register_operand" "0")))]
786 "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
787 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-accumulate by element where the lane source (operand 1) is in the
;; <VSWAP_WIDTH> mode of the result: vector operand 3 is multiplied by the
;; lane selected by immediate operand 2 and added to operand 4, which is tied
;; to operand 0.  The lane is printed with the element-size suffix <Vetype>,
;; as in the by-element multiply patterns of this file and the architectural
;; MLA (by element) syntax.
790 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
791 [(set (match_operand:VDQHS 0 "register_operand" "=w")
796 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
797 (parallel [(match_operand:SI 2 "immediate_operand")])))
798 (match_operand:VDQHS 3 "register_operand" "w"))
799 (match_operand:VDQHS 4 "register_operand" "0")))]
801 "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
802 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; Operand 1 is tied to operand 0 (constraint "0"), so the accumulator is
;; updated in place.
805 (define_insn "aarch64_mls<mode>"
806 [(set (match_operand:VQ_S 0 "register_operand" "=w")
807 (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0")
808 (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
809 (match_operand:VQ_S 3 "register_operand" "w"))))]
811 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
812 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; Multiply-subtract by element: vector operand 3 is multiplied by the lane
;; of operand 1 selected by immediate operand 2 and the product is subtracted
;; from operand 4, which is tied to operand 0.  The lane is printed with the
;; element-size suffix <Vetype> (e.g. "v1.s[2]"), as in the by-element
;; multiply patterns of this file and the architectural MLS (by element)
;; syntax.
815 (define_insn "*aarch64_mls_elt<mode>"
816 [(set (match_operand:VDQHS 0 "register_operand" "=w")
818 (match_operand:VDQHS 4 "register_operand" "0")
822 (match_operand:VDQHS 1 "register_operand" "<h_con>")
823 (parallel [(match_operand:SI 2 "immediate_operand")])))
824 (match_operand:VDQHS 3 "register_operand" "w"))))]
826 "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
827 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract by element where the lane source (operand 1) is in the
;; <VSWAP_WIDTH> mode of the result: vector operand 3 is multiplied by the
;; lane selected by immediate operand 2 and the product is subtracted from
;; operand 4, which is tied to operand 0.  The lane is printed with the
;; element-size suffix <Vetype>, as in the by-element multiply patterns of
;; this file and the architectural MLS (by element) syntax.
830 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
831 [(set (match_operand:VDQHS 0 "register_operand" "=w")
833 (match_operand:VDQHS 4 "register_operand" "0")
837 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
838 (parallel [(match_operand:SI 2 "immediate_operand")])))
839 (match_operand:VDQHS 3 "register_operand" "w"))))]
841 "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
842 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
845 ;; Max/Min operations.
;; Element-wise maximum/minimum for the VQ_S modes, one pattern per code of
;; the MAXMIN iterator.  The <su> and <maxmin> attributes form both the
;; pattern name and the mnemonic.
846 (define_insn "<su><maxmin><mode>3"
847 [(set (match_operand:VQ_S 0 "register_operand" "=w")
848 (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w")
849 (match_operand:VQ_S 2 "register_operand" "w")))]
851 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
852 [(set_attr "type" "neon_minmax<q>")]
855 ;; Move into low-half clearing high half to 0.
;; Build a VQ value from <VHALF> operand 1 and an all-zero half (the
;; vec_duplicate of const_int 0).  There are three alternatives: operand 1 in
;; a SIMD register ("w"), or in a general register ("r") in two variants
;; that differ in their "simd"/"fp" attributes and insn types.
857 (define_insn "move_lo_quad_<mode>"
858 [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
860 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
861 (vec_duplicate:<VHALF> (const_int 0))))]
867 [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>")
868 (set_attr "simd" "yes,*,yes")
869 (set_attr "fp" "*,yes,*")
870 (set_attr "length" "4")]
873 ;; Move into high-half.
;; Write <VHALF> operand 1 into the high half of VQ operand 0.  Operand 0 is
;; read-write ("+"); operand 2 must satisfy vect_par_cnst_lo_half.  Operand 1
;; may be in a SIMD register ("w", emitted as "ins %0.d[1], %1.d[0]") or in a
;; general register ("r").
875 (define_insn "aarch64_simd_move_hi_quad_<mode>"
876 [(set (match_operand:VQ 0 "register_operand" "+w,w")
880 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
881 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
884 ins\\t%0.d[1], %1.d[0]
886 [(set_attr "type" "neon_ins")
887 (set_attr "length" "4")]
;; Expander for moving <VHALF> operand 1 into the high half of operand 0:
;; builds the low-half lane parallel with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, false) and emits aarch64_simd_move_hi_quad_<mode>.
890 (define_expand "move_hi_quad_<mode>"
891 [(match_operand:VQ 0 "register_operand" "")
892 (match_operand:<VHALF> 1 "register_operand" "")]
895 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
896 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
901 ;; Narrowing operations.
;; Narrow every element of VQN operand 1 by truncation into the <VNARROWQ>
;; result (XTN).
904 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
905 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
906 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
908 "xtn\\t%0.<Vntype>, %1.<Vtype>"
909 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vec_pack_trunc for the VDN modes: the two inputs are combined into one
;; <VDBL> temporary (move_lo_quad / move_hi_quad), which is then narrowed
;; with a single aarch64_simd_vec_pack_trunc.  The operand that goes into
;; the low half depends on BYTES_BIG_ENDIAN: operand 2 on big-endian,
;; operand 1 otherwise.
912 (define_expand "vec_pack_trunc_<mode>"
913 [(match_operand:<VNARROWD> 0 "register_operand" "")
914 (match_operand:VDN 1 "register_operand" "")
915 (match_operand:VDN 2 "register_operand" "")]
918 rtx tempreg = gen_reg_rtx (<VDBL>mode);
919 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
920 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
922 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
923 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
924 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; vec_pack_trunc for the VQN modes: truncate both inputs and concatenate
;; the results, emitted as XTN followed by XTN2 (length 8).  On big-endian
;; the two inputs swap roles.  The destination is early-clobbered ("&")
;; because it is written by the first instruction before the second reads
;; its input.
930 (define_insn "vec_pack_trunc_<mode>"
931 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w")
932 (vec_concat:<VNARROWQ2>
933 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
934 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
937 if (BYTES_BIG_ENDIAN)
938 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
940 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
942 [(set_attr "type" "multiple")
943 (set_attr "length" "8")]
946 ;; Widening operations.
;; Sign- or zero-extend (per ANY_EXTEND) the low half of VQW operand 1 to
;; the <VWIDE> mode.  Implemented as a shift-left-long by 0.
948 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
949 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
950 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
951 (match_operand:VQW 1 "register_operand" "w")
952 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
955 "<su>shll %0.<Vwtype>, %1.<Vhalftype>, 0"
956 [(set_attr "type" "neon_shift_imm_long")]
;; Sign- or zero-extend (per ANY_EXTEND) the high half of VQW operand 1 to
;; the <VWIDE> mode.  Implemented as the "2" form of shift-left-long by 0.
959 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
960 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
961 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
962 (match_operand:VQW 1 "register_operand" "w")
963 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
966 "<su>shll2 %0.<Vwtype>, %1.<Vtype>, 0"
967 [(set_attr "type" "neon_shift_imm_long")]
;; Standard vec_unpack hi expander: builds the high-half lane parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_unpack<su>_hi_<mode>.
970 (define_expand "vec_unpack<su>_hi_<mode>"
971 [(match_operand:<VWIDE> 0 "register_operand" "")
972 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
975 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
976 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
;; Standard vec_unpack lo expander: builds the low-half lane parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_unpack<su>_lo_<mode>.
982 (define_expand "vec_unpack<su>_lo_<mode>"
983 [(match_operand:<VWIDE> 0 "register_operand" "")
984 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
987 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
988 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
994 ;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves: the extended low halves
;; of operands 2 and 4 are multiplied and accumulated into <VWIDE> operand 1,
;; which is tied to operand 0.
996 (define_insn "*aarch64_<su>mlal_lo<mode>"
997 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1000 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1001 (match_operand:VQW 2 "register_operand" "w")
1002 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1003 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1004 (match_operand:VQW 4 "register_operand" "w")
1006 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1008 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1009 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate on the high halves (the "2" instruction
;; form): the extended high halves of operands 2 and 4 are multiplied and
;; accumulated into <VWIDE> operand 1, which is tied to operand 0.
1012 (define_insn "*aarch64_<su>mlal_hi<mode>"
1013 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1016 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1017 (match_operand:VQW 2 "register_operand" "w")
1018 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1019 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1020 (match_operand:VQW 4 "register_operand" "w")
1022 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1024 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1025 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the low halves: the product of the extended
;; low halves of operands 2 and 4 is combined with <VWIDE> operand 1, which
;; is tied to operand 0 (MLSL).
1028 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1031 (match_operand:<VWIDE> 1 "register_operand" "0")
1033 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1034 (match_operand:VQW 2 "register_operand" "w")
1035 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1036 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1037 (match_operand:VQW 4 "register_operand" "w")
1040 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1041 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract on the high halves (the "2" instruction form):
;; the product of the extended high halves of operands 2 and 4 is combined
;; with <VWIDE> operand 1, which is tied to operand 0 (MLSL2).
1044 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1045 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1047 (match_operand:<VWIDE> 1 "register_operand" "0")
1049 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1050 (match_operand:VQW 2 "register_operand" "w")
1051 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1052 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1053 (match_operand:VQW 4 "register_operand" "w")
1056 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1057 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate for the VDW modes: operands 1 and 2 are
;; multiplied into <VWIDE> elements and accumulated into operand 3, which is
;; tied to operand 0.
1060 (define_insn "*aarch64_<su>mlal<mode>"
1061 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1065 (match_operand:VDW 1 "register_operand" "w"))
1067 (match_operand:VDW 2 "register_operand" "w")))
1068 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1070 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1071 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract for the VDW modes: the widened product of
;; operands 2 and 3 is combined with <VWIDE> operand 1, which is tied to
;; operand 0 (MLSL).
1074 (define_insn "*aarch64_<su>mlsl<mode>"
1075 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1077 (match_operand:<VWIDE> 1 "register_operand" "0")
1080 (match_operand:VDW 2 "register_operand" "w"))
1082 (match_operand:VDW 3 "register_operand" "w")))))]
1084 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1085 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves: operand 0 = extend (low half of
;; operand 1) * extend (low half of operand 2), emitted as [su]mull.
;; Operand 3 is the low-half lane parallel.
1088 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1089 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1090 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1091 (match_operand:VQW 1 "register_operand" "w")
1092 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1093 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1094 (match_operand:VQW 2 "register_operand" "w")
1097 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1098 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard vec_widen_[su]mult_lo expander: builds the low-half lane
;; parallel with aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and
;; emits aarch64_simd_vec_<su>mult_lo_<mode>.
1101 (define_expand "vec_widen_<su>mult_lo_<mode>"
1102 [(match_operand:<VWIDE> 0 "register_operand" "")
1103 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1104 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1108 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves: operand 0 = extend (high half of
;; operand 1) * extend (high half of operand 2), emitted as [su]mull2.
;; Operand 3 is the high-half lane parallel.
1115 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1117 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1118 (match_operand:VQW 1 "register_operand" "w")
1119 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1120 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1121 (match_operand:VQW 2 "register_operand" "w")
1124 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1125 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard vec_widen_[su]mult_hi expander: builds the high-half lane
;; parallel with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and
;; emits aarch64_simd_vec_<su>mult_hi_<mode>.
1128 (define_expand "vec_widen_<su>mult_hi_<mode>"
1129 [(match_operand:<VWIDE> 0 "register_operand" "")
1130 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1131 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1134 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1135 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1143 ;; FP vector operations.
1144 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1145 ;; double-precision (64-bit) floating-point data types and arithmetic as
1146 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1147 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1149 ;; Floating-point operations can raise an exception. Vectorizing such
1150 ;; operations is safe for the reasons explained below.
1152 ;; ARMv8 permits an extension to enable trapped floating-point
1153 ;; exception handling, however this is an optional feature. In the
1154 ;; event of a floating-point exception being raised by vectorised
1156 ;; 1. If trapped floating-point exceptions are available, then a trap
1157 ;; will be taken when any lane raises an enabled exception. A trap
1158 ;; handler may determine which lane raised the exception.
1159 ;; 2. Alternatively a sticky exception flag is set in the
1160 ;; floating-point status register (FPSR). Software may explicitly
1161 ;; test the exception flags, in which case the tests will either
1162 ;; prevent vectorisation, allowing precise identification of the
1163 ;; failing operation, or if tested outside of vectorisable regions
1164 ;; then the specific operation and lane are not of interest.
1166 ;; FP arithmetic operations.
;; Vector floating-point addition for the VDQF modes:
;; operand 0 = operand 1 + operand 2, emitted as fadd.
1168 (define_insn "add<mode>3"
1169 [(set (match_operand:VDQF 0 "register_operand" "=w")
1170 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1171 (match_operand:VDQF 2 "register_operand" "w")))]
1173 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1174 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector floating-point subtraction for the VDQF modes:
;; operand 0 = operand 1 - operand 2, emitted as fsub.
1177 (define_insn "sub<mode>3"
1178 [(set (match_operand:VDQF 0 "register_operand" "=w")
1179 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1180 (match_operand:VDQF 2 "register_operand" "w")))]
1182 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1183 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector floating-point multiplication for the VDQF modes:
;; operand 0 = operand 1 * operand 2, emitted as fmul.
1186 (define_insn "mul<mode>3"
1187 [(set (match_operand:VDQF 0 "register_operand" "=w")
1188 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1189 (match_operand:VDQF 2 "register_operand" "w")))]
1191 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1192 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; Vector floating-point division for the VDQF modes:
;; operand 0 = operand 1 / operand 2, emitted as fdiv.
1195 (define_insn "div<mode>3"
1196 [(set (match_operand:VDQF 0 "register_operand" "=w")
1197 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1198 (match_operand:VDQF 2 "register_operand" "w")))]
1200 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1201 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
;; Vector floating-point negation for the VDQF modes:
;; operand 0 = -operand 1, emitted as fneg.
1204 (define_insn "neg<mode>2"
1205 [(set (match_operand:VDQF 0 "register_operand" "=w")
1206 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1208 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1209 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
;; Vector floating-point absolute value for the VDQF modes:
;; operand 0 = |operand 1|, emitted as fabs.
1212 (define_insn "abs<mode>2"
1213 [(set (match_operand:VDQF 0 "register_operand" "=w")
1214 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1216 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1217 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Vector fused multiply-add: (fma operand1 operand2 operand3).  The addend
;; (operand 3) is tied to the destination by the "0" constraint, so the
;; fmla template only names operands 0, 1 and 2.
1220 (define_insn "fma<mode>4"
1221 [(set (match_operand:VDQF 0 "register_operand" "=w")
1222 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1223 (match_operand:VDQF 2 "register_operand" "w")
1224 (match_operand:VDQF 3 "register_operand" "0")))]
1226 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1227 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; By-element fmla: lane operand 2 (an immediate) of vector operand 1 is
;; multiplied with operand 3 and accumulated into operand 4, which is tied
;; to the destination ("0").  Operand 1 uses the <h_con> register constraint.
1230 (define_insn "*aarch64_fma4_elt<mode>"
1231 [(set (match_operand:VDQF 0 "register_operand" "=w")
1235 (match_operand:VDQF 1 "register_operand" "<h_con>")
1236 (parallel [(match_operand:SI 2 "immediate_operand")])))
1237 (match_operand:VDQF 3 "register_operand" "w")
1238 (match_operand:VDQF 4 "register_operand" "0")))]
1240 "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1241 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element fmla where the lane source (operand 1) has the <VSWAP_WIDTH>
;; mode rather than the mode of the result.  The selected lane (immediate
;; operand 2) is duplicated across a VDQSF vector, multiplied with operand 3
;; and accumulated into operand 4, which is tied to the destination.
1244 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1245 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1247 (vec_duplicate:VDQSF
1249 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1250 (parallel [(match_operand:SI 2 "immediate_operand")])))
1251 (match_operand:VDQSF 3 "register_operand" "w")
1252 (match_operand:VDQSF 4 "register_operand" "0")))]
1254 "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1255 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmla of a V2DF vector (operand 2) by a scalar DF register (operand 1),
;; accumulating into operand 3 which is tied to the destination.  The scalar
;; is addressed as lane 0 of its vector register in the template.
1258 (define_insn "*aarch64_fma4_elt_to_128df"
1259 [(set (match_operand:V2DF 0 "register_operand" "=w")
1262 (match_operand:DF 1 "register_operand" "w"))
1263 (match_operand:V2DF 2 "register_operand" "w")
1264 (match_operand:V2DF 3 "register_operand" "0")))]
1266 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1267 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF multiply-accumulate taking one factor from lane operand 2 of a
;; V2DF register (operand 1); the other factor is DF operand 3 and the
;; accumulator (operand 4) is tied to the destination.
;; NOTE(review): operands 0 and 3 are DF but the template prints them with
;; the .2d arrangement -- confirm that this is the intended encoding.
1270 (define_insn "*aarch64_fma4_elt_to_64v2df"
1271 [(set (match_operand:DF 0 "register_operand" "=w")
1274 (match_operand:V2DF 1 "register_operand" "w")
1275 (parallel [(match_operand:SI 2 "immediate_operand")]))
1276 (match_operand:DF 3 "register_operand" "w")
1277 (match_operand:DF 4 "register_operand" "0")))]
1279 "fmla\\t%0.2d, %3.2d, %1.2d[%2]"
1280 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Vector fused multiply-subtract, emitted as fmls on operands 1 and 2.
;; The accumulator (operand 3) is tied to the destination by "0".
1283 (define_insn "fnma<mode>4"
1284 [(set (match_operand:VDQF 0 "register_operand" "=w")
1286 (match_operand:VDQF 1 "register_operand" "w")
1288 (match_operand:VDQF 2 "register_operand" "w"))
1289 (match_operand:VDQF 3 "register_operand" "0")))]
1291 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1292 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; By-element fmls: operand 3 is combined with lane operand 2 (an immediate)
;; of vector operand 1; the accumulator (operand 4) is tied to the
;; destination.  Operand 1 uses the <h_con> register constraint.
1295 (define_insn "*aarch64_fnma4_elt<mode>"
1296 [(set (match_operand:VDQF 0 "register_operand" "=w")
1299 (match_operand:VDQF 3 "register_operand" "w"))
1302 (match_operand:VDQF 1 "register_operand" "<h_con>")
1303 (parallel [(match_operand:SI 2 "immediate_operand")])))
1304 (match_operand:VDQF 4 "register_operand" "0")))]
1306 "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1307 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element fmls where the lane source (operand 1) has the <VSWAP_WIDTH>
;; mode.  The selected lane (immediate operand 2) is duplicated across a
;; VDQSF vector; the accumulator (operand 4) is tied to the destination.
1310 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1311 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1314 (match_operand:VDQSF 3 "register_operand" "w"))
1315 (vec_duplicate:VDQSF
1317 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1318 (parallel [(match_operand:SI 2 "immediate_operand")])))
1319 (match_operand:VDQSF 4 "register_operand" "0")))]
1321 "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1322 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; fmls of a V2DF vector (operand 2) by a scalar DF register (operand 1),
;; with the accumulator (operand 3) tied to the destination.  The scalar is
;; addressed as lane 0 of its vector register in the template.
1325 (define_insn "*aarch64_fnma4_elt_to_128df"
1326 [(set (match_operand:V2DF 0 "register_operand" "=w")
1329 (match_operand:V2DF 2 "register_operand" "w"))
1331 (match_operand:DF 1 "register_operand" "w"))
1332 (match_operand:V2DF 3 "register_operand" "0")))]
1334 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1335 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF multiply-subtract taking one factor from lane operand 2 of a
;; V2DF register (operand 1); the other factor is DF operand 3 and the
;; accumulator (operand 4) is tied to the destination.
;; NOTE(review): operands 0 and 3 are DF but the template prints them with
;; the .2d arrangement -- confirm that this is the intended encoding.
1338 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1339 [(set (match_operand:DF 0 "register_operand" "=w")
1342 (match_operand:V2DF 1 "register_operand" "w")
1343 (parallel [(match_operand:SI 2 "immediate_operand")]))
1345 (match_operand:DF 3 "register_operand" "w"))
1346 (match_operand:DF 4 "register_operand" "0")))]
1348 "fmls\\t%0.2d, %3.2d, %1.2d[%2]"
1349 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1352 ;; Vector versions of the floating-point frint patterns.
1353 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
;; One insn per <frint_pattern>: an unspec on VDQF operand 1, emitted as
;; frint<frint_suffix> on the whole vector.
1354 (define_insn "<frint_pattern><mode>2"
1355 [(set (match_operand:VDQF 0 "register_operand" "=w")
1356 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1359 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1360 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1363 ;; Vector versions of the fcvt standard patterns.
1364 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Float-to-integer conversion with an explicit rounding mode: FIXUORS
;; applied to an unspec of VDQF operand 1, producing <FCVT_TARGET>.
;; Emitted as fcvt<frint_suffix><su>.
1365 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1366 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1367 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1368 [(match_operand:VDQF 1 "register_operand" "w")]
1371 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1372 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Expander whose RTL template is FIXUORS applied to an unspec of VDQF
;; operand 1, with an <FCVT_TARGET> result.  The unspec code and the
;; condition are not visible in this listing.
1375 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1376 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1377 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1378 [(match_operand:VDQF 1 "register_operand")]
;; Expander for the <fix_trunc_optab> names, with the same visible template
;; shape as the expander above it: FIXUORS applied to an unspec of VDQF
;; operand 1.  The unspec code and the condition are not visible here.
1383 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1384 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1385 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1386 [(match_operand:VDQF 1 "register_operand")]
;; Expander for ftrunc on the VDQF modes; the template is an unspec of
;; operand 1 whose code is not visible in this listing.
1391 (define_expand "ftrunc<VDQF:mode>2"
1392 [(set (match_operand:VDQF 0 "register_operand")
1393 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Integer-to-float conversion: <FCVT_TARGET> operand 1 is converted to the
;; VDQF result, emitted as <su_optab>cvtf.
1398 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1399 [(set (match_operand:VDQF 0 "register_operand" "=w")
1401 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1403 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1404 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1407 ;; Conversions between vectors of floats and doubles.
1408 ;; Contains a mix of patterns to match standard pattern names
1409 ;; and those for intrinsics.
1411 ;; Float widening operations.
;; Widen lanes 0 and 1 of V4SF operand 1 to a V2DF result (fcvtl).
1413 (define_insn "vec_unpacks_lo_v4sf"
1414 [(set (match_operand:V2DF 0 "register_operand" "=w")
1417 (match_operand:V4SF 1 "register_operand" "w")
1418 (parallel [(const_int 0) (const_int 1)])
1421 "fcvtl\\t%0.2d, %1.2s"
1422 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen a whole V2SF vector (operand 1) to V2DF (fcvtl).
1425 (define_insn "aarch64_float_extend_lo_v2df"
1426 [(set (match_operand:V2DF 0 "register_operand" "=w")
1428 (match_operand:V2SF 1 "register_operand" "w")))]
1430 "fcvtl\\t%0.2d, %1.2s"
1431 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen lanes 2 and 3 of V4SF operand 1 to a V2DF result (fcvtl2).
1434 (define_insn "vec_unpacks_hi_v4sf"
1435 [(set (match_operand:V2DF 0 "register_operand" "=w")
1438 (match_operand:V4SF 1 "register_operand" "w")
1439 (parallel [(const_int 2) (const_int 3)])
1442 "fcvtl2\\t%0.2d, %1.4s"
1443 [(set_attr "type" "neon_fp_cvt_widen_s")]
1446 ;; Float narrowing operations.
;; Narrow a V2DF vector (operand 1) to V2SF with float_truncate (fcvtn).
1448 (define_insn "aarch64_float_truncate_lo_v2sf"
1449 [(set (match_operand:V2SF 0 "register_operand" "=w")
1450 (float_truncate:V2SF
1451 (match_operand:V2DF 1 "register_operand" "w")))]
1453 "fcvtn\\t%0.2s, %1.2d"
1454 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Build a V4SF from an existing V2SF (operand 1, tied to the destination)
;; and the float_truncate of V2DF operand 2.  Only operand 2 appears as a
;; source in the fcvtn2 template because operand 1 already lives in %0.
1457 (define_insn "aarch64_float_truncate_hi_v4sf"
1458 [(set (match_operand:V4SF 0 "register_operand" "=w")
1460 (match_operand:V2SF 1 "register_operand" "0")
1461 (float_truncate:V2SF
1462 (match_operand:V2DF 2 "register_operand" "w"))))]
1464 "fcvtn2\\t%0.4s, %2.2d"
1465 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Pack two V2DF vectors into one V4SF.  The expansion truncates one input
;; into a fresh V2SF temporary (fcvtn pattern) and then feeds that temporary
;; plus the other input to the fcvtn2 pattern.  Which input is treated as
;; "lo" and which as "hi" is swapped when BYTES_BIG_ENDIAN.
1468 (define_expand "vec_pack_trunc_v2df"
1469 [(set (match_operand:V4SF 0 "register_operand")
1471 (float_truncate:V2SF
1472 (match_operand:V2DF 1 "register_operand"))
1473 (float_truncate:V2SF
1474 (match_operand:V2DF 2 "register_operand"))
1478 rtx tmp = gen_reg_rtx (V2SFmode);
1479 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1480 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1482 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1483 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1484 tmp, operands[hi]));
;; Pack two DF scalars into one V2SF.  The two scalars are first placed in
;; the low and high halves of a V2DF temporary (move_lo_quad / move_hi_quad),
;; which is then narrowed with the fcvtn pattern.  Which input goes to the
;; low half is swapped when BYTES_BIG_ENDIAN.
;; The temporary must be V2DFmode: it is the destination of the *_v2df quad
;; moves and the V2DF input of aarch64_float_truncate_lo_v2sf.
1489 (define_expand "vec_pack_trunc_df"
1490 [(set (match_operand:V2SF 0 "register_operand")
1493 (match_operand:DF 1 "register_operand"))
1495 (match_operand:DF 2 "register_operand"))
1499 rtx tmp = gen_reg_rtx (V2DFmode);
1500 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1501 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1503 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1504 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1505 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
;; Non-fused form of multiply-subtract as RTL: operand 0 = operand 1 -
;; (operand 2 * operand 3), with operand 1 tied to the destination.
;; Emitted as a vector fmls.
;; NOTE(review): the type attribute carries the "_scalar" suffix although
;; the template is the vector-by-vector form; fnma<mode>4 uses
;; neon_fp_mla_<Vetype><q> for the same instruction -- confirm.
1510 (define_insn "aarch64_vmls<mode>"
1511 [(set (match_operand:VDQF 0 "register_operand" "=w")
1512 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1513 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1514 (match_operand:VDQF 3 "register_operand" "w"))))]
1516 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1517 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1521 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1523 ;; a = (b < c) ? b : c;
1524 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1525 ;; either explicitly or indirectly via -ffast-math.
1527 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1528 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1529 ;; operand will be returned when both operands are zero (i.e. they may not
1530 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1531 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; Floating-point vector max/min for the FMAXMIN codes, emitted as the
;; f<maxmin>nm instruction.
1534 (define_insn "<su><maxmin><mode>3"
1535 [(set (match_operand:VDQF 0 "register_operand" "=w")
1536 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1537 (match_operand:VDQF 2 "register_operand" "w")))]
1539 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1540 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Floating-point vector max/min expressed as an unspec of operands 1 and 2;
;; the mnemonic comes from <maxmin_uns_op>.
1543 (define_insn "<maxmin_uns><mode>3"
1544 [(set (match_operand:VDQF 0 "register_operand" "=w")
1545 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1546 (match_operand:VDQF 2 "register_operand" "w")]
1549 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1550 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1553 ;; 'across lanes' add.
;; Across-lanes add for the VDQV modes, modelled as an unspec.  The template
;; writes the scalar view of the destination register (%<Vetype>0).
1555 (define_insn "reduc_<sur>plus_<mode>"
1556 [(set (match_operand:VDQV 0 "register_operand" "=w")
1557 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1560 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1561 [(set_attr "type" "neon_reduc_add<q>")]
;; Add reduction for V2SI, emitted as a pairwise addp with operand 1 used
;; as both sources.
1564 (define_insn "reduc_<sur>plus_v2si"
1565 [(set (match_operand:V2SI 0 "register_operand" "=w")
1566 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1569 "addp\\t%0.2s, %1.2s, %1.2s"
1570 [(set_attr "type" "neon_reduc_add")]
;; Add reduction for the two-element FP modes (V2F), emitted as faddp
;; writing the scalar view of the destination register.
1573 (define_insn "reduc_splus_<mode>"
1574 [(set (match_operand:V2F 0 "register_operand" "=w")
1575 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1578 "faddp\\t%<Vetype>0, %1.<Vtype>"
1579 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; One pairwise faddp step on V4SF with operand 1 used as both sources.
;; Used as a building block by the reduc_splus_v4sf expander.
1582 (define_insn "aarch64_addpv4sf"
1583 [(set (match_operand:V4SF 0 "register_operand" "=w")
1584 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1587 "faddp\\t%0.4s, %1.4s, %1.4s"
1588 [(set_attr "type" "neon_fp_reduc_add_s_q")]
;; V4SF add reduction built from two aarch64_addpv4sf steps: the first
;; writes operand 0 from operand 1, the second reads and rewrites operand 0.
1591 (define_expand "reduc_splus_v4sf"
1592 [(set (match_operand:V4SF 0 "register_operand")
1593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1597 emit_insn (gen_aarch64_addpv4sf (operands[0], operands[1]));
1598 emit_insn (gen_aarch64_addpv4sf (operands[0], operands[0]));
;; Per-element count-leading-zeros for the VDQ_BHSI modes, emitted as clz.
1602 (define_insn "clz<mode>2"
1603 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1604 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1606 "clz\\t%0.<Vtype>, %1.<Vtype>"
1607 [(set_attr "type" "neon_cls<q>")]
1610 ;; 'across lanes' max and min ops.
;; Across-lanes max/min for the VDQV_S modes, modelled as an unspec and
;; emitted as <maxmin_uns_op>v writing the scalar view of the destination.
1612 (define_insn "reduc_<maxmin_uns>_<mode>"
1613 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
1614 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
1617 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1618 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Max/min reduction for V2SI, emitted as the pairwise <maxmin_uns_op>p
;; with operand 1 used as both sources.
1621 (define_insn "reduc_<maxmin_uns>_v2si"
1622 [(set (match_operand:V2SI 0 "register_operand" "=w")
1623 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1626 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1627 [(set_attr "type" "neon_reduc_minmax")]
;; Max/min reduction for the two-element FP modes (V2F), emitted as the
;; pairwise <maxmin_uns_op>p writing the scalar view of the destination.
1630 (define_insn "reduc_<maxmin_uns>_<mode>"
1631 [(set (match_operand:V2F 0 "register_operand" "=w")
1632 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1635 "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
1636 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
;; Max/min reduction for V4SF, emitted as <maxmin_uns_op>v writing the
;; S-register view of the destination (%s0).
1639 (define_insn "reduc_<maxmin_uns>_v4sf"
1640 [(set (match_operand:V4SF 0 "register_operand" "=w")
1641 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1644 "<maxmin_uns_op>v\\t%s0, %1.4s"
1645 [(set_attr "type" "neon_fp_reduc_minmax_s_q")]
1648 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1650 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1653 ;; Thus our BSL is of the form:
1654 ;; op0 = bsl (mask, op2, op3)
1655 ;; We can use any of:
1658 ;; bsl mask, op1, op2
1659 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
1660 ;; bit op0, op2, mask
1661 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
1662 ;; bif op0, op1, mask
;; Bitwise select with three register-allocation alternatives.  The
;; alternative is decided by which input is tied to the destination:
;;   alt 0: operand 1 (mask) tied     -> bsl %0, %2, %3
;;   alt 1: operand 3 tied            -> bit %0, %2, %1
;;   alt 2: operand 2 tied            -> bif %0, %3, %1
;; The mask has mode <V_cmp_result>; the data operands are VALLDIF.
1664 (define_insn "aarch64_simd_bsl<mode>_internal"
1665 [(set (match_operand:VALLDIF 0 "register_operand" "=w,w,w")
1668 (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
1669 (match_operand:VALLDIF 2 "register_operand" " w,w,0"))
1672 (match_dup:<V_cmp_result> 1))
1673 (match_operand:VALLDIF 3 "register_operand" " w,0,w"))
1677 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
1678 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
1679 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
1680 [(set_attr "type" "neon_bsl<q>")]
;; Public entry point for bitwise select.  It re-views the mask (operand 1)
;; in <V_cmp_result>mode with gen_lowpart and then emits
;; aarch64_simd_bsl<mode>_internal with the four operands.
1683 (define_expand "aarch64_simd_bsl<mode>"
1684 [(match_operand:VALLDIF 0 "register_operand")
1685 (match_operand:<V_cmp_result> 1 "register_operand")
1686 (match_operand:VALLDIF 2 "register_operand")
1687 (match_operand:VALLDIF 3 "register_operand")]
1690 /* We can't alias operands together if they have different modes. */
1691 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
1692 emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
1693 operands[2], operands[3]));
;; Integer vector conditional select:
;;   operand 0 = (operand 4 <cmp operator 3> operand 5) ? operand 1 : operand 2
;; Steps visible in this listing:
;;  - operand 5 is forced into a register unless it already is one, or it is
;;    the zero vector and the comparison has a compare-against-zero form
;;    (has_zero_imm_form);
;;  - a comparison mask is produced with one of cmge/cmgt/cmgeu/cmgtu/cmeq;
;;  - if the selected values are all-ones and zero, the mask itself is moved
;;    to operand 0; otherwise op1/op2 are forced into registers and
;;    aarch64_simd_bsl<mode> performs the select.
;; The case labels and the handling of `inverse' are not visible here.
1697 (define_expand "aarch64_vcond_internal<mode><mode>"
1698 [(set (match_operand:VDQ 0 "register_operand")
1700 (match_operator 3 "comparison_operator"
1701 [(match_operand:VDQ 4 "register_operand")
1702 (match_operand:VDQ 5 "nonmemory_operand")])
1703 (match_operand:VDQ 1 "nonmemory_operand")
1704 (match_operand:VDQ 2 "nonmemory_operand")))]
1707 int inverse = 0, has_zero_imm_form = 0;
1708 rtx op1 = operands[1];
1709 rtx op2 = operands[2];
1710 rtx mask = gen_reg_rtx (<MODE>mode);
1712 switch (GET_CODE (operands[3]))
1722 has_zero_imm_form = 1;
1732 if (!REG_P (operands[5])
1733 && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
1734 operands[5] = force_reg (<MODE>mode, operands[5]);
1736 switch (GET_CODE (operands[3]))
1740 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
1745 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
1750 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
1755 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
1760 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
1773 /* If we have (a = (b CMP c) ? -1 : 0);
1774 Then we can simply move the generated mask. */
1776 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
1777 && op2 == CONST0_RTX (<V_cmp_result>mode))
1778 emit_move_insn (operands[0], mask);
1782 op1 = force_reg (<MODE>mode, op1);
1784 op2 = force_reg (<MODE>mode, op2);
1785 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point vector conditional select: the comparison (operator 3) is
;; on VDQF operands 4 and 5, while the selected values (operands 1 and 2)
;; and the result have a VDQF_COND mode.
;; Outline of the visible expansion:
;;  - base_comparison / complimentary_comparison are chosen among the
;;    cmge/cmgt/cmeq generators (cmlt/cmle when the zero form is used);
;;  - the mask is produced either directly, or from the inverse comparison
;;    with swap_bsl_operands set so that unordered lanes select correctly;
;;  - two-comparison sequences are combined with an inclusive OR into mask;
;;  - if the selected values are all-ones and zero, the mask is moved to
;;    operand 0; otherwise aarch64_simd_bsl<VDQF_COND:mode> does the select.
;; NOTE(review): the zero test on operand 5 uses an unqualified <MODE> while
;; the rest of the body qualifies the iterator; operand 5 is a VDQF operand,
;; so <VDQF:MODE> is presumably intended -- confirm.
1792 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
1793 [(set (match_operand:VDQF_COND 0 "register_operand")
1795 (match_operator 3 "comparison_operator"
1796 [(match_operand:VDQF 4 "register_operand")
1797 (match_operand:VDQF 5 "nonmemory_operand")])
1798 (match_operand:VDQF_COND 1 "nonmemory_operand")
1799 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
1803 int use_zero_form = 0;
1804 int swap_bsl_operands = 0;
1805 rtx op1 = operands[1];
1806 rtx op2 = operands[2];
1807 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1808 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1810 rtx (*base_comparison) (rtx, rtx, rtx);
1811 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1813 switch (GET_CODE (operands[3]))
1820 if (operands[5] == CONST0_RTX (<MODE>mode))
1827 if (!REG_P (operands[5]))
1828 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
1831 switch (GET_CODE (operands[3]))
1841 base_comparison = gen_aarch64_cmge<VDQF:mode>;
1842 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
1850 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
1851 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
1856 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
1857 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
1863 switch (GET_CODE (operands[3]))
1870 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
1871 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1877 Note that there also exist direct comparison against 0 forms,
1878 so catch those as a special case. */
1882 switch (GET_CODE (operands[3]))
1885 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
1888 base_comparison = gen_aarch64_cmle<VDQF:mode>;
1891 /* Do nothing, other zero form cases already have the correct
1898 emit_insn (base_comparison (mask, operands[4], operands[5]));
1900 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1907 /* FCM returns false for lanes which are unordered, so if we use
1908 the inverse of the comparison we actually want to emit, then
1909 swap the operands to BSL, we will end up with the correct result.
1910 Note that a NE NaN and NaN NE b are true for all a, b.
1912 Our transformations are:
1917 a NE b -> !(a EQ b) */
1920 emit_insn (base_comparison (mask, operands[4], operands[5]));
1922 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1924 swap_bsl_operands = 1;
1927 /* We check (a > b || b > a). Combining these comparisons gives us
1928 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1929 will then give us (a == b || a UNORDERED b) as intended. */
1931 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
1932 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
1933 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
1934 swap_bsl_operands = 1;
1937 /* Operands are ORDERED iff (a > b || b >= a).
1938 Swapping the operands to BSL will give the UNORDERED case. */
1939 swap_bsl_operands = 1;
1942 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
1943 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
1944 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
1950 if (swap_bsl_operands)
1956 /* If we have (a = (b CMP c) ? -1 : 0);
1957 Then we can simply move the generated mask. */
1959 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
1960 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
1961 emit_move_insn (operands[0], mask);
1965 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
1967 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
1968 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond for all VALL modes where the compared and selected
;; vectors share one mode.  Forwards its six operands unchanged to
;; aarch64_vcond_internal<mode><mode>.
1975 (define_expand "vcond<mode><mode>"
1976 [(set (match_operand:VALL 0 "register_operand")
1978 (match_operator 3 "comparison_operator"
1979 [(match_operand:VALL 4 "register_operand")
1980 (match_operand:VALL 5 "nonmemory_operand")])
1981 (match_operand:VALL 1 "nonmemory_operand")
1982 (match_operand:VALL 2 "nonmemory_operand")))]
1985 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
1986 operands[2], operands[3],
1987 operands[4], operands[5]));
;; vcond with a floating-point comparison (VDQF operands 4 and 5) selecting
;; between <V_cmp_result> vectors.  Forwards its six operands unchanged to
;; aarch64_vcond_internal<v_cmp_result><mode>.
1991 (define_expand "vcond<v_cmp_result><mode>"
1992 [(set (match_operand:<V_cmp_result> 0 "register_operand")
1993 (if_then_else:<V_cmp_result>
1994 (match_operator 3 "comparison_operator"
1995 [(match_operand:VDQF 4 "register_operand")
1996 (match_operand:VDQF 5 "nonmemory_operand")])
1997 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
1998 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2001 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2002 operands[0], operands[1],
2003 operands[2], operands[3],
2004 operands[4], operands[5]));
;; Standard-name vcondu for the VDQ modes.  It forwards to the same
;; aarch64_vcond_internal<mode><mode> expander as vcond; the comparison
;; code carried by operand 3 is what distinguishes the two.
2008 (define_expand "vcondu<mode><mode>"
2009 [(set (match_operand:VDQ 0 "register_operand")
2011 (match_operator 3 "comparison_operator"
2012 [(match_operand:VDQ 4 "register_operand")
2013 (match_operand:VDQ 5 "nonmemory_operand")])
2014 (match_operand:VDQ 1 "nonmemory_operand")
2015 (match_operand:VDQ 2 "nonmemory_operand")))]
2018 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2019 operands[2], operands[3],
2020 operands[4], operands[5]));
2024 ;; Patterns for AArch64 SIMD Intrinsics.
;; Create a VD_RE-mode value from a DI operand: operand 1 is re-viewed in
;; <MODE>mode with gen_lowpart and moved into operand 0.
2026 (define_expand "aarch64_create<mode>"
2027 [(match_operand:VD_RE 0 "register_operand" "")
2028 (match_operand:DI 1 "general_operand" "")]
2031 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2032 emit_move_insn (operands[0], src);
2036 ;; Lane extraction with sign extension to general purpose register.
;; Extract lane operand 2 of VDQQH operand 1 into a general register of
;; GPI mode using smov.  The lane index is remapped through ENDIAN_LANE_N
;; before it is printed.
2037 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2038 [(set (match_operand:GPI 0 "register_operand" "=r")
2041 (match_operand:VDQQH 1 "register_operand" "w")
2042 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2045 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2046 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2048 [(set_attr "type" "neon_to_gp<q>")]
;; Extract lane operand 2 of VDQQH operand 1 into an SI general register
;; using umov on the W view of the destination.  The lane index is remapped
;; through ENDIAN_LANE_N before it is printed.
2051 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2052 [(set (match_operand:SI 0 "register_operand" "=r")
2055 (match_operand:VDQQH 1 "register_operand" "w")
2056 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2059 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2060 return "umov\\t%w0, %1.<Vetype>[%2]";
2062 [(set_attr "type" "neon_to_gp<q>")]
2065 ;; Lane extraction of a value, neither sign nor zero extension
2066 ;; is guaranteed so upper bits should be considered undefined.
;; Three alternatives, selected by the destination constraint:
;;   "r"   -> umov to a general register,
;;   "w"   -> dup to the scalar view of a SIMD register,
;;   "Utv" -> st1 of the single lane to memory.
;; The lane index is remapped through ENDIAN_LANE_N before it is printed.
2067 (define_insn "aarch64_get_lane<mode>"
2068 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2070 (match_operand:VALL 1 "register_operand" "w, w, w")
2071 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2074 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2075 switch (which_alternative)
2078 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2080 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2082 return "st1\\t{%1.<Vetype>}[%2], %0";
2087 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Lane extraction for a DI "vector": after passing the lane index to
;; aarch64_simd_lane_bounds with the range arguments 0 and 1, the value is
;; simply moved from operand 1 to operand 0.
2090 (define_expand "aarch64_get_lanedi"
2091 [(match_operand:DI 0 "register_operand")
2092 (match_operand:DI 1 "register_operand")
2093 (match_operand:SI 2 "immediate_operand")]
2096 aarch64_simd_lane_bounds (operands[2], 0, 1);
2097 emit_move_insn (operands[0], operands[1]);
;; Reinterpret any VDC-mode value as V8QI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2101 (define_expand "aarch64_reinterpretv8qi<mode>"
2102 [(match_operand:V8QI 0 "register_operand" "")
2103 (match_operand:VDC 1 "register_operand" "")]
2106 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDC-mode value as V4HI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2110 (define_expand "aarch64_reinterpretv4hi<mode>"
2111 [(match_operand:V4HI 0 "register_operand" "")
2112 (match_operand:VDC 1 "register_operand" "")]
2115 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDC-mode value as V2SI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2119 (define_expand "aarch64_reinterpretv2si<mode>"
2120 [(match_operand:V2SI 0 "register_operand" "")
2121 (match_operand:VDC 1 "register_operand" "")]
2124 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VDC-mode value as V2SF; delegates to
;; aarch64_simd_reinterpret (destination, source).
2128 (define_expand "aarch64_reinterpretv2sf<mode>"
2129 [(match_operand:V2SF 0 "register_operand" "")
2130 (match_operand:VDC 1 "register_operand" "")]
2133 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VD_RE-mode value as DI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2137 (define_expand "aarch64_reinterpretdi<mode>"
2138 [(match_operand:DI 0 "register_operand" "")
2139 (match_operand:VD_RE 1 "register_operand" "")]
2142 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V16QI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2146 (define_expand "aarch64_reinterpretv16qi<mode>"
2147 [(match_operand:V16QI 0 "register_operand" "")
2148 (match_operand:VQ 1 "register_operand" "")]
2151 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V8HI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2155 (define_expand "aarch64_reinterpretv8hi<mode>"
2156 [(match_operand:V8HI 0 "register_operand" "")
2157 (match_operand:VQ 1 "register_operand" "")]
2160 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V4SI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2164 (define_expand "aarch64_reinterpretv4si<mode>"
2165 [(match_operand:V4SI 0 "register_operand" "")
2166 (match_operand:VQ 1 "register_operand" "")]
2169 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V4SF; delegates to
;; aarch64_simd_reinterpret (destination, source).
2173 (define_expand "aarch64_reinterpretv4sf<mode>"
2174 [(match_operand:V4SF 0 "register_operand" "")
2175 (match_operand:VQ 1 "register_operand" "")]
2178 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V2DI; delegates to
;; aarch64_simd_reinterpret (destination, source).
2182 (define_expand "aarch64_reinterpretv2di<mode>"
2183 [(match_operand:V2DI 0 "register_operand" "")
2184 (match_operand:VQ 1 "register_operand" "")]
2187 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret any VQ-mode value as V2DF; delegates to
;; aarch64_simd_reinterpret (destination, source).
2191 (define_expand "aarch64_reinterpretv2df<mode>"
2192 [(match_operand:V2DF 0 "register_operand" "")
2193 (match_operand:VQ 1 "register_operand" "")]
2196 aarch64_simd_reinterpret (operands[0], operands[1]);
2200 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine where operand 2 must satisfy aarch64_simd_imm_zero ("Dz"): only
;; operand 1 needs to be copied, which is done with a 64-bit (.8b) vector
;; mov.  The destination is early-clobber ("=&w").
2203 (define_insn "*aarch64_combinez<mode>"
2204 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2206 (match_operand:VDIC 1 "register_operand" "w")
2207 (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))]
2209 "mov\\t%0.8b, %1.8b"
2210 [(set_attr "type" "neon_move<q>")]
;; vec_concat of two VDC registers into a <VDBL> register.  This is an
;; insn-and-split: once reload has completed it is split by calling
;; aarch64_split_simd_combine on the three operands.  The destination is
;; early-clobber ("=&w").
2213 (define_insn_and_split "aarch64_combine<mode>"
2214 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2215 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2216 (match_operand:VDC 2 "register_operand" "w")))]
2219 "&& reload_completed"
2222 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2225 [(set_attr "type" "multiple")]
;; Expander form of the combine: operand 1 is written to the low half of
;; operand 0 with move_lo_quad_<Vdbl>, then operand 2 to the high half with
;; move_hi_quad_<Vdbl>.
2228 (define_expand "aarch64_simd_combine<mode>"
2229 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2230 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2231 (match_operand:VDC 2 "register_operand" "w")))]
2234 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2235 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2238 [(set_attr "type" "multiple")]
2241 ;; <su><addsub>l<q>.
;; Widening add/subtract on vector halves, "hi" flavour.  Operand 3 must
;; satisfy vect_par_cnst_hi_half and selects the <VHALF> part of operand 1;
;; the selected halves are extended (ANY_EXTEND) and combined with ADDSUB.
;; Emitted as the "l2" form on the full <Vtype> arrangement.
2243 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2244 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2245 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2246 (match_operand:VQW 1 "register_operand" "w")
2247 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2248 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2249 (match_operand:VQW 2 "register_operand" "w")
2252 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2253 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/subtract on vector halves, "lo" flavour.  Operand 3 must
;; satisfy vect_par_cnst_lo_half and selects the <VHALF> part of operand 1;
;; the selected halves are extended (ANY_EXTEND) and combined with ADDSUB.
;; Emitted as the "l" form on the <Vhalftype> arrangement.
2256 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2257 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2258 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2259 (match_operand:VQW 1 "register_operand" "w")
2260 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2261 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2262 (match_operand:VQW 2 "register_operand" "w")
2265 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2266 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Expander for saddl2: builds the lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (second argument true) and forwards to
;; aarch64_saddl<mode>_hi_internal.
2270 (define_expand "aarch64_saddl2<mode>"
2271 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2272 (match_operand:VQW 1 "register_operand" "w")
2273 (match_operand:VQW 2 "register_operand" "w")]
2276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2277 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Expander for uaddl2: builds the lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (second argument true) and forwards to
;; aarch64_uaddl<mode>_hi_internal.
2282 (define_expand "aarch64_uaddl2<mode>"
2283 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2284 (match_operand:VQW 1 "register_operand" "w")
2285 (match_operand:VQW 2 "register_operand" "w")]
2288 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2289 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Expander for ssubl2: builds the lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (second argument true) and forwards to
;; aarch64_ssubl<mode>_hi_internal.
2294 (define_expand "aarch64_ssubl2<mode>"
2295 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2296 (match_operand:VQW 1 "register_operand" "w")
2297 (match_operand:VQW 2 "register_operand" "w")]
2300 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2301 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Expander for usubl2: builds the lane-selection parallel with
;; aarch64_simd_vect_par_cnst_half (second argument true) and forwards to
;; aarch64_usubl<mode>_hi_internal.
2306 (define_expand "aarch64_usubl2<mode>"
2307 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2308 (match_operand:VQW 1 "register_operand" "w")
2309 (match_operand:VQW 2 "register_operand" "w")]
2312 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2313 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening <su><optab>l on whole VDW registers: operands 1 and 2 are extended
;; by ANY_EXTEND and combined by ADDSUB into the <VWIDE> operand 0.
;; The mnemonic is separated from its operands by a tab escape, as in the
;; _lo_internal/_hi_internal forms of this instruction, so that the emitted
;; assembly is formatted uniformly.
2318 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2319 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2320 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2321 (match_operand:VDW 1 "register_operand" "w"))
2323 (match_operand:VDW 2 "register_operand" "w"))))]
2325 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2326 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2329 ;; <su><addsub>w<q>.
;; <su><optab>w: ADDSUB of the already-wide <VWIDE> register operand 1 with
;; the narrow VDW register operand 2, written to the <VWIDE> operand 0.  The
;; template prints operands 0 and 1 with <Vwtype> and operand 2 with <Vtype>.
2331 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2332 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2333 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2335 (match_operand:VDW 2 "register_operand" "w"))))]
2337 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2338 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; <su><optab>w2: ADDSUB of the <VWIDE> register operand 1 with the VQW
;; register operand 2, which is paired with operand 3, a parallel that must
;; satisfy vect_par_cnst_hi_half.  Result goes to the <VWIDE> operand 0.
2341 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2342 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2343 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2346 (match_operand:VQW 2 "register_operand" "w")
2347 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2349 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2350 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Expander for saddw2: <VWIDE> operands 0 and 1, VQW operand 2.  Builds the
;; half-selection parallel `p' with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, true) and emits the saddw2<mode>_internal insn.
2353 (define_expand "aarch64_saddw2<mode>"
2354 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2355 (match_operand:<VWIDE> 1 "register_operand" "w")
2356 (match_operand:VQW 2 "register_operand" "w")]
2359 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2360 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Expander for uaddw2: <VWIDE> operands 0 and 1, VQW operand 2.  Builds the
;; half-selection parallel `p' with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, true) and emits the uaddw2<mode>_internal insn.
2365 (define_expand "aarch64_uaddw2<mode>"
2366 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2367 (match_operand:<VWIDE> 1 "register_operand" "w")
2368 (match_operand:VQW 2 "register_operand" "w")]
2371 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2372 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Expander for ssubw2: <VWIDE> operands 0 and 1, VQW operand 2.  Builds the
;; half-selection parallel `p' with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, true) and emits the ssubw2<mode>_internal insn.
2378 (define_expand "aarch64_ssubw2<mode>"
2379 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2380 (match_operand:<VWIDE> 1 "register_operand" "w")
2381 (match_operand:VQW 2 "register_operand" "w")]
2384 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2385 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Expander for usubw2: <VWIDE> operands 0 and 1, VQW operand 2.  Builds the
;; half-selection parallel `p' with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, true) and emits the usubw2<mode>_internal insn.
2390 (define_expand "aarch64_usubw2<mode>"
2391 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2392 (match_operand:<VWIDE> 1 "register_operand" "w")
2393 (match_operand:VQW 2 "register_operand" "w")]
2396 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2397 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2402 ;; <su><r>h<addsub>.
;; Two-input unspec over VQ_S registers emitting <sur>h<addsub>; all three
;; operands are printed with the same <Vtype> arrangement.  Scheduling type
;; is neon_<addsub>_halve<q>.
2404 (define_insn "aarch64_<sur>h<addsub><mode>"
2405 [(set (match_operand:VQ_S 0 "register_operand" "=w")
2406 (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w")
2407 (match_operand:VQ_S 2 "register_operand" "w")]
2410 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2411 [(set_attr "type" "neon_<addsub>_halve<q>")]
2414 ;; <r><addsub>hn<q>.
;; Narrowing form: an unspec of two VQN registers producing a <VNARROWQ>
;; result.  Emits <sur><addsub>hn with the destination printed as <Vntype>
;; and the sources as <Vtype>.
2416 (define_insn "aarch64_<sur><addsub>hn<mode>"
2417 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2418 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2419 (match_operand:VQN 2 "register_operand" "w")]
2422 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2423 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "2" variant of the narrowing form.  Operand 1 (<VNARROWQ>) carries the "0"
;; matching constraint, so it shares a register with the <VNARROWQ2>
;; destination; only operands 0, 2 and 3 appear in the emitted
;; <sur><addsub>hn2 instruction.
2426 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2427 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2428 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2429 (match_operand:VQN 2 "register_operand" "w")
2430 (match_operand:VQN 3 "register_operand" "w")]
2433 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2434 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; pmul on VB-mode registers, modelled as a two-input unspec.  Scheduling
;; type is neon_mul_<Vetype><q>.
2439 (define_insn "aarch64_pmul<mode>"
2440 [(set (match_operand:VB 0 "register_operand" "=w")
2441 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2442 (match_operand:VB 2 "register_operand" "w")]
2445 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2446 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Binary BINQOPS operation on two VSDQ_I registers, expressed directly as RTL
;; rather than an unspec.  Operands are printed through %<v>N<Vmtype>, the
;; form this file uses for patterns whose iterator is named VSDQ_*.
2451 (define_insn "aarch64_<su_optab><optab><mode>"
2452 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2453 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2454 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2456 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2457 [(set_attr "type" "neon_<optab><q>")]
2460 ;; suqadd and usqadd
;; Accumulating saturating add.  Operand 1 has the "0" matching constraint, so
;; it lives in the destination register; the emitted <sur>qadd therefore names
;; only operands 0 and 2.
2462 (define_insn "aarch64_<sur>qadd<mode>"
2463 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2464 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2465 (match_operand:VSDQ_I 2 "register_operand" "w")]
2468 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2469 [(set_attr "type" "neon_qadd<q>")]
;; Pattern named sqmovun; the instruction actually emitted is sqxtun.  It
;; takes one VSQN_HSDI register and produces a <VNARROWQ> result.
2474 (define_insn "aarch64_sqmovun<mode>"
2475 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2476 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2479 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2480 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2483 ;; sqmovn and uqmovn
;; Pattern named <sur>qmovn; the instruction actually emitted is <sur>qxtn.
;; It takes one VSQN_HSDI register and produces a <VNARROWQ> result.
2485 (define_insn "aarch64_<sur>qmovn<mode>"
2486 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2487 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2490 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2491 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Single-input operation on a VSDQ_I_BHSI register; the mnemonic is the
;; iterated <optab> with an "s" prefix and the scheduling type is
;; neon_<optab><q>.
2496 (define_insn "aarch64_s<optab><mode>"
2497 [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
2499 (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
2501 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2502 [(set_attr "type" "neon_<optab><q>")]
;; sq<r>dmulh on two VSDQ_HSI registers; <r> is an iterator attribute spliced
;; into the mnemonic.  Scheduling type is neon_sat_mul_<Vetype><q>.
2507 (define_insn "aarch64_sq<r>dmulh<mode>"
2508 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2510 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2511 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2514 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2515 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; By-lane sq<r>dmulh for VDQHS modes.  Operand 2 is a <VCOND> register
;; (constraint <vwx>) and operand 3 the immediate lane number.  The output
;; code first passes operand 3 to aarch64_simd_lane_bounds with limits 0 and
;; GET_MODE_NUNITS (<VCOND>mode), then returns the template indexing
;; %2.<Vetype>[%3].
;; NOTE(review): the helper presumably diagnoses an out-of-range lane --
;; confirm its exact bound semantics.
2520 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2521 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2523 [(match_operand:VDQHS 1 "register_operand" "w")
2525 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2526 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2530 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
2531 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2532 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; "laneq" counterpart of the by-lane sq<r>dmulh for VDQHS modes: the lane
;; source (operand 2) is a <VCONQ> register, and the lane number in operand 3
;; is checked against GET_MODE_NUNITS (<VCONQ>mode) before the template is
;; returned.
2535 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2536 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2538 [(match_operand:VDQHS 1 "register_operand" "w")
2540 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2541 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2545 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2546 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2547 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; By-lane sq<r>dmulh for the SD_HSI modes.  Operands 0 and 1 are printed as
;; %<v>N with no arrangement suffix, the lane source as %2.<v>[%3].  The lane
;; number is checked against GET_MODE_NUNITS (<VCONQ>mode), i.e. the lane
;; source operand 2 is a <VCONQ> register here.
2550 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2551 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2553 [(match_operand:SD_HSI 1 "register_operand" "w")
2555 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2556 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2560 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2561 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2562 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqdmlal/sqdmlsl (mnemonic letter supplied by <SBINQOPS:as>).  Operand 1 is
;; the <VWIDE> accumulator, tied to the destination by the "0" constraint.
;; Operands 2 and 3 are VSD_HSI registers, each sign-extended to <VWIDE>.
2567 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2568 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2570 (match_operand:<VWIDE> 1 "register_operand" "0")
2573 (sign_extend:<VWIDE>
2574 (match_operand:VSD_HSI 2 "register_operand" "w"))
2575 (sign_extend:<VWIDE>
2576 (match_operand:VSD_HSI 3 "register_operand" "w")))
2579 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2580 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane sqdmlal/sqdmlsl for the VD_HSI modes.  Operand 1 is the
;; accumulator tied to the destination ("0").  The second multiplicand is a
;; vec_duplicate:VD_HSI of a lane (operand 4) of the <VCON> register operand
;; 3, printed as %3.<Vetype>[%4].  No bounds check is done here.
2585 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2586 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2588 (match_operand:<VWIDE> 1 "register_operand" "0")
2591 (sign_extend:<VWIDE>
2592 (match_operand:VD_HSI 2 "register_operand" "w"))
2593 (sign_extend:<VWIDE>
2594 (vec_duplicate:VD_HSI
2596 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2597 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2601 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2602 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-lane sqdmlal/sqdmlsl for the SD_HSI modes.  It shares its name template
;; with the VD_HSI by-lane pattern but iterates over SD_HSI.  The lane of the
;; <VCON> register operand 3 selected by operand 4 is sign-extended directly,
;; with no vec_duplicate around it.
2605 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2606 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2608 (match_operand:<VWIDE> 1 "register_operand" "0")
2611 (sign_extend:<VWIDE>
2612 (match_operand:SD_HSI 2 "register_operand" "w"))
2613 (sign_extend:<VWIDE>
2615 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2616 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2620 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2621 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal by lane.  Checks the lane number (operand 4) with
;; aarch64_simd_lane_bounds using the limit GET_MODE_NUNITS (<VCON>mode) / 2,
;; then emits sqdmlal_lane<mode>_internal with operands 0-3 (further
;; arguments continue past the text shown here).
2624 (define_expand "aarch64_sqdmlal_lane<mode>"
2625 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2626 (match_operand:<VWIDE> 1 "register_operand" "0")
2627 (match_operand:VSD_HSI 2 "register_operand" "w")
2628 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2629 (match_operand:SI 4 "immediate_operand" "i")]
2632 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2633 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2634 operands[2], operands[3],
;; Expander for sqdmlal by lane, "laneq" flavour.  It differs from the _lane
;; expander only in the bound passed to aarch64_simd_lane_bounds: the full
;; GET_MODE_NUNITS (<VCON>mode) rather than half of it.  It emits the same
;; sqdmlal_lane<mode>_internal insn.
;; NOTE(review): operand 3 is declared <VCON> here, whereas
;; aarch64_sq<r>dmulh_laneq<mode> uses <VCONQ> for its lane source -- confirm
;; that <VCON> is intended for every mode in VSD_HSI.
2639 (define_expand "aarch64_sqdmlal_laneq<mode>"
2640 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2641 (match_operand:<VWIDE> 1 "register_operand" "0")
2642 (match_operand:VSD_HSI 2 "register_operand" "w")
2643 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2644 (match_operand:SI 4 "immediate_operand" "i")]
2647 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2648 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2649 operands[2], operands[3],
;; Expander for sqdmlsl by lane.  Checks the lane number (operand 4) with
;; aarch64_simd_lane_bounds using the limit GET_MODE_NUNITS (<VCON>mode) / 2,
;; then emits sqdmlsl_lane<mode>_internal with operands 0-3 (further
;; arguments continue past the text shown here).
2654 (define_expand "aarch64_sqdmlsl_lane<mode>"
2655 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2656 (match_operand:<VWIDE> 1 "register_operand" "0")
2657 (match_operand:VSD_HSI 2 "register_operand" "w")
2658 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2659 (match_operand:SI 4 "immediate_operand" "i")]
2662 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2663 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2664 operands[2], operands[3],
;; Expander for sqdmlsl by lane, "laneq" flavour.  It differs from the _lane
;; expander only in the bound passed to aarch64_simd_lane_bounds: the full
;; GET_MODE_NUNITS (<VCON>mode) rather than half of it.  It emits the same
;; sqdmlsl_lane<mode>_internal insn.
;; NOTE(review): operand 3 is declared <VCON> here, whereas
;; aarch64_sq<r>dmulh_laneq<mode> uses <VCONQ> for its lane source -- confirm
;; that <VCON> is intended for every mode in VSD_HSI.
2669 (define_expand "aarch64_sqdmlsl_laneq<mode>"
2670 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2671 (match_operand:<VWIDE> 1 "register_operand" "0")
2672 (match_operand:VSD_HSI 2 "register_operand" "w")
2673 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2674 (match_operand:SI 4 "immediate_operand" "i")]
2677 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2678 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2679 operands[2], operands[3],
;; "_n" form of sqdmlal/sqdmlsl for VD_HSI modes.  The second multiplicand is
;; a vec_duplicate:VD_HSI of the <VEL> register operand 3, and the template
;; always addresses it as lane 0 (%3.<Vetype>[0]).  Operand 1 is the
;; accumulator tied to the destination ("0").
2686 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
2687 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2689 (match_operand:<VWIDE> 1 "register_operand" "0")
2692 (sign_extend:<VWIDE>
2693 (match_operand:VD_HSI 2 "register_operand" "w"))
2694 (sign_extend:<VWIDE>
2695 (vec_duplicate:VD_HSI
2696 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2699 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2700 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal2/sqdmlsl2 on VQ_HSI registers.  Operand 1 is the accumulator tied
;; to the destination ("0").  Operand 2 is paired with operand 4, a parallel
;; that must satisfy vect_par_cnst_hi_half, before being sign-extended;
;; operand 3 is sign-extended likewise.
;; NOTE(review): the type attribute says "_scalar_long" although no lane or
;; scalar operand appears in this pattern -- confirm the scheduling class.
2705 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
2706 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2708 (match_operand:<VWIDE> 1 "register_operand" "0")
2711 (sign_extend:<VWIDE>
2713 (match_operand:VQ_HSI 2 "register_operand" "w")
2714 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2715 (sign_extend:<VWIDE>
2717 (match_operand:VQ_HSI 3 "register_operand" "w")
2721 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2722 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2.  Builds the half-selection parallel `p' with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; sqdmlal2<mode>_internal with operands 0-3 followed by p.
2725 (define_expand "aarch64_sqdmlal2<mode>"
2726 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2727 (match_operand:<VWIDE> 1 "register_operand" "w")
2728 (match_operand:VQ_HSI 2 "register_operand" "w")
2729 (match_operand:VQ_HSI 3 "register_operand" "w")]
2732 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2733 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
2734 operands[2], operands[3], p));
;; Expander for sqdmlsl2.  Builds the half-selection parallel `p' with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; sqdmlsl2<mode>_internal with operands 0-3 followed by p.
2738 (define_expand "aarch64_sqdmlsl2<mode>"
2739 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2740 (match_operand:<VWIDE> 1 "register_operand" "w")
2741 (match_operand:VQ_HSI 2 "register_operand" "w")
2742 (match_operand:VQ_HSI 3 "register_operand" "w")]
2745 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2746 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
2747 operands[2], operands[3], p));
;; By-lane sqdmlal2/sqdmlsl2.  Operand 2 (VQ_HSI) is paired with operand 5,
;; a parallel that must satisfy vect_par_cnst_hi_half.  The other multiplicand
;; is a vec_duplicate:<VHALF> of the lane of the <VCON> register operand 3
;; selected by the immediate operand 4.  Operand 1 is the accumulator tied to
;; the destination ("0").
2753 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
2754 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2756 (match_operand:<VWIDE> 1 "register_operand" "0")
2759 (sign_extend:<VWIDE>
2761 (match_operand:VQ_HSI 2 "register_operand" "w")
2762 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
2763 (sign_extend:<VWIDE>
2764 (vec_duplicate:<VHALF>
2766 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2767 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
2771 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2772 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2 by lane.  Builds the half-selection parallel `p',
;; checks the lane number (operand 4) against GET_MODE_NUNITS (<MODE>mode) / 2
;; and emits sqdmlal2_lane<mode>_internal (arguments continue past the text
;; shown here).
2775 (define_expand "aarch64_sqdmlal2_lane<mode>"
2776 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2777 (match_operand:<VWIDE> 1 "register_operand" "w")
2778 (match_operand:VQ_HSI 2 "register_operand" "w")
2779 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2780 (match_operand:SI 4 "immediate_operand" "i")]
2783 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2784 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2785 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2786 operands[2], operands[3],
;; Expander for sqdmlal2 by lane, "laneq" flavour.  Same as the _lane expander
;; except that the lane number is checked against the full
;; GET_MODE_NUNITS (<MODE>mode).  It emits the same
;; sqdmlal2_lane<mode>_internal insn.
2791 (define_expand "aarch64_sqdmlal2_laneq<mode>"
2792 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2793 (match_operand:<VWIDE> 1 "register_operand" "w")
2794 (match_operand:VQ_HSI 2 "register_operand" "w")
2795 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2796 (match_operand:SI 4 "immediate_operand" "i")]
2799 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2800 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2801 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2802 operands[2], operands[3],
;; Expander for sqdmlsl2 by lane.  Builds the half-selection parallel `p',
;; checks the lane number (operand 4) against GET_MODE_NUNITS (<MODE>mode) / 2
;; and emits sqdmlsl2_lane<mode>_internal (arguments continue past the text
;; shown here).
2807 (define_expand "aarch64_sqdmlsl2_lane<mode>"
2808 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2809 (match_operand:<VWIDE> 1 "register_operand" "w")
2810 (match_operand:VQ_HSI 2 "register_operand" "w")
2811 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2812 (match_operand:SI 4 "immediate_operand" "i")]
2815 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2816 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2817 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2818 operands[2], operands[3],
;; Expander for sqdmlsl2 by lane, "laneq" flavour.  Same as the _lane expander
;; except that the lane number is checked against the full
;; GET_MODE_NUNITS (<MODE>mode).  It emits the same
;; sqdmlsl2_lane<mode>_internal insn.
2823 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
2824 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2825 (match_operand:<VWIDE> 1 "register_operand" "w")
2826 (match_operand:VQ_HSI 2 "register_operand" "w")
2827 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2828 (match_operand:SI 4 "immediate_operand" "i")]
2831 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2832 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2833 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2834 operands[2], operands[3],
;; "_n" form of sqdmlal2/sqdmlsl2.  Operand 2 (VQ_HSI) is paired with operand
;; 4, a parallel that must satisfy vect_par_cnst_hi_half.  The other
;; multiplicand is a vec_duplicate:<VHALF> of the <VEL> register operand 3,
;; always printed as lane 0.
2839 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
2840 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2842 (match_operand:<VWIDE> 1 "register_operand" "0")
2845 (sign_extend:<VWIDE>
2847 (match_operand:VQ_HSI 2 "register_operand" "w")
2848 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2849 (sign_extend:<VWIDE>
2850 (vec_duplicate:<VHALF>
2851 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2854 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2855 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2 with a scalar (<VEL>) multiplier in operand 3.
;; Builds the half-selection parallel `p' and emits
;; sqdmlal2_n<mode>_internal (arguments continue past the text shown here).
2858 (define_expand "aarch64_sqdmlal2_n<mode>"
2859 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2860 (match_operand:<VWIDE> 1 "register_operand" "w")
2861 (match_operand:VQ_HSI 2 "register_operand" "w")
2862 (match_operand:<VEL> 3 "register_operand" "w")]
2865 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2866 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
2867 operands[2], operands[3],
;; Expander for sqdmlsl2 with a scalar (<VEL>) multiplier in operand 3.
;; Builds the half-selection parallel `p' and emits
;; sqdmlsl2_n<mode>_internal (arguments continue past the text shown here).
2872 (define_expand "aarch64_sqdmlsl2_n<mode>"
2873 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2874 (match_operand:<VWIDE> 1 "register_operand" "w")
2875 (match_operand:VQ_HSI 2 "register_operand" "w")
2876 (match_operand:<VEL> 3 "register_operand" "w")]
2879 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2880 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
2881 operands[2], operands[3],
;; sqdmull: both VSD_HSI inputs (operands 1 and 2) are sign-extended to
;; <VWIDE>; the result goes to the <VWIDE> operand 0.  There is no
;; accumulator operand in this pattern.
2888 (define_insn "aarch64_sqdmull<mode>"
2889 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2892 (sign_extend:<VWIDE>
2893 (match_operand:VSD_HSI 1 "register_operand" "w"))
2894 (sign_extend:<VWIDE>
2895 (match_operand:VSD_HSI 2 "register_operand" "w")))
2898 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2899 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; By-lane sqdmull for VD_HSI modes.  The second multiplicand is a
;; vec_duplicate:VD_HSI of the lane of the <VCON> register operand 2 selected
;; by the immediate operand 3, printed as %2.<Vetype>[%3].
2904 (define_insn "aarch64_sqdmull_lane<mode>_internal"
2905 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2908 (sign_extend:<VWIDE>
2909 (match_operand:VD_HSI 1 "register_operand" "w"))
2910 (sign_extend:<VWIDE>
2911 (vec_duplicate:VD_HSI
2913 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2914 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
2918 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
2919 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; By-lane sqdmull for the SD_HSI modes.  It shares its name template with
;; the VD_HSI by-lane pattern, but the selected lane of the <VCON> register
;; operand 2 is sign-extended directly, with no vec_duplicate.
2922 (define_insn "aarch64_sqdmull_lane<mode>_internal"
2923 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2926 (sign_extend:<VWIDE>
2927 (match_operand:SD_HSI 1 "register_operand" "w"))
2928 (sign_extend:<VWIDE>
2930 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2931 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
2935 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
2936 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull by lane over VSD_HSI.  Checks the lane number
;; (operand 3) against GET_MODE_NUNITS (<VCON>mode) / 2 and emits
;; sqdmull_lane<mode>_internal with operands 0-3.
2939 (define_expand "aarch64_sqdmull_lane<mode>"
2940 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2941 (match_operand:VSD_HSI 1 "register_operand" "w")
2942 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2943 (match_operand:SI 3 "immediate_operand" "i")]
2946 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2947 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
2948 operands[2], operands[3]));
;; Expander for sqdmull by lane, "laneq" flavour.  It iterates over VD_HSI
;; only (the _lane expander uses VSD_HSI), checks the lane number against the
;; full GET_MODE_NUNITS (<VCON>mode), and emits the same
;; sqdmull_lane<mode>_internal insn with operands 0-3.
;; NOTE(review): operand 2 is declared <VCON> here, whereas
;; aarch64_sq<r>dmulh_laneq<mode> uses <VCONQ> for its lane source -- confirm
;; that <VCON> is intended.
2952 (define_expand "aarch64_sqdmull_laneq<mode>"
2953 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2954 (match_operand:VD_HSI 1 "register_operand" "w")
2955 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2956 (match_operand:SI 3 "immediate_operand" "i")]
2959 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
2960 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
2961 (operands[0], operands[1], operands[2], operands[3]));
;; "_n" form of sqdmull for VD_HSI modes.  The second multiplicand is a
;; vec_duplicate:VD_HSI of the <VEL> register operand 2, always printed as
;; lane 0 (%2.<Vetype>[0]).
2967 (define_insn "aarch64_sqdmull_n<mode>"
2968 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2971 (sign_extend:<VWIDE>
2972 (match_operand:VD_HSI 1 "register_operand" "w"))
2973 (sign_extend:<VWIDE>
2974 (vec_duplicate:VD_HSI
2975 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
2979 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
2980 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2 on VQ_HSI registers.  Operand 1 is paired with operand 3, a
;; parallel that must satisfy vect_par_cnst_hi_half, before being
;; sign-extended; operand 2 is sign-extended likewise.
;; NOTE(review): the type attribute says "_scalar_long" although no lane or
;; scalar operand appears in this pattern -- confirm the scheduling class.
2987 (define_insn "aarch64_sqdmull2<mode>_internal"
2988 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2991 (sign_extend:<VWIDE>
2993 (match_operand:VQ_HSI 1 "register_operand" "w")
2994 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2995 (sign_extend:<VWIDE>
2997 (match_operand:VQ_HSI 2 "register_operand" "w")
3002 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3003 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2.  Builds the half-selection parallel `p' with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; sqdmull2<mode>_internal.
;; NOTE(review): operand 2 is declared with mode <VCON>, while the internal
;; pattern it feeds expects VQ_HSI for that operand -- confirm that <VCON>
;; equals the iterated mode for every VQ_HSI mode.
3006 (define_expand "aarch64_sqdmull2<mode>"
3007 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3008 (match_operand:VQ_HSI 1 "register_operand" "w")
3009 (match_operand:<VCON> 2 "register_operand" "w")]
3012 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3013 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; By-lane sqdmull2.  Operand 1 (VQ_HSI) is paired with operand 4, a parallel
;; that must satisfy vect_par_cnst_hi_half.  The other multiplicand is a
;; vec_duplicate:<VHALF> of the lane of the <VCON> register operand 2
;; selected by the immediate operand 3.
3020 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3021 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3024 (sign_extend:<VWIDE>
3026 (match_operand:VQ_HSI 1 "register_operand" "w")
3027 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3028 (sign_extend:<VWIDE>
3029 (vec_duplicate:<VHALF>
3031 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3032 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3036 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
3037 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2 by lane.  Builds the half-selection parallel `p',
;; checks the lane number (operand 3) against GET_MODE_NUNITS (<MODE>mode) / 2
;; and emits sqdmull2_lane<mode>_internal (arguments continue past the text
;; shown here).
3040 (define_expand "aarch64_sqdmull2_lane<mode>"
3041 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3042 (match_operand:VQ_HSI 1 "register_operand" "w")
3043 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3044 (match_operand:SI 3 "immediate_operand" "i")]
3047 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3048 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
3049 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3050 operands[2], operands[3],
;; Expander for sqdmull2 by lane, "laneq" flavour.  Same as the _lane expander
;; except that the lane number is checked against the full
;; GET_MODE_NUNITS (<MODE>mode).  It emits the same
;; sqdmull2_lane<mode>_internal insn.
3055 (define_expand "aarch64_sqdmull2_laneq<mode>"
3056 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3057 (match_operand:VQ_HSI 1 "register_operand" "w")
3058 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3059 (match_operand:SI 3 "immediate_operand" "i")]
3062 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3063 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3064 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3065 operands[2], operands[3],
;; "_n" form of sqdmull2.  Operand 1 (VQ_HSI) is paired with operand 3, a
;; parallel that must satisfy vect_par_cnst_hi_half.  The other multiplicand
;; is a vec_duplicate:<VHALF> of the <VEL> register operand 2, always printed
;; as lane 0.
3072 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3073 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3076 (sign_extend:<VWIDE>
3078 (match_operand:VQ_HSI 1 "register_operand" "w")
3079 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3080 (sign_extend:<VWIDE>
3081 (vec_duplicate:<VHALF>
3082 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3086 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3087 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2 with a scalar (<VEL>) multiplier in operand 2.
;; Builds the half-selection parallel `p' and emits
;; sqdmull2_n<mode>_internal (arguments continue past the text shown here).
3090 (define_expand "aarch64_sqdmull2_n<mode>"
3091 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3092 (match_operand:VQ_HSI 1 "register_operand" "w")
3093 (match_operand:<VEL> 2 "register_operand" "w")]
3096 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3097 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Register-controlled shift <sur>shl on VSDQ_I_DI registers: operand 1 is
;; the value and operand 2 a register of the same mode.
;; NOTE(review): the `;' after the closing quote of the template presumably
;; just opens an empty md comment and has no effect -- confirm and drop it.
3104 (define_insn "aarch64_<sur>shl<mode>"
3105 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3107 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3108 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3111 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3112 [(set_attr "type" "neon_shift_reg<q>")]
;; Register-controlled saturating shift <sur>q<r>shl on VSDQ_I registers;
;; scheduling type is neon_sat_shift_reg<q>.
;; NOTE(review): the `;' after the closing quote of the template presumably
;; just opens an empty md comment and has no effect -- confirm and drop it.
3118 (define_insn "aarch64_<sur>q<r>shl<mode>"
3119 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3121 [(match_operand:VSDQ_I 1 "register_operand" "w")
3122 (match_operand:VSDQ_I 2 "register_operand" "w")]
3125 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3126 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Widening shift-left by immediate: VDW operand 1, SI immediate operand 2,
;; <VWIDE> result.  The output code computes the element width in bits and
;; passes operand 2 to aarch64_simd_const_bounds with limits 0 and
;; bit_width + 1.  When the shift amount equals the element width it returns
;; the plain "shll" mnemonic; the other return uses "<sur>shll".
3131 (define_insn "aarch64_<sur>shll_n<mode>"
3132 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3133 (unspec:<VWIDE> [(match_operand:VDW 1 "register_operand" "w")
3134 (match_operand:SI 2 "immediate_operand" "i")]
3138 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3139 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3140 if (INTVAL (operands[2]) == bit_width)
3142 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3145 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3147 [(set_attr "type" "neon_shift_imm_long")]
;; "2" variant of the widening shift-left by immediate, taking a VQW source.
;; Operand 2 is passed to aarch64_simd_const_bounds with limits 0 and
;; bit_width + 1.  A shift equal to the element width returns "shll2"; the
;; other return uses "<sur>shll2".
3152 (define_insn "aarch64_<sur>shll2_n<mode>"
3153 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3154 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3155 (match_operand:SI 2 "immediate_operand" "i")]
3159 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3160 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3161 if (INTVAL (operands[2]) == bit_width)
3163 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3166 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3168 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-right by immediate (<sur>shr) on VSDQ_I_DI registers.  The immediate
;; operand 2 is passed to aarch64_simd_const_bounds with limits 1 and
;; bit_width + 1, where bit_width is the element width in bits.
;; NOTE(review): the type attribute is neon_sat_shift_imm<q> although the
;; mnemonic has no saturating "q" -- confirm the scheduling class.
3173 (define_insn "aarch64_<sur>shr_n<mode>"
3174 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3175 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3176 (match_operand:SI 2 "immediate_operand" "i")]
3180 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3181 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3182 return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3183 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift-right-and-accumulate by immediate (<sur>sra).  Operand 1 is tied to
;; the destination ("0"), operand 2 is the value shifted and operand 3 the
;; immediate, passed to aarch64_simd_const_bounds with limits 1 and
;; bit_width + 1.  Only operands 0, 2 and 3 are printed.
3188 (define_insn "aarch64_<sur>sra_n<mode>"
3189 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3190 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3191 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3192 (match_operand:SI 3 "immediate_operand" "i")]
3196 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3197 aarch64_simd_const_bounds (operands[3], 1, bit_width + 1);
3198 return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3199 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift-and-insert by immediate (s<lr>i, direction letter from <lr>).
;; Operand 1 is tied to the destination ("0").  The limits passed to
;; aarch64_simd_const_bounds for operand 3 are both offset by
;; <VSLRI:offsetlr>: 1 - offsetlr and bit_width - offsetlr + 1, so the two
;; directions are checked against different ranges.
3204 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3205 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3206 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3207 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3208 (match_operand:SI 3 "immediate_operand" "i")]
3212 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3213 aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>,
3214 bit_width - <VSLRI:offsetlr> + 1);
3215 return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3216 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating shift-left by immediate (<sur>qshl<u>) on VSDQ_I registers.
;; The immediate operand 2 is passed to aarch64_simd_const_bounds with limits
;; 0 and bit_width (not bit_width + 1 as in the shift-right patterns).
3221 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3222 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3223 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3224 (match_operand:SI 2 "immediate_operand" "i")]
3228 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3229 aarch64_simd_const_bounds (operands[2], 0, bit_width);
3230 return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3231 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating narrowing shift-right by immediate (<sur>q<r>shr<u>n): takes a
;; VSQN_HSDI register and produces a <VNARROWQ> result.  The immediate
;; operand 2 is passed to aarch64_simd_const_bounds with limits 1 and
;; bit_width + 1, where bit_width is the element width of the source mode.
3237 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3238 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3239 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3240 (match_operand:SI 2 "immediate_operand" "i")]
3244 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3245 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3246 return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";"
3247 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3251 ;; cm(eq|ge|gt|lt|le)
3252 ;; Note, we have constraints for Dz and Z as different expanders
3253 ;; have different ideas of what should be passed to this pattern.
;; Integer vector compare over the COMPARISONS iterator, with two
;; alternatives.  Alternative 1 compares two registers; the mnemonic and
;; operand order come from the <n_optab>, <cmp_1> and <cmp_2> attributes.
;; Alternative 2 accepts a zero operand 2 (constraint "ZDz") and emits the
;; compare-against-#0 form using <optab> directly.
3255 (define_insn "aarch64_cm<optab><mode>"
3256 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3258 (COMPARISONS:<V_cmp_result>
3259 (match_operand:VDQ 1 "register_operand" "w,w")
3260 (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3264 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3265 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3266 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare with three alternatives: SIMD register/register, SIMD
;; register/zero, and general registers ("r").  The split condition requires
;; operands 0 and 1 to be general registers, so only the "r" alternative is
;; split.  The split code picks a CC mode with SELECT_CC_MODE, emits the
;; flag-setting compare through aarch64_gen_compare_reg and then
;; gen_cstoredi_neg, hence the CC_REGNUM clobber.  The third alternative is
;; typed "multiple".
3269 (define_insn_and_split "aarch64_cm<optab>di"
3270 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3273 (match_operand:DI 1 "register_operand" "w,w,r")
3274 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3276 (clobber (reg:CC CC_REGNUM))]
3279 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3280 cm<optab>\t%d0, %d1, #0
3283 /* We need to prevent the split from
3284 happening in the 'w' constraint cases. */
3285 && GP_REGNUM_P (REGNO (operands[0]))
3286 && GP_REGNUM_P (REGNO (operands[1]))"
3289 enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3290 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3291 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3292 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3295 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Integer vector compare over the UCOMPARISONS iterator.  There is a single
;; register/register alternative and no compare-against-zero form; the
;; mnemonic and operand order come from <n_optab>, <cmp_1> and <cmp_2>.
3300 (define_insn "aarch64_cm<optab><mode>"
3301 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3303 (UCOMPARISONS:<V_cmp_result>
3304 (match_operand:VDQ 1 "register_operand" "w")
3305 (match_operand:VDQ 2 "register_operand" "w")
3308 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3309 [(set_attr "type" "neon_compare<q>")]
;; DImode compare with two alternatives.  Alternative 1 (SIMD "w" registers)
;; is a single cm<n_optab> on d-registers.  Alternative 2 (general "r"
;; registers) is split: the split condition requires operands 0 and 1 to be
;; general registers, and the split code emits a CCmode flag-setting compare
;; through aarch64_gen_compare_reg followed by gen_cstoredi_neg, hence the
;; CC_REGNUM clobber.
;; The type attribute lists one value per alternative.  There is no
;; compare-against-zero alternative here, and the "r" alternative expands to
;; several instructions, so it is typed "multiple", as the "r" alternative of
;; the three-alternative aarch64_cm<optab>di pattern is.
3312 (define_insn_and_split "aarch64_cm<optab>di"
3313 [(set (match_operand:DI 0 "register_operand" "=w,r")
3316 (match_operand:DI 1 "register_operand" "w,r")
3317 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3319 (clobber (reg:CC CC_REGNUM))]
3322 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3325 /* We need to prevent the split from
3326 happening in the 'w' constraint cases. */
3327 && GP_REGNUM_P (REGNO (operands[0]))
3328 && GP_REGNUM_P (REGNO (operands[1]))"
3331 enum machine_mode mode = CCmode;
3332 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3333 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3334 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3337 [(set_attr "type" "neon_compare, multiple")]
;; cmtst on two VDQ registers.  The visible RTL compares an expression of
;; operands 1 and 2 against a vec_duplicate of (const_int 0); the result has
;; mode <V_cmp_result>.  Scheduling type is neon_tst<q>.
3342 (define_insn "aarch64_cmtst<mode>"
3343 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3347 (match_operand:VDQ 1 "register_operand" "w")
3348 (match_operand:VDQ 2 "register_operand" "w"))
3349 (vec_duplicate:<V_cmp_result> (const_int 0)))))]
3351 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3352 [(set_attr "type" "neon_tst<q>")]
;; DImode cmtst with two alternatives.  Alternative 1 (SIMD "w" registers) is
;; a single cmtst on d-registers.  Alternative 2 (general "r" registers) is
;; split: the split condition requires operands 0 and 1 to be general
;; registers, and the split code builds (and op1 op2), compares it against
;; zero with NE through aarch64_gen_compare_reg and then emits
;; gen_cstoredi_neg, hence the CC_REGNUM clobber.
;; The type attribute lists one value per alternative.  The "r" alternative
;; expands to several instructions, so it is typed "multiple", as the "r"
;; alternative of the three-alternative aarch64_cm<optab>di pattern is.
3355 (define_insn_and_split "aarch64_cmtstdi"
3356 [(set (match_operand:DI 0 "register_operand" "=w,r")
3360 (match_operand:DI 1 "register_operand" "w,r")
3361 (match_operand:DI 2 "register_operand" "w,r"))
3363 (clobber (reg:CC CC_REGNUM))]
3366 cmtst\t%d0, %d1, %d2
3369 /* We need to prevent the split from
3370 happening in the 'w' constraint cases. */
3371 && GP_REGNUM_P (REGNO (operands[0]))
3372 && GP_REGNUM_P (REGNO (operands[1]))"
3375 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3376 enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3377 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3378 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3379 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3382 [(set_attr "type" "neon_tst, multiple")]
3385 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compare over the COMPARISONS iterator for VALLF modes, with
;; two alternatives.  Alternative 1 compares two registers (mnemonic and
;; operand order via <n_optab>, <cmp_1>, <cmp_2>).  Alternative 2 accepts a
;; zero operand 2 (constraint "YDz") and prints the literal 0 as the last
;; operand.  Both alternatives share one type value.
3387 (define_insn "aarch64_cm<optab><mode>"
3388 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3390 (COMPARISONS:<V_cmp_result>
3391 (match_operand:VALLF 1 "register_operand" "w,w")
3392 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3396 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3397 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3398 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3402 ;; Note we can also handle what would be fac(le|lt) by
3403 ;; generating fac(ge|gt).
;; Absolute-value floating-point compares over FAC_COMPARISONS for the
;; VALLF modes.  Both inputs appear wrapped in (abs ...) in the RTL and a
;; single fac<n_optab> instruction is emitted; <cmp_1>/<cmp_2> choose which
;; operand is printed first.
3405 (define_insn "*aarch64_fac<optab><mode>"
3406 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3408 (FAC_COMPARISONS:<V_cmp_result>
3409 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3410 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3413 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3414 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; ADDP for the VD_BHSI modes: takes two SIMD register inputs (operands 1
;; and 2) and emits a single addp into operand 0.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
3419 (define_insn "aarch64_addp<mode>"
3420 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3422 [(match_operand:VD_BHSI 1 "register_operand" "w")
3423 (match_operand:VD_BHSI 2 "register_operand" "w")]
3426 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3427 [(set_attr "type" "neon_reduc_add<q>")]
;; DI-result pattern taking a single V2DI SIMD register input (operand 1);
;; scheduled as "neon_reduc_add".
;; NOTE(review): the unspec name and output template are not visible in
;; this excerpt.
3430 (define_insn "aarch64_addpdi"
3431 [(set (match_operand:DI 0 "register_operand" "=w")
3433 [(match_operand:V2DI 1 "register_operand" "w")]
3437 [(set_attr "type" "neon_reduc_add")]
;; sqrt<mode>2 for the VDQF modes: operand 0 = sqrt (operand 1), emitted
;; as a single vector fsqrt.
3442 (define_insn "sqrt<mode>2"
3443 [(set (match_operand:VDQF 0 "register_operand" "=w")
3444 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3446 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3447 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3450 ;; Patterns for vector struct loads and stores.
;; Loads an OI register list (operand 0) from the "Utv" memory operand 1
;; with LD2; %S0 and %T0 print the first and last register of the list.
;; The VQ-mode UNSPEC_VSTRUCTDUMMY unspec holds only (const_int 0) --
;; presumably it exists just to bring the VQ iterator (used by <Vtype>)
;; into the pattern; confirm.
3452 (define_insn "vec_load_lanesoi<mode>"
3453 [(set (match_operand:OI 0 "register_operand" "=w")
3454 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3455 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3458 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3459 [(set_attr "type" "neon_load2_2reg<q>")]
;; Stores the OI register list in operand 1 to the "Utv" memory operand 0
;; with ST2 (registers %S1 through %T1).  The VQ-mode dummy unspec holds
;; only (const_int 0).
3462 (define_insn "vec_store_lanesoi<mode>"
3463 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3464 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3465 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3468 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3469 [(set_attr "type" "neon_store2_2reg<q>")]
;; Loads a CI register list (operand 0) from the "Utv" memory operand 1
;; with LD3 (registers %S0 through %U0).  The VQ-mode dummy unspec holds
;; only (const_int 0).
3472 (define_insn "vec_load_lanesci<mode>"
3473 [(set (match_operand:CI 0 "register_operand" "=w")
3474 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3475 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3478 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3479 [(set_attr "type" "neon_load3_3reg<q>")]
;; Stores the CI register list in operand 1 to the "Utv" memory operand 0
;; with ST3 (registers %S1 through %U1).  The VQ-mode dummy unspec holds
;; only (const_int 0).
3482 (define_insn "vec_store_lanesci<mode>"
3483 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3484 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3485 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3488 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3489 [(set_attr "type" "neon_store3_3reg<q>")]
;; Loads an XI register list (operand 0) from the "Utv" memory operand 1
;; with LD4 (registers %S0 through %V0).  The VQ-mode dummy unspec holds
;; only (const_int 0).
3492 (define_insn "vec_load_lanesxi<mode>"
3493 [(set (match_operand:XI 0 "register_operand" "=w")
3494 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
3495 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3498 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3499 [(set_attr "type" "neon_load4_4reg<q>")]
;; Stores the XI register list in operand 1 to the "Utv" memory operand 0
;; with ST4 (registers %S1 through %V1).  The VQ-mode dummy unspec holds
;; only (const_int 0).
3502 (define_insn "vec_store_lanesxi<mode>"
3503 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
3504 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
3505 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3508 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
3509 [(set_attr "type" "neon_store4_4reg<q>")]
3512 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created, a destination that is not a REG causes operand 1 to be forced
;; into a register first.
;; NOTE(review): the pattern condition and the braces of the preparation
;; statements are not visible in this excerpt.
3514 (define_expand "mov<mode>"
3515 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
3516 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
3519 if (can_create_pseudo_p ())
3521 if (GET_CODE (operands[0]) != REG)
3522 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes; at least one of the two operands must
;; be a register.  Three alternatives: register <- register, memory <-
;; register (st1 of the whole list as .16b), and register <- memory (ld1 of
;; the whole list as .16b).  <Vendreg> prints the last register of the list.
;; The "length" attribute is computed by aarch64_simd_attr_length_move.
;; NOTE(review): the handling of alternative 0 is on a line not visible in
;; this excerpt.
3526 (define_insn "*aarch64_mov<mode>"
3527 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
3528 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
3530 && (register_operand (operands[0], <MODE>mode)
3531 || register_operand (operands[1], <MODE>mode))"
3534 switch (which_alternative)
3537 case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
3538 case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
3539 default: gcc_unreachable ();
3542 [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
3543 neon_load<nregs>_<nregs>reg_q")
3544 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; After reload, an OI register-to-register move is replaced by two
;; TFmode register moves (registers rdest, rdest+1 <- rsrc, rsrc+1).
;; aarch64_simd_disambiguate_copy receives the operand array together with
;; the per-register dest/src arrays -- presumably it orders the copies so
;; overlapping ranges are handled safely; confirm against its definition.
;; NOTE(review): the defining keyword line is not visible in this excerpt.
3548 [(set (match_operand:OI 0 "register_operand" "")
3549 (match_operand:OI 1 "register_operand" ""))]
3550 "TARGET_SIMD && reload_completed"
3551 [(set (match_dup 0) (match_dup 1))
3552 (set (match_dup 2) (match_dup 3))]
3554 int rdest = REGNO (operands[0]);
3555 int rsrc = REGNO (operands[1]);
3556 rtx dest[2], src[2];
3558 dest[0] = gen_rtx_REG (TFmode, rdest);
3559 src[0] = gen_rtx_REG (TFmode, rsrc);
3560 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3561 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3563 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
;; After reload, a CI register-to-register move is replaced by three
;; TFmode register moves (registers rdest..rdest+2 <- rsrc..rsrc+2).
;; aarch64_simd_disambiguate_copy receives the operand array together with
;; the per-register dest/src arrays -- presumably it orders the copies so
;; overlapping ranges are handled safely; confirm against its definition.
;; NOTE(review): the defining keyword line is not visible in this excerpt.
3567 [(set (match_operand:CI 0 "register_operand" "")
3568 (match_operand:CI 1 "register_operand" ""))]
3569 "TARGET_SIMD && reload_completed"
3570 [(set (match_dup 0) (match_dup 1))
3571 (set (match_dup 2) (match_dup 3))
3572 (set (match_dup 4) (match_dup 5))]
3574 int rdest = REGNO (operands[0]);
3575 int rsrc = REGNO (operands[1]);
3576 rtx dest[3], src[3];
3578 dest[0] = gen_rtx_REG (TFmode, rdest);
3579 src[0] = gen_rtx_REG (TFmode, rsrc);
3580 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3581 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3582 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3583 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3585 aarch64_simd_disambiguate_copy (operands, dest, src, 3);
;; After reload, an XI register-to-register move is replaced by four
;; TFmode register moves (registers rdest..rdest+3 <- rsrc..rsrc+3).
;; aarch64_simd_disambiguate_copy receives the operand array together with
;; the per-register dest/src arrays -- presumably it orders the copies so
;; overlapping ranges are handled safely; confirm against its definition.
;; NOTE(review): the defining keyword line is not visible in this excerpt.
3589 [(set (match_operand:XI 0 "register_operand" "")
3590 (match_operand:XI 1 "register_operand" ""))]
3591 "TARGET_SIMD && reload_completed"
3592 [(set (match_dup 0) (match_dup 1))
3593 (set (match_dup 2) (match_dup 3))
3594 (set (match_dup 4) (match_dup 5))
3595 (set (match_dup 6) (match_dup 7))]
3597 int rdest = REGNO (operands[0]);
3598 int rsrc = REGNO (operands[1]);
3599 rtx dest[4], src[4];
3601 dest[0] = gen_rtx_REG (TFmode, rdest);
3602 src[0] = gen_rtx_REG (TFmode, rsrc);
3603 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3604 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3605 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3606 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3607 dest[3] = gen_rtx_REG (TFmode, rdest + 3);
3608 src[3] = gen_rtx_REG (TFmode, rsrc + 3);
3610 aarch64_simd_disambiguate_copy (operands, dest, src, 4);
;; LD2 for the VD modes: loads an OI register list (operand 0) from a
;; TI-mode "Utv" memory operand.  In the RTL each of the two VD-mode loaded
;; parts is followed by a zero vector (vec_duplicate of const_int 0), and
;; the combined value is accessed at offset 0.
;; NOTE(review): the combining RTL codes are on lines not visible here.
3613 (define_insn "aarch64_ld2<mode>_dreg"
3614 [(set (match_operand:OI 0 "register_operand" "=w")
3618 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3620 (vec_duplicate:VD (const_int 0)))
3622 (unspec:VD [(match_dup 1)]
3624 (vec_duplicate:VD (const_int 0)))) 0))]
3626 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3627 [(set_attr "type" "neon_load2_2reg<q>")]
;; DX-mode counterpart of the two-register D-reg load: the template emits
;; ld1 with a two-register .1d list rather than ld2, and the type is
;; neon_load1_2reg.  Each DX-mode part is paired with (const_int 0) in the
;; RTL.
;; NOTE(review): the combining RTL codes are on lines not visible here.
3630 (define_insn "aarch64_ld2<mode>_dreg"
3631 [(set (match_operand:OI 0 "register_operand" "=w")
3635 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3639 (unspec:DX [(match_dup 1)]
3641 (const_int 0))) 0))]
3643 "ld1\\t{%S0.1d - %T0.1d}, %1"
3644 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 for the VD modes: loads a CI register list (operand 0) from an
;; EI-mode "Utv" memory operand.  In the RTL each of the three VD-mode
;; loaded parts is followed by a zero vector (vec_duplicate of const_int 0).
;; NOTE(review): the combining RTL codes are on lines not visible here.
3647 (define_insn "aarch64_ld3<mode>_dreg"
3648 [(set (match_operand:CI 0 "register_operand" "=w")
3653 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3655 (vec_duplicate:VD (const_int 0)))
3657 (unspec:VD [(match_dup 1)]
3659 (vec_duplicate:VD (const_int 0))))
3661 (unspec:VD [(match_dup 1)]
3663 (vec_duplicate:VD (const_int 0)))) 0))]
3665 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3666 [(set_attr "type" "neon_load3_3reg<q>")]
;; DX-mode counterpart of the three-register D-reg load: the template
;; emits ld1 with a three-register .1d list rather than ld3, and the type
;; is neon_load1_3reg.
;; NOTE(review): the combining RTL codes are on lines not visible here.
3669 (define_insn "aarch64_ld3<mode>_dreg"
3670 [(set (match_operand:CI 0 "register_operand" "=w")
3675 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3679 (unspec:DX [(match_dup 1)]
3683 (unspec:DX [(match_dup 1)]
3685 (const_int 0))) 0))]
3687 "ld1\\t{%S0.1d - %U0.1d}, %1"
3688 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 for the VD modes: loads an XI register list (operand 0) from an
;; OI-mode "Utv" memory operand.  In the RTL each of the four VD-mode
;; loaded parts is followed by a zero vector (vec_duplicate of const_int 0).
;; NOTE(review): the combining RTL codes are on lines not visible here.
3691 (define_insn "aarch64_ld4<mode>_dreg"
3692 [(set (match_operand:XI 0 "register_operand" "=w")
3697 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3699 (vec_duplicate:VD (const_int 0)))
3701 (unspec:VD [(match_dup 1)]
3703 (vec_duplicate:VD (const_int 0))))
3706 (unspec:VD [(match_dup 1)]
3708 (vec_duplicate:VD (const_int 0)))
3710 (unspec:VD [(match_dup 1)]
3712 (vec_duplicate:VD (const_int 0))))) 0))]
3714 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3715 [(set_attr "type" "neon_load4_4reg<q>")]
;; DX-mode counterpart of the four-register D-reg load: the template emits
;; ld1 with a four-register .1d list rather than ld4, and the type is
;; neon_load1_4reg.
;; NOTE(review): the combining RTL codes are on lines not visible here.
3718 (define_insn "aarch64_ld4<mode>_dreg"
3719 [(set (match_operand:XI 0 "register_operand" "=w")
3724 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3728 (unspec:DX [(match_dup 1)]
3733 (unspec:DX [(match_dup 1)]
3737 (unspec:DX [(match_dup 1)]
3739 (const_int 0)))) 0))]
3741 "ld1\\t{%S0.1d - %V0.1d}, %1"
3742 [(set_attr "type" "neon_load1_4reg<q>")]
;; Expander for the VDC modes: operand 1 is a DI register holding the
;; address.  It is wrapped in a MEM of mode <VSTRUCT:VSTRUCT_DREG> and the
;; matching aarch64_ld<nregs><mode>_dreg pattern is emitted to load the
;; VSTRUCT register in operand 0.
3745 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
3746 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3747 (match_operand:DI 1 "register_operand" "r")
3748 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3751 enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
3752 rtx mem = gen_rtx_MEM (mode, operands[1]);
3754 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Expander for the VALL modes: wraps the DI address in operand 1 in a MEM
;; of the vector mode and emits an ordinary move from it into operand 0.
3758 (define_expand "aarch64_ld1<VALL:mode>"
3759 [(match_operand:VALL 0 "register_operand")
3760 (match_operand:DI 1 "register_operand")]
3763 enum machine_mode mode = <VALL:MODE>mode;
3764 rtx mem = gen_rtx_MEM (mode, operands[1]);
3765 emit_move_insn (operands[0], mem);
;; Expander for the VQ modes: wraps the DI address in operand 1 in a MEM
;; of the full VSTRUCT mode and emits the corresponding vec_load_lanes
;; pattern to load the VSTRUCT register in operand 0.
3769 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
3770 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3771 (match_operand:DI 1 "register_operand" "r")
3772 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3775 enum machine_mode mode = <VSTRUCT:MODE>mode;
3776 rtx mem = gen_rtx_MEM (mode, operands[1]);
3778 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
3782 ;; Expanders for builtins to extract vector registers from large
3783 ;; opaque integer modes.
;; Extracts one VDC-mode value from the VSTRUCT register in operand 1.
;; Operand 2 is an immediate part index.  The <VDC:VDBL>-mode subreg at
;; byte offset part * 16 is first copied into a fresh pseudo, and operand 0
;; then receives the low part of that pseudo in the VDC mode.
3787 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
3788 [(match_operand:VDC 0 "register_operand" "=w")
3789 (match_operand:VSTRUCT 1 "register_operand" "w")
3790 (match_operand:SI 2 "immediate_operand" "i")]
3793 int part = INTVAL (operands[2]);
3794 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
3795 int offset = part * 16;
3797 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
3798 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extracts one VQ-mode value from the VSTRUCT register in operand 1.
;; Operand 2 is an immediate part index; operand 0 receives the VQ-mode
;; subreg at byte offset part * 16.
3804 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
3805 [(match_operand:VQ 0 "register_operand" "=w")
3806 (match_operand:VSTRUCT 1 "register_operand" "w")
3807 (match_operand:SI 2 "immediate_operand" "i")]
3810 int part = INTVAL (operands[2]);
3811 int offset = part * 16;
3813 emit_move_insn (operands[0],
3814 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
3818 ;; Permuted-store expanders for neon intrinsics.
3820 ;; Permute instructions
;; vec_perm_const<mode> for the VALL modes: operands 1 and 2 are the input
;; vectors and operand 3 is the <V_cmp_result>-mode selector.  Expansion is
;; delegated to aarch64_expand_vec_perm_const, whose result is tested.
;; NOTE(review): the actions taken on success/failure are on lines not
;; visible in this excerpt.
3824 (define_expand "vec_perm_const<mode>"
3825 [(match_operand:VALL 0 "register_operand")
3826 (match_operand:VALL 1 "register_operand")
3827 (match_operand:VALL 2 "register_operand")
3828 (match_operand:<V_cmp_result> 3)]
3831 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
3832 operands[2], operands[3]))
;; vec_perm<mode> for the VB modes with a register selector (operand 3):
;; expansion is delegated entirely to aarch64_expand_vec_perm.
3838 (define_expand "vec_perm<mode>"
3839 [(match_operand:VB 0 "register_operand")
3840 (match_operand:VB 1 "register_operand")
3841 (match_operand:VB 2 "register_operand")
3842 (match_operand:VB 3 "register_operand")]
3845 aarch64_expand_vec_perm (operands[0], operands[1],
3846 operands[2], operands[3]);
;; TBL with a single V16QI table register (operand 1, printed as
;; {%1.16b}) and a VB-mode index vector (operand 2); result in operand 0.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
3850 (define_insn "aarch64_tbl1<mode>"
3851 [(set (match_operand:VB 0 "register_operand" "=w")
3852 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
3853 (match_operand:VB 2 "register_operand" "w")]
3856 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
3857 [(set_attr "type" "neon_tbl1<q>")]
3860 ;; Two source registers.
;; TBL with a two-register table held in an OI operand (printed as the
;; list %S1.16b - %T1.16b) and a V16QI index vector (operand 2).
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
3862 (define_insn "aarch64_tbl2v16qi"
3863 [(set (match_operand:V16QI 0 "register_operand" "=w")
3864 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
3865 (match_operand:V16QI 2 "register_operand" "w")]
3868 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
3869 [(set_attr "type" "neon_tbl2_q")]
;; Builds an OI register from two V16QI registers (operands 1 and 2).
;; The insn is split once reload has completed; the split body calls
;; aarch64_split_combinev16qi on the operand array.  Typed "multiple".
3872 (define_insn_and_split "aarch64_combinev16qi"
3873 [(set (match_operand:OI 0 "register_operand" "=w")
3874 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
3875 (match_operand:V16QI 2 "register_operand" "w")]
3879 "&& reload_completed"
3882 aarch64_split_combinev16qi (operands);
3885 [(set_attr "type" "multiple")]
;; Two-input permute instructions for the VALL modes, generated over the
;; PERMUTE iterator: the mnemonic is <perm_insn> followed by <perm_hilo>,
;; applied to operands 1 and 2 with the result in operand 0.
3888 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
3889 [(set (match_operand:VALL 0 "register_operand" "=w")
3890 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
3891 (match_operand:VALL 2 "register_operand" "w")]
3894 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3895 [(set_attr "type" "neon_permute<q>")]
;; ST2 for the VD modes: stores the OI register list in operand 1 to a
;; TI-mode "Utv" memory operand.  The VD-mode dummy unspec holds only
;; (const_int 0).
3898 (define_insn "aarch64_st2<mode>_dreg"
3899 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
3900 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
3901 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3904 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3905 [(set_attr "type" "neon_store2_2reg")]
;; DX-mode counterpart of the two-register D-reg store: the template emits
;; st1 with a two-register .1d list rather than st2, and the type is
;; neon_store1_2reg.
3908 (define_insn "aarch64_st2<mode>_dreg"
3909 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
3910 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
3911 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3914 "st1\\t{%S1.1d - %T1.1d}, %0"
3915 [(set_attr "type" "neon_store1_2reg")]
;; ST3 for the VD modes: stores the CI register list in operand 1 to an
;; EI-mode "Utv" memory operand.  The VD-mode dummy unspec holds only
;; (const_int 0).
3918 (define_insn "aarch64_st3<mode>_dreg"
3919 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
3920 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
3921 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3924 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3925 [(set_attr "type" "neon_store3_3reg")]
;; DX-mode counterpart of the three-register D-reg store: the template
;; emits st1 with a three-register .1d list rather than st3, and the type
;; is neon_store1_3reg.
3928 (define_insn "aarch64_st3<mode>_dreg"
3929 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
3930 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
3931 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3934 "st1\\t{%S1.1d - %U1.1d}, %0"
3935 [(set_attr "type" "neon_store1_3reg")]
;; ST4 for the VD modes: stores the XI register list in operand 1 to an
;; OI-mode "Utv" memory operand.  The VD-mode dummy unspec holds only
;; (const_int 0).
3938 (define_insn "aarch64_st4<mode>_dreg"
3939 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3940 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
3941 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3944 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
3945 [(set_attr "type" "neon_store4_4reg")]
;; DX-mode counterpart of the four-register D-reg store: the template
;; emits st1 with a four-register .1d list rather than st4, and the type is
;; neon_store1_4reg.
3948 (define_insn "aarch64_st4<mode>_dreg"
3949 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3950 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
3951 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3954 "st1\\t{%S1.1d - %V1.1d}, %0"
3955 [(set_attr "type" "neon_store1_4reg")]
;; Expander for the VDC modes: operand 0 is a DI register holding the
;; address.  It is wrapped in a MEM of mode <VSTRUCT:VSTRUCT_DREG> and the
;; matching aarch64_st<nregs><mode>_dreg pattern is emitted to store the
;; VSTRUCT register in operand 1.
3958 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
3959 [(match_operand:DI 0 "register_operand" "r")
3960 (match_operand:VSTRUCT 1 "register_operand" "w")
3961 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3964 enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
3965 rtx mem = gen_rtx_MEM (mode, operands[0]);
3967 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Expander for the VQ modes: wraps the DI address in operand 0 in a MEM
;; of the full VSTRUCT mode and emits the corresponding vec_store_lanes
;; pattern to store the VSTRUCT register in operand 1.
3971 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
3972 [(match_operand:DI 0 "register_operand" "r")
3973 (match_operand:VSTRUCT 1 "register_operand" "w")
3974 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3977 enum machine_mode mode = <VSTRUCT:MODE>mode;
3978 rtx mem = gen_rtx_MEM (mode, operands[0]);
3980 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
;; Expander for the VALL modes: wraps the DI address in operand 0 in a MEM
;; of the vector mode and emits an ordinary move of operand 1 into it.
3984 (define_expand "aarch64_st1<VALL:mode>"
3985 [(match_operand:DI 0 "register_operand")
3986 (match_operand:VALL 1 "register_operand")]
3989 enum machine_mode mode = <VALL:MODE>mode;
3990 rtx mem = gen_rtx_MEM (mode, operands[0]);
3991 emit_move_insn (mem, operands[1]);
3995 ;; Expander for builtins to insert vector registers into large
3996 ;; opaque integer modes.
3998 ;; Q-register list. We don't need a D-reg inserter as we zero
3999 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Inserts a VQ-mode value into a VSTRUCT register.  Operand 1 (tied to
;; operand 0 by the "0" constraint) is first copied to operand 0, then a
;; move is emitted into the VQ-mode subreg of operand 0 at byte offset
;; part * 16, where part is the immediate in operand 3.
;; NOTE(review): the source of that second move is on a line not visible
;; in this excerpt -- presumably operand 2; confirm.
4001 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4002 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4003 (match_operand:VSTRUCT 1 "register_operand" "0")
4004 (match_operand:VQ 2 "register_operand" "w")
4005 (match_operand:SI 3 "immediate_operand" "i")]
4008 int part = INTVAL (operands[3]);
4009 int offset = part * 16;
4011 emit_move_insn (operands[0], operands[1]);
4012 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4017 ;; Standard pattern name vec_init<mode>.
;; vec_init<mode> for the VALL modes: operand 0 is the destination register
;; and operand 1 (no predicate or mode) describes the initial value.
;; Expansion is delegated entirely to aarch64_expand_vector_init.
4019 (define_expand "vec_init<mode>"
4020 [(match_operand:VALL 0 "register_operand" "")
4021 (match_operand 1 "" "")]
4024 aarch64_expand_vector_init (operands[0], operands[1]);
;; Load-and-replicate for the VALLDI modes: a vec_duplicate of a
;; <VEL>-mode "Utv" memory operand is emitted as a single ld1r into
;; operand 0.
4028 (define_insn "*aarch64_simd_ld1r<mode>"
4029 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4030 (vec_duplicate:VALLDI
4031 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
4033 "ld1r\\t{%0.<Vtype>}, %1"
4034 [(set_attr "type" "neon_load1_all_lanes")]
;; Vector FRECPE for the VDQF modes: a one-input unspec on operand 1,
;; emitted as frecpe into operand 0.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
4037 (define_insn "aarch64_frecpe<mode>"
4038 [(set (match_operand:VDQF 0 "register_operand" "=w")
4039 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4042 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
4043 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
;; Scalar FRECP<suffix> for the GPF modes: a one-input unspec on operand 1
;; whose mnemonic suffix comes from <FRECP:frecp_suffix>; operands are
;; printed with the %<s> scalar register prefix.
;; NOTE(review): the unspec/iterator name is on a line not visible here.
4046 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
4047 [(set (match_operand:GPF 0 "register_operand" "=w")
4048 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
4051 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
4052 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
;; FRECPS for the VALLF modes: a two-input unspec on operands 1 and 2,
;; emitted as frecps into operand 0.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
4055 (define_insn "aarch64_frecps<mode>"
4056 [(set (match_operand:VALLF 0 "register_operand" "=w")
4057 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
4058 (match_operand:VALLF 2 "register_operand" "w")]
4061 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4062 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
4065 ;; Standard pattern name vec_extract<mode>.
;; vec_extract<mode> for the VALL modes: operand 0 is the <VEL>-mode
;; destination, operand 1 the source vector register and operand 2 an
;; immediate.  All three are passed to gen_aarch64_get_lane<mode>.
;; NOTE(review): the call wrapping the generated insn is on a line not
;; visible in this excerpt.
4067 (define_expand "vec_extract<mode>"
4068 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
4069 (match_operand:VALL 1 "register_operand" "")
4070 (match_operand:SI 2 "immediate_operand" "")]
4074 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
;; AES instruction selected by <aes_op>; requires TARGET_SIMD and
;; TARGET_CRYPTO.  Operand 1 is tied to the destination (constraint "0"),
;; so the template names only %0 and %2.
4080 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
4081 [(set (match_operand:V16QI 0 "register_operand" "=w")
4082 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
4083 (match_operand:V16QI 2 "register_operand" "w")]
4085 "TARGET_SIMD && TARGET_CRYPTO"
4086 "aes<aes_op>\\t%0.16b, %2.16b"
4087 [(set_attr "type" "crypto_aes")]
;; AES instruction selected by <aesmc_op>; requires TARGET_SIMD and
;; TARGET_CRYPTO.  Single V16QI input (operand 1), not tied to the
;; destination.
4090 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
4091 [(set (match_operand:V16QI 0 "register_operand" "=w")
4092 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
4094 "TARGET_SIMD && TARGET_CRYPTO"
4095 "aes<aesmc_op>\\t%0.16b, %1.16b"
4096 [(set_attr "type" "crypto_aes")]
;; SImode one-input crypto pattern on SIMD registers; requires TARGET_SIMD
;; and TARGET_CRYPTO and is typed crypto_sha1_fast.
;; NOTE(review): the unspec name and output template are not visible in
;; this excerpt.
4101 (define_insn "aarch64_crypto_sha1hsi"
4102 [(set (match_operand:SI 0 "register_operand" "=w")
4103 (unspec:SI [(match_operand:SI 1
4104 "register_operand" "w")]
4106 "TARGET_SIMD && TARGET_CRYPTO"
4108 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1SU1 on V4SI; requires TARGET_SIMD and TARGET_CRYPTO.  Operand 1 is
;; tied to the destination (constraint "0"), so the template names only
;; %0 and %2.
4111 (define_insn "aarch64_crypto_sha1su1v4si"
4112 [(set (match_operand:V4SI 0 "register_operand" "=w")
4113 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4114 (match_operand:V4SI 2 "register_operand" "w")]
4116 "TARGET_SIMD && TARGET_CRYPTO"
4117 "sha1su1\\t%0.4s, %2.4s"
4118 [(set_attr "type" "crypto_sha1_fast")]
;; SHA1 instruction selected by <sha1_op>; requires TARGET_SIMD and
;; TARGET_CRYPTO.  Operand 1 (V4SI) is tied to the destination and printed
;; as a Q register, operand 2 is an SImode scalar printed as an S register,
;; and operand 3 is a V4SI vector.
4121 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
4122 [(set (match_operand:V4SI 0 "register_operand" "=w")
4123 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4124 (match_operand:SI 2 "register_operand" "w")
4125 (match_operand:V4SI 3 "register_operand" "w")]
4127 "TARGET_SIMD && TARGET_CRYPTO"
4128 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
4129 [(set_attr "type" "crypto_sha1_slow")]
;; SHA1SU0 on V4SI; requires TARGET_SIMD and TARGET_CRYPTO.  Operand 1 is
;; tied to the destination (constraint "0"); operands 2 and 3 are the
;; remaining vector inputs.
4132 (define_insn "aarch64_crypto_sha1su0v4si"
4133 [(set (match_operand:V4SI 0 "register_operand" "=w")
4134 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4135 (match_operand:V4SI 2 "register_operand" "w")
4136 (match_operand:V4SI 3 "register_operand" "w")]
4138 "TARGET_SIMD && TARGET_CRYPTO"
4139 "sha1su0\\t%0.4s, %2.4s, %3.4s"
4140 [(set_attr "type" "crypto_sha1_xor")]
;; SHA256H instruction variant selected by <sha256_op>; requires
;; TARGET_SIMD and TARGET_CRYPTO.  Operand 1 is tied to the destination;
;; operands 0 and 2 are printed as Q registers and operand 3 as a .4s
;; vector.
4145 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
4146 [(set (match_operand:V4SI 0 "register_operand" "=w")
4147 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4148 (match_operand:V4SI 2 "register_operand" "w")
4149 (match_operand:V4SI 3 "register_operand" "w")]
4151 "TARGET_SIMD && TARGET_CRYPTO"
4152 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
4153 [(set_attr "type" "crypto_sha256_slow")]
;; SHA256SU0 on V4SI; requires TARGET_SIMD and TARGET_CRYPTO.  Operand 1
;; is tied to the destination (constraint "0"), so the template names only
;; %0 and %2.
4156 (define_insn "aarch64_crypto_sha256su0v4si"
4157 [(set (match_operand:V4SI 0 "register_operand" "=w")
4158 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4159 (match_operand:V4SI 2 "register_operand" "w")]
4161 "TARGET_SIMD &&TARGET_CRYPTO"
4162 "sha256su0\\t%0.4s, %2.4s"
4163 [(set_attr "type" "crypto_sha256_fast")]
;; SHA256SU1 on V4SI; requires TARGET_SIMD and TARGET_CRYPTO.  Operand 1
;; is tied to the destination (constraint "0"); operands 2 and 3 are the
;; remaining vector inputs.
4166 (define_insn "aarch64_crypto_sha256su1v4si"
4167 [(set (match_operand:V4SI 0 "register_operand" "=w")
4168 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
4169 (match_operand:V4SI 2 "register_operand" "w")
4170 (match_operand:V4SI 3 "register_operand" "w")]
4172 "TARGET_SIMD &&TARGET_CRYPTO"
4173 "sha256su1\\t%0.4s, %2.4s, %3.4s"
4174 [(set_attr "type" "crypto_sha256_slow")]
;; PMULL of two DImode inputs (printed as .1d) producing a TImode result
;; (printed as .1q); requires TARGET_SIMD and TARGET_CRYPTO.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
4179 (define_insn "aarch64_crypto_pmulldi"
4180 [(set (match_operand:TI 0 "register_operand" "=w")
4181 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
4182 (match_operand:DI 2 "register_operand" "w")]
4184 "TARGET_SIMD && TARGET_CRYPTO"
4185 "pmull\\t%0.1q, %1.1d, %2.1d"
4186 [(set_attr "type" "neon_mul_d_long")]
;; PMULL2 of two V2DI inputs (printed as .2d) producing a TImode result
;; (printed as .1q); requires TARGET_SIMD and TARGET_CRYPTO.
;; NOTE(review): the unspec name is on a line not visible in this excerpt.
4189 (define_insn "aarch64_crypto_pmullv2di"
4190 [(set (match_operand:TI 0 "register_operand" "=w")
4191 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
4192 (match_operand:V2DI 2 "register_operand" "w")]
4194 "TARGET_SIMD && TARGET_CRYPTO"
4195 "pmull2\\t%0.1q, %1.2d, %2.2d"
4196 [(set_attr "type" "neon_mul_d_long")]